synapse/tests/http/federation/test_srv_resolver.py
Eric Eastwood 5143f93dc9
Fix server_name in logging context for multiple Synapse instances in one process (#18868)
### Background

As part of Element's plan to support a light form of vhosting (virtual
host) (multiple instances of Synapse in the same Python process), we're
currently diving into the details and implications of running multiple
instances of Synapse in the same Python process.

"Per-tenant logging" tracked internally by
https://github.com/element-hq/synapse-small-hosts/issues/48

### Prior art

Previously, we exposed `server_name` by providing a static logging
`MetadataFilter` that injected the values:


205d9e4fc4/synapse/config/logger.py (L216)

While this can work fine for the normal case of one Synapse instance per
Python process, this configures things globally and isn't compatible
when we try to start multiple Synapse instances because each subsequent
tenant will overwrite the previous tenant.


### What does this PR do?

We remove the `MetadataFilter` and replace it by tracking the
`server_name` in the `LoggingContext` and expose it with our existing
[`LoggingContextFilter`](205d9e4fc4/synapse/logging/context.py (L584-L622))
that we already use to expose information about the `request`.

This means that the `server_name` value follows wherever we log as
expected even when we have multiple Synapse instances running in the
same process.


### A note on logcontext

Anywhere Synapse mistakenly uses the `sentinel` logcontext to log
something, we won't know which server sent the log. We've been fixing up
`sentinel` logcontext usage as tracked by
https://github.com/element-hq/synapse/issues/18905

Any further `sentinel` logcontext usage we find in the future can be
fixed piecemeal as normal.


d2a966f922/docs/log_contexts.md (L71-L81)


### Testing strategy

1. Adjust your logging config to include `%(server_name)s` in the format
    ```yaml
    formatters:
        precise:
            format: '%(asctime)s - %(server_name)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s - %(message)s'
    ```
1. Start Synapse: `poetry run synapse_homeserver --config-path
homeserver.yaml`
1. Make some requests (`curl
http://localhost:8008/_matrix/client/versions`, etc)
1. Open the homeserver logs and notice the `server_name` in the logs as
expected. `unknown_server_from_sentinel_context` is expected for the
`sentinel` logcontext (things outside of Synapse).
2025-09-26 17:10:48 -05:00

221 lines
7.7 KiB
Python

#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright 2014-2016 OpenMarket Ltd
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
#
#
from typing import Dict, Generator, List, Tuple, cast
from unittest.mock import Mock
from twisted.internet import defer
from twisted.internet.defer import Deferred
from twisted.internet.error import ConnectError
from twisted.names import dns, error
from synapse.http.federation.srv_resolver import Server, SrvResolver
from synapse.logging.context import LoggingContext, current_context
from tests import unittest
from tests.utils import MockClock
class SrvResolverTestCase(unittest.TestCase):
    """Exercises `SrvResolver.resolve_service` against a mocked DNS client."""

    def test_resolve(self) -> None:
        """A successful SRV answer is returned, stored in the cache, and the
        caller's logging context is still current after the lookup."""
        srv_name = b"test_service.example.com"
        target_host = b"example.com"

        srv_record = dns.RRHeader(
            type=dns.SRV, payload=dns.Record_SRV(target=target_host)
        )

        # The lookup stays pending until the test fires this deferred by hand.
        pending_lookup: "Deferred[Tuple[List[dns.RRHeader], None, None]]" = Deferred()
        mock_dns = Mock()
        mock_dns.lookupService.return_value = pending_lookup

        srv_cache: Dict[bytes, List[Server]] = {}
        srv_resolver = SrvResolver(dns_client=mock_dns, cache=srv_cache)

        @defer.inlineCallbacks
        def do_lookup() -> Generator["Deferred[object]", object, List[Server]]:
            with LoggingContext(name="one", server_name="test_server") as ctx:
                pending = srv_resolver.resolve_service(srv_name)
                resolved: List[Server]
                resolved = yield defer.ensureDeferred(pending)  # type: ignore[assignment]

                # Once the lookup completes we must be back in our own context.
                self.assertIs(current_context(), ctx)

                return resolved

        lookup_d = do_lookup()

        # Nothing can complete before the DNS answer arrives.
        self.assertNoResult(lookup_d)
        mock_dns.lookupService.assert_called_once_with(srv_name)

        pending_lookup.callback(([srv_record], None, None))
        found = self.successResultOf(lookup_d)

        self.assertEqual(len(found), 1)
        self.assertEqual(found, srv_cache[srv_name])
        self.assertEqual(found[0].host, target_host)

    @defer.inlineCallbacks
    def test_from_cache_expired_and_dns_fail(
        self,
    ) -> Generator["Deferred[object]", object, None]:
        """When the DNS lookup fails with a server error, a cached entry with
        `expires=0` is still handed back."""
        srv_name = b"test_service.example.com"

        mock_dns = Mock()
        mock_dns.lookupService.return_value = defer.fail(error.DNSServerError())

        cached_entry = Mock(
            spec_set=["expires", "priority", "weight"],
            expires=0,
            priority=0,
            weight=0,
        )
        srv_cache = {srv_name: [cast(Server, cached_entry)]}

        srv_resolver = SrvResolver(dns_client=mock_dns, cache=srv_cache)

        found: List[Server]
        found = yield defer.ensureDeferred(srv_resolver.resolve_service(srv_name))  # type: ignore[assignment]

        # The resolver did try DNS before falling back to the cached entry.
        mock_dns.lookupService.assert_called_once_with(srv_name)

        self.assertEqual(len(found), 1)
        self.assertEqual(found, srv_cache[srv_name])

    @defer.inlineCallbacks
    def test_from_cache(self) -> Generator["Deferred[object]", object, None]:
        """A cached entry with a large `expires` value is served without any
        DNS lookup being made."""
        srv_name = b"test_service.example.com"
        mock_clock = MockClock()

        # `lookupService` is a non-callable-spec mock; we only check `.called`.
        mock_dns = Mock(spec_set=["lookupService"])
        mock_dns.lookupService = Mock(spec_set=[])

        cached_entry = Mock(
            spec_set=["expires", "priority", "weight"],
            expires=999999999,
            priority=0,
            weight=0,
        )
        srv_cache = {srv_name: [cast(Server, cached_entry)]}

        srv_resolver = SrvResolver(
            dns_client=mock_dns, cache=srv_cache, get_time=mock_clock.time
        )

        found: List[Server]
        found = yield defer.ensureDeferred(srv_resolver.resolve_service(srv_name))  # type: ignore[assignment]

        self.assertFalse(mock_dns.lookupService.called)

        self.assertEqual(len(found), 1)
        self.assertEqual(found, srv_cache[srv_name])

    @defer.inlineCallbacks
    def test_empty_cache(self) -> Generator["Deferred[object]", object, None]:
        """With nothing cached, a DNS server error propagates to the caller."""
        srv_name = b"test_service.example.com"

        mock_dns = Mock()
        mock_dns.lookupService.return_value = defer.fail(error.DNSServerError())

        srv_cache: Dict[bytes, List[Server]] = {}
        srv_resolver = SrvResolver(dns_client=mock_dns, cache=srv_cache)

        with self.assertRaises(error.DNSServerError):
            yield defer.ensureDeferred(srv_resolver.resolve_service(srv_name))

    @defer.inlineCallbacks
    def test_name_error(self) -> Generator["Deferred[object]", object, None]:
        """A DNS name error yields an empty server list and leaves the cache
        empty."""
        srv_name = b"test_service.example.com"

        mock_dns = Mock()
        mock_dns.lookupService.return_value = defer.fail(error.DNSNameError())

        srv_cache: Dict[bytes, List[Server]] = {}
        srv_resolver = SrvResolver(dns_client=mock_dns, cache=srv_cache)

        found: List[Server]
        found = yield defer.ensureDeferred(srv_resolver.resolve_service(srv_name))  # type: ignore[assignment]

        self.assertEqual(len(found), 0)
        self.assertEqual(len(srv_cache), 0)

    def test_disabled_service(self) -> None:
        """
        Check the behaviour when the only SRV record returned has target ".".
        """
        srv_name = b"test_service.example.com"

        pending_lookup: "Deferred[Tuple[List[dns.RRHeader], None, None]]" = Deferred()
        mock_dns = Mock()
        mock_dns.lookupService.return_value = pending_lookup

        srv_cache: Dict[bytes, List[Server]] = {}
        srv_resolver = SrvResolver(dns_client=mock_dns, cache=srv_cache)

        # Old versions of Twisted don't have an ensureDeferred in failureResultOf.
        outcome_d = defer.ensureDeferred(srv_resolver.resolve_service(srv_name))

        # returning a single "." should make the lookup fail with a ConnectError
        dot_record = dns.RRHeader(type=dns.SRV, payload=dns.Record_SRV(target=b"."))
        pending_lookup.callback(([dot_record], None, None))

        self.failureResultOf(outcome_d, ConnectError)

    def test_non_srv_answer(self) -> None:
        """
        Check the behaviour when the DNS answer mixes a spurious non-SRV
        record in with a real SRV record.
        """
        srv_name = b"test_service.example.com"

        pending_lookup: "Deferred[Tuple[List[dns.RRHeader], None, None]]" = Deferred()
        mock_dns = Mock()
        mock_dns.lookupService.return_value = pending_lookup

        srv_cache: Dict[bytes, List[Server]] = {}
        srv_resolver = SrvResolver(dns_client=mock_dns, cache=srv_cache)

        # Old versions of Twisted don't have an ensureDeferred in successResultOf.
        outcome_d = defer.ensureDeferred(srv_resolver.resolve_service(srv_name))

        answers = [
            dns.RRHeader(type=dns.A, payload=dns.Record_A()),
            dns.RRHeader(type=dns.SRV, payload=dns.Record_SRV(target=b"host")),
        ]
        pending_lookup.callback((answers, None, None))

        found = self.successResultOf(outcome_d)

        # Only the SRV record should have been turned into a server entry.
        self.assertEqual(len(found), 1)
        self.assertEqual(found, srv_cache[srv_name])
        self.assertEqual(found[0].host, b"host")