From 715cc5ee37d031cdd512a9d6b5691c8d48a25d03 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 9 Oct 2025 13:12:10 -0500 Subject: [PATCH] Split homeserver creation and setup (#19015) ### Background As part of Element's plan to support a light form of vhosting (virtual host) (multiple instances of Synapse in the same Python process), we're currently diving into the details and implications of running multiple instances of Synapse in the same Python process. "Clean tenant provisioning" tracked internally by https://github.com/element-hq/synapse-small-hosts/issues/221 ### Partial startup problem In the context of Synapse Pro for Small Hosts, since the Twisted reactor is already running (from the `multi_synapse` shard process itself), when provisioning a homeserver tenant, the `reactor.callWhenRunning(...)` callbacks will be invoked immediately. This includes the Synapse's [`start`](https://github.com/element-hq/synapse/blob/0615b64bb49684b846110465052642a46fd27028/synapse/app/homeserver.py#L418-L429) callback which sets up everything (including listeners, background tasks, etc). If we encounter an error at this point, we are partially setup but the exception will [bubble back to us](https://github.com/element-hq/synapse-small-hosts/blob/8be122186bf1acb8c0426d84eb3abded25d682b7/multi_synapse/app/shard.py#L114-L121) without us having a handle to the homeserver yet so we can't call `hs.shutdown()` and clean everything up. ### What does this PR do? Structures Synapse so we split creating the homeserver instance from setting everything up. This way we have access to `hs` if anything goes wrong during setup and can subsequently `hs.shutdown()` to clean everything up. --- changelog.d/19015.misc | 1 + synapse/app/homeserver.py | 84 +++++++++++++++++------- tests/app/test_homeserver_start.py | 8 ++- tests/config/test_registration_config.py | 8 ++- 4 files changed, 74 insertions(+), 27 deletions(-) create mode 100644 changelog.d/19015.misc diff --git a/changelog.d/19015.misc b/changelog.d/19015.misc new file mode 100644 index 000000000..cabc45346 --- /dev/null +++ b/changelog.d/19015.misc @@ -0,0 +1 @@ +Split homeserver creation (`create_homeserver`) and setup (`setup`). diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 3424cdbdb..6ea412b9f 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -83,6 +83,10 @@ def gz_wrap(r: Resource) -> Resource: class SynapseHomeServer(HomeServer): + """ + Homeserver class for the main Synapse process. + """ + DATASTORE_CLASS = DataStore def _listener_http( @@ -345,23 +349,17 @@ def load_or_generate_config(argv_options: List[str]) -> HomeServerConfig: return config -def setup( +def create_homeserver( config: HomeServerConfig, reactor: Optional[ISynapseReactor] = None, - freeze: bool = True, ) -> SynapseHomeServer: """ - Create and setup a Synapse homeserver instance given a configuration. + Create a homeserver instance for the Synapse main process. Args: config: The configuration for the homeserver. reactor: Optionally provide a reactor to use. Can be useful in different scenarios that you want control over the reactor, such as tests. - freeze: whether to freeze the homeserver base objects in the garbage collector. - May improve garbage collection performance by marking objects with an effectively - static lifetime as frozen so they don't need to be considered for cleanup. - If you ever want to `shutdown` the homeserver, this needs to be - False otherwise the homeserver cannot be garbage collected after `shutdown`. Returns: A homeserver instance. @@ -372,7 +370,6 @@ def setup( "You have specified `worker_app` in the config but are attempting to start a non-worker " "instance. Please use `python -m synapse.app.generic_worker` instead (or remove the option if this is the main process)." ) - sys.exit(1) events.USE_FROZEN_DICTS = config.server.use_frozen_dicts synapse.util.caches.TRACK_MEMORY_USAGE = config.caches.track_memory_usage @@ -397,24 +394,50 @@ def setup( ) hs = SynapseHomeServer( - config.server.server_name, + hostname=config.server.server_name, config=config, reactor=reactor, ) - setup_logging(hs, config, use_worker_options=False) + return hs + + +def setup( + hs: SynapseHomeServer, + *, + freeze: bool = True, +) -> None: + """ + Setup a Synapse homeserver instance given a configuration. + + Args: + hs: The homeserver to setup. + freeze: whether to freeze the homeserver base objects in the garbage collector. + May improve garbage collection performance by marking objects with an effectively + static lifetime as frozen so they don't need to be considered for cleanup. + If you ever want to `shutdown` the homeserver, this needs to be + False otherwise the homeserver cannot be garbage collected after `shutdown`. + + Returns: + A homeserver instance. + """ + + setup_logging(hs, hs.config, use_worker_options=False) + + # Log after we've configured logging. + logger.info("Setting up server") # Start the tracer init_tracer(hs) # noqa - logger.info("Setting up server") - try: hs.setup() except Exception as e: handle_startup_exception(e) - async def start() -> None: + async def _start_when_reactor_running() -> None: + # TODO: Feels like this should be moved somewhere else. + # # Load the OIDC provider metadatas, if OIDC is enabled. if hs.config.oidc.oidc_enabled: oidc = hs.get_oidc_handler() @@ -423,21 +446,31 @@ def setup( await _base.start(hs, freeze) + # TODO: This should be moved to `SynapseHomeServer.start_background_tasks` (not + # `HomeServer.start_background_tasks`) (this way it matches the behavior of only + # running on `main`) hs.get_datastores().main.db_pool.updates.start_doing_background_updates() - register_start(hs, start) - - return hs + # Register a callback to be invoked once the reactor is running + register_start(hs, _start_when_reactor_running) -def run(hs: HomeServer) -> None: +def start_reactor( + config: HomeServerConfig, +) -> None: + """ + Start the reactor (Twisted event-loop). + + Args: + config: The configuration for the homeserver. + """ _base.start_reactor( "synapse-homeserver", - soft_file_limit=hs.config.server.soft_file_limit, - gc_thresholds=hs.config.server.gc_thresholds, - pid_file=hs.config.server.pid_file, - daemonize=hs.config.server.daemonize, - print_pidfile=hs.config.server.print_pidfile, + soft_file_limit=config.server.soft_file_limit, + gc_thresholds=config.server.gc_thresholds, + pid_file=config.server.pid_file, + daemonize=config.server.daemonize, + print_pidfile=config.server.print_pidfile, logger=logger, ) @@ -448,13 +481,14 @@ def main() -> None: with LoggingContext(name="main", server_name=homeserver_config.server.server_name): # check base requirements check_requirements() - hs = setup(homeserver_config) + hs = create_homeserver(homeserver_config) + setup(hs) # redirect stdio to the logs, if configured. if not hs.config.logging.no_redirect_stdio: redirect_stdio_to_logs() - run(hs) + start_reactor(homeserver_config) if __name__ == "__main__": diff --git a/tests/app/test_homeserver_start.py b/tests/app/test_homeserver_start.py index 0d257c98a..36a7170d1 100644 --- a/tests/app/test_homeserver_start.py +++ b/tests/app/test_homeserver_start.py @@ -37,7 +37,13 @@ class HomeserverAppStartTestCase(ConfigFileTestCase): self.add_lines_to_config([" main:", " host: 127.0.0.1", " port: 1234"]) # Ensure that starting master process with worker config raises an exception with self.assertRaises(ConfigError): + # Do a normal homeserver creation and setup homeserver_config = synapse.app.homeserver.load_or_generate_config( ["-c", self.config_file] ) - synapse.app.homeserver.setup(homeserver_config) + # XXX: The error will be raised at this point + hs = synapse.app.homeserver.create_homeserver(homeserver_config) + # Continue with the setup. We don't expect this to run because we raised + # earlier, but in the future, the code could be refactored to raise the + # error in a different place. + synapse.app.homeserver.setup(hs) diff --git a/tests/config/test_registration_config.py b/tests/config/test_registration_config.py index a8520c91d..9da0a3f42 100644 --- a/tests/config/test_registration_config.py +++ b/tests/config/test_registration_config.py @@ -112,7 +112,13 @@ class RegistrationConfigTestCase(ConfigFileTestCase): # Test that allowing open registration without verification raises an error with self.assertRaises(ConfigError): + # Do a normal homeserver creation and setup homeserver_config = synapse.app.homeserver.load_or_generate_config( ["-c", self.config_file] ) - synapse.app.homeserver.setup(homeserver_config) + # XXX: The error will be raised at this point + hs = synapse.app.homeserver.create_homeserver(homeserver_config) + # Continue with the setup. We don't expect this to run because we raised + # earlier, but in the future, the code could be refactored to raise the + # error in a different place. + synapse.app.homeserver.setup(hs)