From a7107458c6c7bc6de7c1056c21196c259c4a2ea4 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 3 Nov 2025 12:04:43 -0600 Subject: [PATCH] Refactor app entrypoints (avoid `exit(1)` in our composable functions) (#19121) - Move `register_start` (calls `os._exit(1)`) out of `setup` (our composable function) - We want to avoid `exit(...)` because we use these composable functions in Synapse Pro for small hosts where we have multiple Synapse instances running in the same process. We don't want a problem from one homeserver tenant causing the entire Python process to exit and affect all of the other homeserver tenants. - Continuation of https://github.com/element-hq/synapse/pull/19116 - Align our app entrypoints: `homeserver` (main), `generic_worker` (worker), and `admin_cmd` ### Background As part of Element's plan to support a light form of vhosting (virtual host) (multiple instances of Synapse in the same Python process) (c.f Synapse Pro for small hosts), we're currently diving into the details and implications of running multiple instances of Synapse in the same Python process. "Clean tenant provisioning" tracked internally by https://github.com/element-hq/synapse-small-hosts/issues/48 --- changelog.d/19121.misc | 1 + synapse/app/_base.py | 2 +- synapse/app/admin_cmd.py | 85 +++++++++++++++++++++------- synapse/app/appservice.py | 9 +-- synapse/app/client_reader.py | 9 +-- synapse/app/event_creator.py | 9 +-- synapse/app/federation_reader.py | 9 +-- synapse/app/federation_sender.py | 9 +-- synapse/app/frontend_proxy.py | 9 +-- synapse/app/generic_worker.py | 95 ++++++++++++++++++++++++++++---- synapse/app/homeserver.py | 90 ++++++++++++++++++++---------- synapse/app/media_repository.py | 9 +-- synapse/app/pusher.py | 9 +-- synapse/app/synchrotron.py | 9 +-- synapse/app/user_dir.py | 8 +-- 15 files changed, 231 insertions(+), 131 deletions(-) create mode 100644 changelog.d/19121.misc diff --git a/changelog.d/19121.misc b/changelog.d/19121.misc new file mode 100644 index 000000000..cb1fb8f02 --- /dev/null +++ b/changelog.d/19121.misc @@ -0,0 +1 @@ +Refactor and align app entrypoints (avoid `exit(1)` in our composable functions). diff --git a/synapse/app/_base.py b/synapse/app/_base.py index c0fcf8ca2..e5f4cfb0e 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -602,7 +602,7 @@ def setup_sighup_handling() -> None: _already_setup_sighup_handling = True -async def start(hs: "HomeServer", freeze: bool = True) -> None: +async def start(hs: "HomeServer", *, freeze: bool = True) -> None: """ Start a Synapse server or worker. diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index b5b1edac0..dac603de8 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -64,7 +64,7 @@ from synapse.storage.databases.main.state import StateGroupWorkerStore from synapse.storage.databases.main.stream import StreamWorkerStore from synapse.storage.databases.main.tags import TagsWorkerStore from synapse.storage.databases.main.user_erasure_store import UserErasureWorkerStore -from synapse.types import JsonMapping, StateMap +from synapse.types import ISynapseReactor, JsonMapping, StateMap from synapse.util.logcontext import LoggingContext logger = logging.getLogger("synapse.app.admin_cmd") @@ -289,7 +289,21 @@ def load_config(argv_options: list[str]) -> tuple[HomeServerConfig, argparse.Nam return config, args -def start(config: HomeServerConfig, args: argparse.Namespace) -> None: +def create_homeserver( + config: HomeServerConfig, + reactor: Optional[ISynapseReactor] = None, +) -> AdminCmdServer: + """ + Create a homeserver instance for the Synapse admin command process. + + Args: + config: The configuration for the homeserver. + reactor: Optionally provide a reactor to use. Can be useful in different + scenarios that you want control over the reactor, such as tests. + + Returns: + A homeserver instance. + """ if config.worker.worker_app is not None: assert config.worker.worker_app == "synapse.app.admin_cmd" @@ -312,33 +326,62 @@ def start(config: HomeServerConfig, args: argparse.Namespace) -> None: synapse.events.USE_FROZEN_DICTS = config.server.use_frozen_dicts - ss = AdminCmdServer( + admin_command_server = AdminCmdServer( config.server.server_name, config=config, + reactor=reactor, ) - setup_logging(ss, config, use_worker_options=True) + return admin_command_server - ss.setup() - # We use task.react as the basic run command as it correctly handles tearing - # down the reactor when the deferreds resolve and setting the return value. - # We also make sure that `_base.start` gets run before we actually run the - # command. +def setup(admin_command_server: AdminCmdServer) -> None: + """ + Setup a `AdminCmdServer` instance. - async def run() -> None: - with LoggingContext(name="command", server_name=config.server.server_name): - await _base.start(ss) - await args.func(ss, args) - - _base.start_worker_reactor( - "synapse-admin-cmd", - config, - run_command=lambda: task.react(lambda _reactor: defer.ensureDeferred(run())), + Args: + admin_command_server: The homeserver to setup. + """ + setup_logging( + admin_command_server, admin_command_server.config, use_worker_options=True ) + admin_command_server.setup() + + +async def start(admin_command_server: AdminCmdServer, args: argparse.Namespace) -> None: + """ + Should be called once the reactor is running. + + Args: + admin_command_server: The homeserver to setup. + args: Command line arguments. + """ + # This needs a logcontext unlike other entrypoints because we're not using + # `register_start(...)` to run this function. + with LoggingContext(name="start", server_name=admin_command_server.hostname): + # We make sure that `_base.start` gets run before we actually run the command. + await _base.start(admin_command_server) + # Run the command + await args.func(admin_command_server, args) + + +def main() -> None: + homeserver_config, args = load_config(sys.argv[1:]) + with LoggingContext(name="main", server_name=homeserver_config.server.server_name): + admin_command_server = create_homeserver(homeserver_config) + setup(admin_command_server) + + _base.start_worker_reactor( + "synapse-admin-cmd", + admin_command_server.config, + # We use task.react as the basic run command as it correctly handles tearing + # down the reactor when the deferreds resolve and setting the return value. + run_command=lambda: task.react( + lambda _reactor: defer.ensureDeferred(start(admin_command_server, args)) + ), + ) + if __name__ == "__main__": - homeserver_config, args = load_config(sys.argv[1:]) - with LoggingContext(name="main", server_name=homeserver_config.server.server_name): - start(homeserver_config, args) + main() diff --git a/synapse/app/appservice.py b/synapse/app/appservice.py index 823e1908b..d251d0ab6 100644 --- a/synapse/app/appservice.py +++ b/synapse/app/appservice.py @@ -19,16 +19,11 @@ # # -import sys - -from synapse.app.generic_worker import load_config, start -from synapse.util.logcontext import LoggingContext +from synapse.app.generic_worker import main as worker_main def main() -> None: - homeserver_config = load_config(sys.argv[1:]) - with LoggingContext(name="main", server_name=homeserver_config.server.server_name): - start(homeserver_config) + worker_main() if __name__ == "__main__": diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index 823e1908b..d251d0ab6 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -19,16 +19,11 @@ # # -import sys - -from synapse.app.generic_worker import load_config, start -from synapse.util.logcontext import LoggingContext +from synapse.app.generic_worker import main as worker_main def main() -> None: - homeserver_config = load_config(sys.argv[1:]) - with LoggingContext(name="main", server_name=homeserver_config.server.server_name): - start(homeserver_config) + worker_main() if __name__ == "__main__": diff --git a/synapse/app/event_creator.py b/synapse/app/event_creator.py index 1a9b0ad15..5b18578bd 100644 --- a/synapse/app/event_creator.py +++ b/synapse/app/event_creator.py @@ -18,16 +18,11 @@ # # -import sys - -from synapse.app.generic_worker import load_config, start -from synapse.util.logcontext import LoggingContext +from synapse.app.generic_worker import main as worker_main def main() -> None: - homeserver_config = load_config(sys.argv[1:]) - with LoggingContext(name="main", server_name=homeserver_config.server.server_name): - start(homeserver_config) + worker_main() if __name__ == "__main__": diff --git a/synapse/app/federation_reader.py b/synapse/app/federation_reader.py index 823e1908b..d251d0ab6 100644 --- a/synapse/app/federation_reader.py +++ b/synapse/app/federation_reader.py @@ -19,16 +19,11 @@ # # -import sys - -from synapse.app.generic_worker import load_config, start -from synapse.util.logcontext import LoggingContext +from synapse.app.generic_worker import main as worker_main def main() -> None: - homeserver_config = load_config(sys.argv[1:]) - with LoggingContext(name="main", server_name=homeserver_config.server.server_name): - start(homeserver_config) + worker_main() if __name__ == "__main__": diff --git a/synapse/app/federation_sender.py b/synapse/app/federation_sender.py index 823e1908b..d251d0ab6 100644 --- a/synapse/app/federation_sender.py +++ b/synapse/app/federation_sender.py @@ -19,16 +19,11 @@ # # -import sys - -from synapse.app.generic_worker import load_config, start -from synapse.util.logcontext import LoggingContext +from synapse.app.generic_worker import main as worker_main def main() -> None: - homeserver_config = load_config(sys.argv[1:]) - with LoggingContext(name="main", server_name=homeserver_config.server.server_name): - start(homeserver_config) + worker_main() if __name__ == "__main__": diff --git a/synapse/app/frontend_proxy.py b/synapse/app/frontend_proxy.py index 823e1908b..d251d0ab6 100644 --- a/synapse/app/frontend_proxy.py +++ b/synapse/app/frontend_proxy.py @@ -19,16 +19,11 @@ # # -import sys - -from synapse.app.generic_worker import load_config, start -from synapse.util.logcontext import LoggingContext +from synapse.app.generic_worker import main as worker_main def main() -> None: - homeserver_config = load_config(sys.argv[1:]) - with LoggingContext(name="main", server_name=homeserver_config.server.server_name): - start(homeserver_config) + worker_main() if __name__ == "__main__": diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index 1a7bedaac..a1dde368d 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -21,6 +21,7 @@ # import logging import sys +from typing import Optional from twisted.web.resource import Resource @@ -111,6 +112,7 @@ from synapse.storage.databases.main.transactions import TransactionWorkerStore from synapse.storage.databases.main.ui_auth import UIAuthWorkerStore from synapse.storage.databases.main.user_directory import UserDirectoryStore from synapse.storage.databases.main.user_erasure_store import UserErasureWorkerStore +from synapse.types import ISynapseReactor from synapse.util.httpresourcetree import create_resource_tree logger = logging.getLogger("synapse.app.generic_worker") @@ -332,7 +334,30 @@ def load_config(argv_options: list[str]) -> HomeServerConfig: return config -def start(config: HomeServerConfig) -> None: +def create_homeserver( + config: HomeServerConfig, + reactor: Optional[ISynapseReactor] = None, +) -> GenericWorkerServer: + """ + Create a homeserver instance for the Synapse worker process. + + Our composable functions (`create_homeserver`, `setup`, `start`) should not exit the + Python process (call `exit(...)`) and instead raise exceptions which can be handled + by the caller as desired. This doesn't matter for the normal case of one Synapse + instance running in the Python process (as we're only affecting ourselves), but is + important when we have multiple Synapse homeserver tenants running in the same + Python process (c.f. Synapse Pro for small hosts) as we don't want some problem from + one tenant stopping the rest of the tenants. + + Args: + config: The configuration for the homeserver. + reactor: Optionally provide a reactor to use. Can be useful in different + scenarios that you want control over the reactor, such as tests. + + Returns: + A homeserver instance. + """ + # For backwards compatibility let any of the old app names. assert config.worker.worker_app in ( "synapse.app.appservice", @@ -357,9 +382,29 @@ def start(config: HomeServerConfig) -> None: hs = GenericWorkerServer( config.server.server_name, config=config, + reactor=reactor, ) - setup_logging(hs, config, use_worker_options=True) + return hs + + +def setup(hs: GenericWorkerServer) -> None: + """ + Setup a `GenericWorkerServer` (worker) instance. + + Our composable functions (`create_homeserver`, `setup`, `start`) should not exit the + Python process (call `exit(...)`) and instead raise exceptions which can be handled + by the caller as desired. This doesn't matter for the normal case of one Synapse + instance running in the Python process (as we're only affecting ourselves), but is + important when we have multiple Synapse homeserver tenants running in the same + Python process (c.f. Synapse Pro for small hosts) as we don't want some problem from + one tenant stopping the rest of the tenants. + + Args: + hs: The homeserver to setup. + """ + + setup_logging(hs, hs.config, use_worker_options=True) # Start the tracer init_tracer(hs) # noqa @@ -370,26 +415,56 @@ def start(config: HomeServerConfig) -> None: # streams. Will no-op if no streams can be written to by this worker. hs.get_replication_streamer() - async def start() -> None: - await _base.start(hs) - register_start(hs, start) +async def start( + hs: GenericWorkerServer, + *, + freeze: bool = True, +) -> None: + """ + Should be called once the reactor is running. - # redirect stdio to the logs, if configured. - if not hs.config.logging.no_redirect_stdio: - redirect_stdio_to_logs() + Our composable functions (`create_homeserver`, `setup`, `start`) should not exit the + Python process (call `exit(...)`) and instead raise exceptions which can be handled + by the caller as desired. This doesn't matter for the normal case of one Synapse + instance running in the Python process (as we're only affecting ourselves), but is + important when we have multiple Synapse homeserver tenants running in the same + Python process (c.f. Synapse Pro for small hosts) as we don't want some problem from + one tenant stopping the rest of the tenants. - _base.start_worker_reactor("synapse-generic-worker", config) + Args: + hs: The homeserver to setup. + freeze: whether to freeze the homeserver base objects in the garbage collector. + May improve garbage collection performance by marking objects with an effectively + static lifetime as frozen so they don't need to be considered for cleanup. + If you ever want to `shutdown` the homeserver, this needs to be + False otherwise the homeserver cannot be garbage collected after `shutdown`. + """ + + await _base.start(hs, freeze=freeze) def main() -> None: homeserver_config = load_config(sys.argv[1:]) + + # Create a logging context as soon as possible so we can start associating + # everything with this homeserver. with LoggingContext(name="main", server_name=homeserver_config.server.server_name): + # redirect stdio to the logs, if configured. + if not homeserver_config.logging.no_redirect_stdio: + redirect_stdio_to_logs() + + hs = create_homeserver(homeserver_config) try: - start(homeserver_config) + setup(hs) except Exception as e: handle_startup_exception(e) + # Register a callback to be invoked once the reactor is running + register_start(hs, start, hs) + + _base.start_worker_reactor("synapse-generic-worker", homeserver_config) + if __name__ == "__main__": main() diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 9fd65b271..3807a18ab 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -71,7 +71,6 @@ from synapse.rest.well_known import well_known_resource from synapse.server import HomeServer from synapse.storage import DataStore from synapse.types import ISynapseReactor -from synapse.util.check_dependencies import check_requirements from synapse.util.httpresourcetree import create_resource_tree from synapse.util.module_loader import load_module @@ -356,6 +355,14 @@ def create_homeserver( """ Create a homeserver instance for the Synapse main process. + Our composable functions (`create_homeserver`, `setup`, `start`) should not exit the + Python process (call `exit(...)`) and instead raise exceptions which can be handled + by the caller as desired. This doesn't matter for the normal case of one Synapse + instance running in the Python process (as we're only affecting ourselves), but is + important when we have multiple Synapse homeserver tenants running in the same + Python process (c.f. Synapse Pro for small hosts) as we don't want some problem from + one tenant stopping the rest of the tenants. + Args: config: The configuration for the homeserver. reactor: Optionally provide a reactor to use. Can be useful in different @@ -388,22 +395,20 @@ def create_homeserver( def setup( hs: SynapseHomeServer, - *, - freeze: bool = True, ) -> None: """ - Setup a Synapse homeserver instance given a configuration. + Setup a `SynapseHomeServer` (main) instance. + + Our composable functions (`create_homeserver`, `setup`, `start`) should not exit the + Python process (call `exit(...)`) and instead raise exceptions which can be handled + by the caller as desired. This doesn't matter for the normal case of one Synapse + instance running in the Python process (as we're only affecting ourselves), but is + important when we have multiple Synapse homeserver tenants running in the same + Python process (c.f. Synapse Pro for small hosts) as we don't want some problem from + one tenant stopping the rest of the tenants. Args: hs: The homeserver to setup. - freeze: whether to freeze the homeserver base objects in the garbage collector. - May improve garbage collection performance by marking objects with an effectively - static lifetime as frozen so they don't need to be considered for cleanup. - If you ever want to `shutdown` the homeserver, this needs to be - False otherwise the homeserver cannot be garbage collected after `shutdown`. - - Returns: - A homeserver instance. """ setup_logging(hs, hs.config, use_worker_options=False) @@ -416,22 +421,44 @@ def setup( hs.setup() - async def _start_when_reactor_running() -> None: - # TODO: Feels like this should be moved somewhere else. - # - # Load the OIDC provider metadatas, if OIDC is enabled. - if hs.config.oidc.oidc_enabled: - oidc = hs.get_oidc_handler() - # Loading the provider metadata also ensures the provider config is valid. - await oidc.load_metadata() - await _base.start(hs, freeze) +async def start( + hs: SynapseHomeServer, + *, + freeze: bool = True, +) -> None: + """ + Should be called once the reactor is running. - # TODO: Feels like this should be moved somewhere else. - hs.get_datastores().main.db_pool.updates.start_doing_background_updates() + Our composable functions (`create_homeserver`, `setup`, `start`) should not exit the + Python process (call `exit(...)`) and instead raise exceptions which can be handled + by the caller as desired. This doesn't matter for the normal case of one Synapse + instance running in the Python process (as we're only affecting ourselves), but is + important when we have multiple Synapse homeserver tenants running in the same + Python process (c.f. Synapse Pro for small hosts) as we don't want some problem from + one tenant stopping the rest of the tenants. - # Register a callback to be invoked once the reactor is running - register_start(hs, _start_when_reactor_running) + Args: + hs: The homeserver to setup. + freeze: whether to freeze the homeserver base objects in the garbage collector. + May improve garbage collection performance by marking objects with an effectively + static lifetime as frozen so they don't need to be considered for cleanup. + If you ever want to `shutdown` the homeserver, this needs to be + False otherwise the homeserver cannot be garbage collected after `shutdown`. + """ + + # TODO: Feels like this should be moved somewhere else. + # + # Load the OIDC provider metadatas, if OIDC is enabled. + if hs.config.oidc.oidc_enabled: + oidc = hs.get_oidc_handler() + # Loading the provider metadata also ensures the provider config is valid. + await oidc.load_metadata() + + await _base.start(hs, freeze=freeze) + + # TODO: Feels like this should be moved somewhere else. + hs.get_datastores().main.db_pool.updates.start_doing_background_updates() def start_reactor( @@ -457,18 +484,21 @@ def start_reactor( def main() -> None: homeserver_config = load_or_generate_config(sys.argv[1:]) + # Create a logging context as soon as possible so we can start associating + # everything with this homeserver. with LoggingContext(name="main", server_name=homeserver_config.server.server_name): - # check base requirements - check_requirements() + # redirect stdio to the logs, if configured. + if not homeserver_config.logging.no_redirect_stdio: + redirect_stdio_to_logs() + hs = create_homeserver(homeserver_config) try: setup(hs) except Exception as e: handle_startup_exception(e) - # redirect stdio to the logs, if configured. - if not hs.config.logging.no_redirect_stdio: - redirect_stdio_to_logs() + # Register a callback to be invoked once the reactor is running + register_start(hs, start, hs) start_reactor(homeserver_config) diff --git a/synapse/app/media_repository.py b/synapse/app/media_repository.py index 823e1908b..d251d0ab6 100644 --- a/synapse/app/media_repository.py +++ b/synapse/app/media_repository.py @@ -19,16 +19,11 @@ # # -import sys - -from synapse.app.generic_worker import load_config, start -from synapse.util.logcontext import LoggingContext +from synapse.app.generic_worker import main as worker_main def main() -> None: - homeserver_config = load_config(sys.argv[1:]) - with LoggingContext(name="main", server_name=homeserver_config.server.server_name): - start(homeserver_config) + worker_main() if __name__ == "__main__": diff --git a/synapse/app/pusher.py b/synapse/app/pusher.py index 823e1908b..d251d0ab6 100644 --- a/synapse/app/pusher.py +++ b/synapse/app/pusher.py @@ -19,16 +19,11 @@ # # -import sys - -from synapse.app.generic_worker import load_config, start -from synapse.util.logcontext import LoggingContext +from synapse.app.generic_worker import main as worker_main def main() -> None: - homeserver_config = load_config(sys.argv[1:]) - with LoggingContext(name="main", server_name=homeserver_config.server.server_name): - start(homeserver_config) + worker_main() if __name__ == "__main__": diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py index 823e1908b..d251d0ab6 100644 --- a/synapse/app/synchrotron.py +++ b/synapse/app/synchrotron.py @@ -19,16 +19,11 @@ # # -import sys - -from synapse.app.generic_worker import load_config, start -from synapse.util.logcontext import LoggingContext +from synapse.app.generic_worker import main as worker_main def main() -> None: - homeserver_config = load_config(sys.argv[1:]) - with LoggingContext(name="main", server_name=homeserver_config.server.server_name): - start(homeserver_config) + worker_main() if __name__ == "__main__": diff --git a/synapse/app/user_dir.py b/synapse/app/user_dir.py index f64d82e41..2c47d9f4f 100644 --- a/synapse/app/user_dir.py +++ b/synapse/app/user_dir.py @@ -19,16 +19,12 @@ # # -import sys -from synapse.app.generic_worker import load_config, start -from synapse.util.logcontext import LoggingContext +from synapse.app.generic_worker import main as worker_main def main() -> None: - homeserver_config = load_config(sys.argv[1:]) - with LoggingContext(name="main", server_name=homeserver_config.server.server_name): - start(homeserver_config) + worker_main() if __name__ == "__main__":