From 84d64251dc741938eed3e2f3ba41e031b119d966 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 16 Sep 2025 17:15:08 -0500 Subject: [PATCH] Remove `sentinel` logcontext where we log in `setup`, `start` and exit (#18870) Remove `sentinel` logcontext where we log in `setup`, `start`, and exit. Instead of having one giant PR that removes all places we use `sentinel` logcontext, I've decided to tackle this more piece-meal. This PR covers the parts if you just startup Synapse and exit it with no requests or activity going on in between. Part of https://github.com/element-hq/synapse/issues/18905 (Remove `sentinel` logcontext where we log in Synapse) Prerequisite for https://github.com/element-hq/synapse/pull/18868. Logging with the `sentinel` logcontext means we won't know which server the log came from. ### Why https://github.com/element-hq/synapse/blob/9cc400177822805e2a08d4d934daad6f3bc2a4df/docs/log_contexts.md#L71-L81 (docs updated in https://github.com/element-hq/synapse/pull/18900) ### Testing strategy 1. Run Synapse normally and with `daemonize: true`: `poetry run synapse_homeserver --config-path homeserver.yaml` 1. Execute some requests 1. Shutdown the server 1. Look for any bad log entries in your homeserver logs: - `Expected logging context sentinel but found main` - `Expected logging context main was lost` - `Expected previous context` - `utime went backwards!`/`stime went backwards!` - `Called stop on logcontext POST-0 without recording a start rusage` 1. Look for any logs coming from the `sentinel` context With these changes, you should only see the following logs (not from Synapse) using the `sentinel` context if you start up Synapse and exit: `homeserver.log` ``` 2025-09-10 14:45:39,924 - asyncio - 64 - DEBUG - sentinel - Using selector: EpollSelector 2025-09-10 14:45:40,562 - twisted - 281 - INFO - sentinel - Received SIGINT, shutting down. 2025-09-10 14:45:40,562 - twisted - 281 - INFO - sentinel - (TCP Port 9322 Closed) 2025-09-10 14:45:40,563 - twisted - 281 - INFO - sentinel - (TCP Port 8008 Closed) 2025-09-10 14:45:40,563 - twisted - 281 - INFO - sentinel - (TCP Port 9093 Closed) 2025-09-10 14:45:40,564 - twisted - 281 - INFO - sentinel - Main loop terminated. ``` --- changelog.d/18870.misc | 1 + synapse/app/_base.py | 40 +++++++++++++++++------------------ synapse/app/generic_worker.py | 7 +++++- synapse/app/homeserver.py | 16 ++++++++------ synapse/util/daemonize.py | 22 ++++++++++++++----- 5 files changed, 53 insertions(+), 33 deletions(-) create mode 100644 changelog.d/18870.misc diff --git a/changelog.d/18870.misc b/changelog.d/18870.misc new file mode 100644 index 000000000..e54ba4f37 --- /dev/null +++ b/changelog.d/18870.misc @@ -0,0 +1 @@ +Remove `sentinel` logcontext usage where we log in `setup`, `start` and exit. diff --git a/synapse/app/_base.py b/synapse/app/_base.py index bce6f4d82..cf3d260e6 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -72,7 +72,7 @@ from synapse.events.auto_accept_invites import InviteAutoAccepter from synapse.events.presence_router import load_legacy_presence_router from synapse.handlers.auth import load_legacy_password_auth_providers from synapse.http.site import SynapseSite -from synapse.logging.context import PreserveLoggingContext +from synapse.logging.context import LoggingContext, PreserveLoggingContext from synapse.logging.opentracing import init_tracer from synapse.metrics import install_gc_manager, register_threadpool from synapse.metrics.background_process_metrics import run_as_background_process @@ -183,25 +183,23 @@ def start_reactor( if gc_thresholds: gc.set_threshold(*gc_thresholds) install_gc_manager() - run_command() - # make sure that we run the reactor with the sentinel log context, - # otherwise other PreserveLoggingContext instances will get confused - # and complain when they see the logcontext arbitrarily swapping - # between the sentinel and `run` logcontexts. - # - # We also need to drop the logcontext before forking if we're daemonizing, - # otherwise the cputime metrics get confused about the per-thread resource usage - # appearing to go backwards. - with PreserveLoggingContext(): - if daemonize: - assert pid_file is not None + # Reset the logging context when we start the reactor (whenever we yield control + # to the reactor, the `sentinel` logging context needs to be set so we don't + # leak the current logging context and erroneously apply it to the next task the + # reactor event loop picks up) + with PreserveLoggingContext(): + run_command() - if print_pidfile: - print(pid_file) + if daemonize: + assert pid_file is not None - daemonize_process(pid_file, logger) - run() + if print_pidfile: + print(pid_file) + + daemonize_process(pid_file, logger) + + run() def quit_with_error(error_string: str) -> NoReturn: @@ -601,10 +599,12 @@ async def start(hs: "HomeServer") -> None: hs.get_datastores().main.db_pool.start_profiling() hs.get_pusherpool().start() + def log_shutdown() -> None: + with LoggingContext("log_shutdown"): + logger.info("Shutting down...") + # Log when we start the shut down process. - hs.get_reactor().addSystemEventTrigger( - "before", "shutdown", logger.info, "Shutting down..." - ) + hs.get_reactor().addSystemEventTrigger("before", "shutdown", log_shutdown) setup_sentry(hs) setup_sdnotify(hs) diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index 4f5bea6bd..543b26d8b 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -355,7 +355,12 @@ def start(config_options: List[str]) -> None: except Exception as e: handle_startup_exception(e) - register_start(_base.start, hs) + async def start() -> None: + # Re-establish log context now that we're back from the reactor + with LoggingContext("start"): + await _base.start(hs) + + register_start(start) # redirect stdio to the logs, if configured. if not hs.config.logging.no_redirect_stdio: diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index e027b5eae..dfc4a0071 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -377,15 +377,17 @@ def setup(config_options: List[str]) -> SynapseHomeServer: handle_startup_exception(e) async def start() -> None: - # Load the OIDC provider metadatas, if OIDC is enabled. - if hs.config.oidc.oidc_enabled: - oidc = hs.get_oidc_handler() - # Loading the provider metadata also ensures the provider config is valid. - await oidc.load_metadata() + # Re-establish log context now that we're back from the reactor + with LoggingContext("start"): + # Load the OIDC provider metadatas, if OIDC is enabled. + if hs.config.oidc.oidc_enabled: + oidc = hs.get_oidc_handler() + # Loading the provider metadata also ensures the provider config is valid. + await oidc.load_metadata() - await _base.start(hs) + await _base.start(hs) - hs.get_datastores().main.db_pool.updates.start_doing_background_updates() + hs.get_datastores().main.db_pool.updates.start_doing_background_updates() register_start(start) diff --git a/synapse/util/daemonize.py b/synapse/util/daemonize.py index 9fdefc5a7..e653abff9 100644 --- a/synapse/util/daemonize.py +++ b/synapse/util/daemonize.py @@ -29,6 +29,11 @@ import sys from types import FrameType, TracebackType from typing import NoReturn, Optional, Type +from synapse.logging.context import ( + LoggingContext, + PreserveLoggingContext, +) + def daemonize_process(pid_file: str, logger: logging.Logger, chdir: str = "/") -> None: """daemonize the current process @@ -64,8 +69,14 @@ def daemonize_process(pid_file: str, logger: logging.Logger, chdir: str = "/") - pid_fh.write(old_pid) sys.exit(1) - # Fork, creating a new process for the child. - process_id = os.fork() + # Stop the existing context *before* we fork the process. Otherwise the cputime + # metrics get confused about the per-thread resource usage appearing to go backwards + # because we're comparing the resource usage from the original process to the forked + # process. `PreserveLoggingContext` already takes care of restarting the original + # context *after* the block. + with PreserveLoggingContext(): + # Fork, creating a new process for the child. + process_id = os.fork() if process_id != 0: # parent process: exit. @@ -140,9 +151,10 @@ def daemonize_process(pid_file: str, logger: logging.Logger, chdir: str = "/") - # Cleanup pid file at exit. def exit() -> None: - logger.warning("Stopping daemon.") - os.remove(pid_file) - sys.exit(0) + with LoggingContext("atexit"): + logger.warning("Stopping daemon.") + os.remove(pid_file) + sys.exit(0) atexit.register(exit)