From 2f2b854ac125f5a4d2fe93f2731cd8f5b53181bf Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Fri, 26 Sep 2025 11:10:02 -0500
Subject: [PATCH] Fix logcontext handling in `timeout_deferred` tests (#18974)

Related to https://github.com/element-hq/synapse/issues/18905

These fixes were split off from
https://github.com/element-hq/synapse/pull/18828 where @devonh was
seeing some test failures because `timeout_deferred(...)` is being
updated to use `Clock` utilities instead of raw `reactor` methods. This
test was failing in that branch/PR until we made this new version that
handles the logcontexts properly.

While the previous version of this test does pass on `develop`, it was
using what appears completely wrong assertions, assumptions, and bad
patterns to make it happen (see diff comments below)

---

Test originally introduced in https://github.com/matrix-org/synapse/pull/4407
---
 changelog.d/18974.misc           |   1 +
 tests/util/test_async_helpers.py | 104 ++++++++++++++++++++-----------
 2 files changed, 70 insertions(+), 35 deletions(-)
 create mode 100644 changelog.d/18974.misc

diff --git a/changelog.d/18974.misc b/changelog.d/18974.misc
new file mode 100644
index 000000000..ca300a17a
--- /dev/null
+++ b/changelog.d/18974.misc
@@ -0,0 +1 @@
+Fix logcontext handling in `timeout_deferred` tests.
diff --git a/tests/util/test_async_helpers.py b/tests/util/test_async_helpers.py
index cfd288241..881b43db7 100644
--- a/tests/util/test_async_helpers.py
+++ b/tests/util/test_async_helpers.py
@@ -17,8 +17,9 @@
 # [This file includes modifications made by New Vector Limited]
 #
 #
+import logging
 import traceback
-from typing import Any, Coroutine, Generator, List, NoReturn, Optional, Tuple, TypeVar
+from typing import Any, Coroutine, List, NoReturn, Optional, Tuple, TypeVar
 
 from parameterized import parameterized_class
 
@@ -47,6 +48,8 @@ from synapse.util.async_helpers import (
 from tests.server import get_clock
 from tests.unittest import TestCase
 
+logger = logging.getLogger(__name__)
+
 T = TypeVar("T")
 
 
@@ -188,47 +191,78 @@ class TimeoutDeferredTest(TestCase):
 
         self.failureResultOf(timing_out_d, defer.TimeoutError)
 
-    def test_logcontext_is_preserved_on_cancellation(self) -> None:
-        blocking_was_cancelled = False
+    async def test_logcontext_is_preserved_on_cancellation(self) -> None:
+        # Sanity check that we start in the sentinel context
+        self.assertEqual(current_context(), SENTINEL_CONTEXT)
 
-        @defer.inlineCallbacks
-        def blocking() -> Generator["Deferred[object]", object, None]:
-            nonlocal blocking_was_cancelled
+        incomplete_deferred_was_cancelled = False
 
-            non_completing_d: Deferred = Deferred()
-            with PreserveLoggingContext():
-                try:
-                    yield non_completing_d
-                except CancelledError:
-                    blocking_was_cancelled = True
-                    raise
+        def mark_was_cancelled(res: Failure) -> None:
+            """
+            A passthrough errback which sets `incomplete_deferred_was_cancelled`.
+
+            This means we re-raise any exception and allows further errbacks (in
+            `timeout_deferred(...)`) to do their thing. Just trying to be a transparent
+            proxy of any exception while doing our internal test book-keeping.
+            """
+            nonlocal incomplete_deferred_was_cancelled
+            if res.check(CancelledError):
+                incomplete_deferred_was_cancelled = True
+            else:
+                logger.error(
+                    "Expected incomplete_d to fail with `CancelledError` because our "
+                    "`timeout_deferred(...)` utility canceled it but saw %s",
+                    res,
+                )
+
+            # Re-raise the exception so that any further errbacks can do their thing as
+            # normal
+            res.raiseException()
+
+        # Create a deferred which we will never complete
+        incomplete_d: Deferred = Deferred()
+        incomplete_d.addErrback(mark_was_cancelled)
 
         with LoggingContext("one") as context_one:
-            # the errbacks should be run in the test logcontext
-            def errback(res: Failure, deferred_name: str) -> Failure:
-                self.assertIs(
-                    current_context(),
-                    context_one,
-                    "errback %s run in unexpected logcontext %s"
-                    % (deferred_name, current_context()),
-                )
-                return res
-
-            original_deferred = blocking()
-            original_deferred.addErrback(errback, "orig")
-            timing_out_d = timeout_deferred(original_deferred, 1.0, self.clock)
-            self.assertNoResult(timing_out_d)
-            self.assertIs(current_context(), SENTINEL_CONTEXT)
-            timing_out_d.addErrback(errback, "timingout")
-
-            self.clock.pump((1.0,))
-
-            self.assertTrue(
-                blocking_was_cancelled, "non-completing deferred was not cancelled"
+            timing_out_d = timeout_deferred(
+                deferred=incomplete_d,
+                timeout=1.0,
+                reactor=self.clock,
             )
-            self.failureResultOf(timing_out_d, defer.TimeoutError)
+            self.assertNoResult(timing_out_d)
+            # We should still be in the logcontext we started in
             self.assertIs(current_context(), context_one)
 
+            # Pump the reactor until we trigger the timeout
+            #
+            # We're manually pumping the reactor (and causing any pending callbacks to
+            # be called) so we need to be in the sentinel logcontext to avoid leaking
+            # our current logcontext into the reactor (which would then get picked up
+            # and associated with the next thing the reactor does). `with
+            # PreserveLoggingContext()` will reset the logcontext to the sentinel while
+            # we're pumping the reactor in the block and return us back to our current
+            # logcontext after the block.
+            with PreserveLoggingContext():
+                self.clock.pump(
+                    # We only need to pump `1.0` (seconds) as we set
+                    # `timeout_deferred(timeout=1.0)` above
+                    (1.0,)
+                )
+
+            # We expect the incomplete deferred to have been cancelled because of the
+            # timeout by this point
+            self.assertTrue(
+                incomplete_deferred_was_cancelled,
+                "incomplete deferred was not cancelled",
+            )
+            # We should see the `TimeoutError` (instead of a `CancelledError`)
+            self.failureResultOf(timing_out_d, defer.TimeoutError)
+            # We're still in the same logcontext
+            self.assertIs(current_context(), context_one)
+
+        # Back to the sentinel context
+        self.assertEqual(current_context(), SENTINEL_CONTEXT)
+
 
 class _TestException(Exception):
     pass