Cheaper logcontext debug logs (random_string_insecure_fast(...)) (#19094)

Follow-up to https://github.com/element-hq/synapse/pull/18966

During the weekly Backend team meeting, it was mentioned that
`random_string(...)` was taking a significant amount of CPU on
`matrix.org`. This makes sense as it relies on
[`secrets.choice(...)`](https://docs.python.org/3/library/secrets.html#secrets.choice),
a cryptographically secure function that is inherently computationally
expensive. And since https://github.com/element-hq/synapse/pull/18966,
we're calling `random_string(...)` as part of a bunch of logcontext
utilities.

Since we don't need cryptographically secure random strings for our
debug logs, this PR introduces a new `random_string_insecure_fast(...)`
function built on
[`random.choice(...)`](https://docs.python.org/3/library/random.html#random.choice),
which draws from a pseudo-random number generator that is "both fast and threadsafe".
This commit is contained in:
Eric Eastwood 2025-10-30 11:47:53 -05:00 committed by GitHub
parent 349599143e
commit f0aae62f85
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 22 additions and 4 deletions

1
changelog.d/19094.misc Normal file
View File

@ -0,0 +1 @@
Use cheaper random string function in logcontext utilities.

View File

@ -53,7 +53,7 @@ from twisted.internet import defer, threads
from twisted.python.threadpool import ThreadPool
from synapse.logging.loggers import ExplicitlyConfiguredLogger
from synapse.util.stringutils import random_string
from synapse.util.stringutils import random_string_insecure_fast
if TYPE_CHECKING:
from synapse.logging.scopecontextmanager import _LogContextScope
@ -657,7 +657,7 @@ class PreserveLoggingContext:
self, new_context: LoggingContextOrSentinel = SENTINEL_CONTEXT
) -> None:
self._new_context = new_context
self._instance_id = random_string(5)
self._instance_id = random_string_insecure_fast(5)
def __enter__(self) -> None:
logcontext_debug_logger.debug(
@ -859,7 +859,7 @@ def run_in_background(
Note that the returned Deferred does not follow the synapse logcontext
rules.
"""
instance_id = random_string(5)
instance_id = random_string_insecure_fast(5)
calling_context = current_context()
logcontext_debug_logger.debug(
"run_in_background(%s): called with logcontext=%s", instance_id, calling_context
@ -1012,7 +1012,7 @@ def make_deferred_yieldable(deferred: "defer.Deferred[T]") -> "defer.Deferred[T]
restores the old context once the awaitable completes (execution passes from the
reactor back to the code).
"""
instance_id = random_string(5)
instance_id = random_string_insecure_fast(5)
logcontext_debug_logger.debug(
"make_deferred_yieldable(%s): called with logcontext=%s",
instance_id,

View File

@ -20,6 +20,7 @@
#
#
import itertools
import random
import re
import secrets
import string
@ -56,6 +57,10 @@ def random_string(length: int) -> str:
"""Generate a cryptographically secure string of random letters.
Drawn from the characters: `a-z` and `A-Z`
Because this is generated from cryptographic sources, it takes a notable amount of
effort to generate (computationally expensive). If you don't need cryptographic
security, consider using `random_string_insecure_fast` for better performance.
"""
return "".join(secrets.choice(string.ascii_letters) for _ in range(length))
@ -68,6 +73,18 @@ def random_string_with_symbols(length: int) -> str:
return "".join(secrets.choice(_string_with_symbols) for _ in range(length))
def random_string_insecure_fast(length: int) -> str:
    """
    Generate a string of random letters (insecure, fast). This is a more performant but
    insecure version of `random_string`.

    WARNING: Not for security or cryptographic uses. Use `random_string` instead.

    Drawn from the characters: `a-z` and `A-Z`

    Args:
        length: The number of characters to generate.

    Returns:
        A string of `length` characters (the empty string when `length` is 0).
    """
    # A single `random.choices(..., k=length)` call draws every character at once,
    # avoiding a generator frame plus one `random.choice(...)` call per character.
    return "".join(random.choices(string.ascii_letters, k=length))
def is_ascii(s: bytes) -> bool:
try:
s.decode("ascii").encode("ascii")