fix(device-handler): make _maybe_retry_device_resync thread-safe (#18391)

A race-condition may render concurrent retry loops.

Use an actual `Lock` for guarding single access of device resyncing
retrying.

### Pull Request Checklist

* [x] Pull request is based on the develop branch
* [x] Pull request includes a [changelog
file](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#changelog).
The entry should:
- Be a short description of your change which makes sense to users.
"Fixed a bug that prevented receiving messages from other servers."
instead of "Moved X method from `EventStore` to `EventWorkerStore`.".
  - Use markdown where necessary, mostly for `code blocks`.
  - End with either a period (.) or an exclamation mark (!).
  - Start with a capital letter.
- Feel free to credit yourself, by adding a sentence "Contributed by
@github_username." or "Contributed by [Your Name]." to the end of the
entry.
* [x] [Code
style](https://element-hq.github.io/synapse/latest/code_style.html) is
correct
(run the
[linters](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#run-the-linters))
This commit is contained in:
3nprob 2025-05-26 14:21:43 +00:00 committed by GitHub
parent 24e849e483
commit 33ba8860c4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 6 additions and 8 deletions

1
changelog.d/18391.bugfix Normal file
View File

@ -0,0 +1 @@
Prevent race-condition in `_maybe_retry_device_resync` entrance.

View File

@ -20,6 +20,7 @@
#
#
import logging
from threading import Lock
from typing import (
TYPE_CHECKING,
AbstractSet,
@ -1237,7 +1238,7 @@ class DeviceListUpdater(DeviceListWorkerUpdater):
)
# Attempt to resync out of sync device lists every 30s.
self._resync_retry_in_progress = False
self._resync_retry_lock = Lock()
self.clock.looping_call(
run_as_background_process,
30 * 1000,
@ -1419,13 +1420,10 @@ class DeviceListUpdater(DeviceListWorkerUpdater):
"""Retry to resync device lists that are out of sync, except if another retry is
in progress.
"""
if self._resync_retry_in_progress:
# If the lock can not be acquired we want to always return immediately instead of blocking here
if not self._resync_retry_lock.acquire(blocking=False):
return
try:
# Prevent another call of this function to retry resyncing device lists so
# we don't send too many requests.
self._resync_retry_in_progress = True
# Get all of the users that need resyncing.
need_resync = await self.store.get_user_ids_requiring_device_list_resync()
@ -1466,8 +1464,7 @@ class DeviceListUpdater(DeviceListWorkerUpdater):
e,
)
finally:
# Allow future calls to retry resyncinc out of sync device lists.
self._resync_retry_in_progress = False
self._resync_retry_lock.release()
async def multi_user_device_resync(
self, user_ids: List[str], mark_failed_as_stale: bool = True