From 33ba8860c43d4770ea119a09a4fcbbf366f3b32e Mon Sep 17 00:00:00 2001 From: 3nprob <74199244+3nprob@users.noreply.github.com> Date: Mon, 26 May 2025 14:21:43 +0000 Subject: [PATCH 28/34] fix(device-handler): make _maybe_retry_device_resync thread-safe (#18391) A race-condition may render concurrent retry loops. Use an actual `Lock` for guarding single access of device resyncing retrying. ### Pull Request Checklist * [x] Pull request is based on the develop branch * [x] Pull request includes a [changelog file](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#changelog). The entry should: - Be a short description of your change which makes sense to users. "Fixed a bug that prevented receiving messages from other servers." instead of "Moved X method from `EventStore` to `EventWorkerStore`.". - Use markdown where necessary, mostly for `code blocks`. - End with either a period (.) or an exclamation mark (!). - Start with a capital letter. - Feel free to credit yourself, by adding a sentence "Contributed by @github_username." or "Contributed by [Your Name]." to the end of the entry. * [x] [Code style](https://element-hq.github.io/synapse/latest/code_style.html) is correct (run the [linters](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#run-the-linters)) --- changelog.d/18391.bugfix | 1 + synapse/handlers/device.py | 13 +++++-------- 2 files changed, 6 insertions(+), 8 deletions(-) create mode 100644 changelog.d/18391.bugfix diff --git a/changelog.d/18391.bugfix b/changelog.d/18391.bugfix new file mode 100644 index 0000000000..bbcb7b7a28 --- /dev/null +++ b/changelog.d/18391.bugfix @@ -0,0 +1 @@ +Prevent race-condition in `_maybe_retry_device_resync` entrance. diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 1efd039f22..f8b547bbed 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -20,6 +20,7 @@ # # import logging +from threading import Lock from typing import ( TYPE_CHECKING, AbstractSet, @@ -1237,7 +1238,7 @@ class DeviceListUpdater(DeviceListWorkerUpdater): ) # Attempt to resync out of sync device lists every 30s. - self._resync_retry_in_progress = False + self._resync_retry_lock = Lock() self.clock.looping_call( run_as_background_process, 30 * 1000, @@ -1419,13 +1420,10 @@ class DeviceListUpdater(DeviceListWorkerUpdater): """Retry to resync device lists that are out of sync, except if another retry is in progress. """ - if self._resync_retry_in_progress: + # If the lock can not be acquired we want to always return immediately instead of blocking here + if not self._resync_retry_lock.acquire(blocking=False): return - try: - # Prevent another call of this function to retry resyncing device lists so - # we don't send too many requests. - self._resync_retry_in_progress = True # Get all of the users that need resyncing. need_resync = await self.store.get_user_ids_requiring_device_list_resync() @@ -1466,8 +1464,7 @@ class DeviceListUpdater(DeviceListWorkerUpdater): e, ) finally: - # Allow future calls to retry resyncinc out of sync device lists. - self._resync_retry_in_progress = False + self._resync_retry_lock.release() async def multi_user_device_resync( self, user_ids: List[str], mark_failed_as_stale: bool = True -- 2.49.0