summary refs log tree commit diff
path: root/synapse/storage
diff options
context:
space:
mode:
authorSean Quah <8349537+squahtx@users.noreply.github.com>2023-03-30 13:36:41 +0100
committerGitHub <noreply@github.com>2023-03-30 13:36:41 +0100
commitd9f694932c64d68e791ecb4c860e911e21a0baeb (patch)
tree31af566215c68a1194706f488a09a098a2335cea /synapse/storage
parentAdd the ability to enable/disable registrations when in the OIDC flow (#14978) (diff)
downloadsynapse-d9f694932c64d68e791ecb4c860e911e21a0baeb.tar.xz
Fix spinloop during partial state sync when a prev event is in backoff (#15351)
Previously, we would spin in a tight loop until
`update_state_for_partial_state_event` stopped raising
`FederationPullAttemptBackoffError`s. Replace the spinloop with a wait
until the backoff period has expired.

Signed-off-by: Sean Quah <seanq@matrix.org>
Diffstat (limited to 'synapse/storage')
-rw-r--r--synapse/storage/databases/main/event_federation.py35
1 files changed, 21 insertions, 14 deletions
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index ff3edeb716..a19ba88bf8 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -1544,7 +1544,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
         self,
         room_id: str,
         event_ids: Collection[str],
-    ) -> List[str]:
+    ) -> Dict[str, int]:
         """
         Filter down the events to ones that we've failed to pull before recently. Uses
         exponential backoff.
@@ -1554,7 +1554,8 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
             event_ids: A list of events to filter down
 
         Returns:
-            List of event_ids that should not be attempted to be pulled
+            A dictionary of event_ids that should not be attempted to be pulled and the
+            next timestamp at which we may try pulling them again.
         """
         event_failed_pull_attempts = await self.db_pool.simple_select_many_batch(
             table="event_failed_pull_attempts",
@@ -1570,22 +1571,28 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
         )
 
         current_time = self._clock.time_msec()
-        return [
-            event_failed_pull_attempt["event_id"]
-            for event_failed_pull_attempt in event_failed_pull_attempts
+
+        event_ids_with_backoff = {}
+        for event_failed_pull_attempt in event_failed_pull_attempts:
+            event_id = event_failed_pull_attempt["event_id"]
             # Exponential back-off (up to the upper bound) so we don't try to
             # pull the same event over and over. ex. 2hr, 4hr, 8hr, 16hr, etc.
-            if current_time
-            < event_failed_pull_attempt["last_attempt_ts"]
-            + (
-                2
-                ** min(
-                    event_failed_pull_attempt["num_attempts"],
-                    BACKFILL_EVENT_EXPONENTIAL_BACKOFF_MAXIMUM_DOUBLING_STEPS,
+            backoff_end_time = (
+                event_failed_pull_attempt["last_attempt_ts"]
+                + (
+                    2
+                    ** min(
+                        event_failed_pull_attempt["num_attempts"],
+                        BACKFILL_EVENT_EXPONENTIAL_BACKOFF_MAXIMUM_DOUBLING_STEPS,
+                    )
                 )
+                * BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_MILLISECONDS
             )
-            * BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_MILLISECONDS
-        ]
+
+            if current_time < backoff_end_time:  # `backoff_end_time` is exclusive
+                event_ids_with_backoff[event_id] = backoff_end_time
+
+        return event_ids_with_backoff
 
     async def get_missing_events(
         self,