Fix spinloop during partial state sync when a prev event is in backoff (#15351)
Previously, we would spin in a tight loop until
`update_state_for_partial_state_event` stopped raising
`FederationPullAttemptBackoffError`s. Replace the spinloop with a wait
until the backoff period has expired.
Signed-off-by: Sean Quah <seanq@matrix.org>
1 file changed, 17 insertions, 7 deletions
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 648843cdbe..982c8d3b2f 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -140,6 +140,7 @@ class FederationEventHandler:
"""
def __init__(self, hs: "HomeServer"):
+ self._clock = hs.get_clock()
self._store = hs.get_datastores().main
self._storage_controllers = hs.get_storage_controllers()
self._state_storage_controller = self._storage_controllers.state
@@ -1038,8 +1039,8 @@ class FederationEventHandler:
Raises:
FederationPullAttemptBackoffError if we are deliberately not attempting
- to pull the given event over federation because we've already done so
- recently and are backing off.
+ to pull one of the given event's `prev_event`s over federation because
+ we've already done so recently and are backing off.
FederationError if we fail to get the state from the remote server after any
missing `prev_event`s.
"""
@@ -1053,13 +1054,22 @@ class FederationEventHandler:
# If we've already recently attempted to pull this missing event, don't
# try it again so soon. Since we have to fetch all of the prev_events, we can
# bail early here if we find any to ignore.
- prevs_to_ignore = await self._store.get_event_ids_to_not_pull_from_backoff(
- room_id, missing_prevs
+ prevs_with_pull_backoff = (
+ await self._store.get_event_ids_to_not_pull_from_backoff(
+ room_id, missing_prevs
+ )
)
- if len(prevs_to_ignore) > 0:
+ if len(prevs_with_pull_backoff) > 0:
raise FederationPullAttemptBackoffError(
- event_ids=prevs_to_ignore,
- message=f"While computing context for event={event_id}, not attempting to pull missing prev_event={prevs_to_ignore[0]} because we already tried to pull recently (backing off).",
+ event_ids=prevs_with_pull_backoff.keys(),
+ message=(
+ f"While computing context for event={event_id}, not attempting to "
+ f"pull missing prev_events={list(prevs_with_pull_backoff.keys())} "
+ "because we already tried to pull recently (backing off)."
+ ),
+ retry_after_ms=(
+ max(prevs_with_pull_backoff.values()) - self._clock.time_msec()
+ ),
)
if not missing_prevs:
|