From e17e5c97e0d2be7a82c782e8ef9774e682968c7b Mon Sep 17 00:00:00 2001 From: reivilibre Date: Mon, 1 Aug 2022 16:45:39 +0000 Subject: Faster Room Joins: don't leave a stuck room partial state flag if the join fails. (#13403) --- synapse/handlers/federation.py | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) (limited to 'synapse/handlers/federation.py') diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 3b5eaf5156..1cf6cb32e3 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -546,9 +546,9 @@ class FederationHandler: ) if ret.partial_state: - # TODO(faster_joins): roll this back if we don't manage to start the - # background resync (eg process_remote_join fails) - # https://github.com/matrix-org/synapse/issues/12998 + # Mark the room as having partial state. + # The background process is responsible for unmarking this flag, + # even if the join fails. await self.store.store_partial_state_room(room_id, ret.servers_in_room) try: @@ -574,17 +574,21 @@ class FederationHandler: room_id, ) raise LimitExceededError(msg=e.msg, errcode=e.errcode, retry_after_ms=0) - - if ret.partial_state: - # Kick off the process of asynchronously fetching the state for this - # room. - run_as_background_process( - desc="sync_partial_state_room", - func=self._sync_partial_state_room, - initial_destination=origin, - other_destinations=ret.servers_in_room, - room_id=room_id, - ) + finally: + # Always kick off the background process that asynchronously fetches + # state for the room. + # If the join failed, the background process is responsible for + # cleaning up — including unmarking the room as a partial state room. + if ret.partial_state: + # Kick off the process of asynchronously fetching the state for this + # room. + run_as_background_process( + desc="sync_partial_state_room", + func=self._sync_partial_state_room, + initial_destination=origin, + other_destinations=ret.servers_in_room, + room_id=room_id, + ) # We wait here until this instance has seen the events come down # replication (if we're using replication) as the below uses caches. -- cgit 1.4.1