From 99b146825a1a8257d05440ae3e331c68b8e1575a Mon Sep 17 00:00:00 2001
From: Jason Little
Date: Wed, 30 Apr 2025 09:29:42 -0500
Subject: [PATCH 07/10] fix: Always recheck `/messages` pagination data if a
 backfill might have been needed (#28)

---
 synapse/handlers/federation.py | 35 +++++++++++++--------------------
 synapse/handlers/pagination.py | 36 +++++++++++++++++++---------------
 2 files changed, 34 insertions(+), 37 deletions(-)

diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index a6de3e824d..ff751d25f6 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -211,7 +211,7 @@ class FederationHandler:
     @tag_args
     async def maybe_backfill(
         self, room_id: str, current_depth: int, limit: int, record_time: bool = True
-    ) -> bool:
+    ) -> None:
         """Checks the database to see if we should backfill before paginating,
         and if so do.
 
@@ -225,8 +225,6 @@ class FederationHandler:
                 should back paginate.
             record_time: Whether to record the time it takes to backfill.
 
-        Returns:
-            True if we actually tried to backfill something, otherwise False.
         """
         # Starting the processing time here so we can include the room backfill
         # linearizer lock queue in the timing
@@ -252,7 +250,7 @@ class FederationHandler:
         limit: int,
         *,
         processing_start_time: Optional[int],
-    ) -> bool:
+    ) -> None:
         """
         Checks whether the `current_depth` is at or approaching any backfill
         points in the room and if so, will backfill. We only care about
@@ -326,7 +324,7 @@ class FederationHandler:
                 limit=1,
             )
             if not have_later_backfill_points:
-                return False
+                return None
 
             logger.debug(
                 "_maybe_backfill_inner: all backfill points are *after* current depth. Trying again with later backfill points."
@@ -346,15 +344,15 @@ class FederationHandler:
             )
             # We return `False` because we're backfilling in the background and there is
             # no new events immediately for the caller to know about yet.
-            return False
+            return None
 
         # Even after recursing with `MAX_DEPTH`, we didn't find any
         # backward extremities to backfill from.
         if not sorted_backfill_points:
             logger.debug(
-                "_maybe_backfill_inner: Not backfilling as no backward extremeties found."
+                "_maybe_backfill_inner: Not backfilling as no backward extremities found."
             )
-            return False
+            return None
 
         # If we're approaching an extremity we trigger a backfill, otherwise we
         # no-op.
@@ -373,7 +371,7 @@ class FederationHandler:
                 current_depth,
                 limit,
             )
-            return False
+            return None
 
         # For performance's sake, we only want to paginate from a particular extremity
         # if we can actually see the events we'll get. Otherwise, we'd just spend a lot
@@ -441,7 +439,7 @@ class FederationHandler:
             logger.debug(
                 "_maybe_backfill_inner: found no extremities which would be visible"
             )
-            return False
+            return None
 
         logger.debug(
             "_maybe_backfill_inner: extremities_to_request %s", extremities_to_request
@@ -464,7 +462,7 @@ class FederationHandler:
             )
         )
 
-        async def try_backfill(domains: StrCollection) -> bool:
+        async def try_backfill(domains: StrCollection) -> None:
             # TODO: Should we try multiple of these at a time?
 
             # Number of contacted remote homeservers that have denied our backfill
@@ -487,7 +485,7 @@ class FederationHandler:
                     # If this succeeded then we probably already have the
                     # appropriate stuff.
                     # TODO: We can probably do something more intelligent here.
-                    return True
+                    return None
                 except NotRetryingDestination as e:
                     logger.info("_maybe_backfill_inner: %s", e)
                     continue
@@ -511,7 +509,7 @@ class FederationHandler:
                         )
                         denied_count += 1
                         if denied_count >= max_denied_count:
-                            return False
+                            return None
                         continue
 
                     logger.info("Failed to backfill from %s because %s", dom, e)
@@ -527,7 +525,7 @@ class FederationHandler:
                         )
                         denied_count += 1
                         if denied_count >= max_denied_count:
-                            return False
+                            return None
                         continue
 
                     logger.info("Failed to backfill from %s because %s", dom, e)
@@ -539,7 +537,7 @@ class FederationHandler:
                     logger.exception("Failed to backfill from %s because %s", dom, e)
                     continue
 
-            return False
+            return None
 
         # If we have the `processing_start_time`, then we can make an
         # observation. We wouldn't have the `processing_start_time` in the case
@@ -551,14 +549,9 @@ class FederationHandler:
                 (processing_end_time - processing_start_time) / 1000
             )
 
-        success = await try_backfill(likely_domains)
-        if success:
-            return True
-
         # TODO: we could also try servers which were previously in the room, but
         # are no longer.
-
-        return False
+        return await try_backfill(likely_domains)
 
     async def send_invite(self, target_host: str, event: EventBase) -> EventBase:
         """Sends the invite to the remote server for signing.
diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py
index 4070b74b7a..81cda38549 100644
--- a/synapse/handlers/pagination.py
+++ b/synapse/handlers/pagination.py
@@ -577,27 +577,31 @@ class PaginationHandler:
             or missing_too_many_events
             or not_enough_events_to_fill_response
         ):
-            did_backfill = await self.hs.get_federation_handler().maybe_backfill(
+            # Historical Note: There used to be a check here for if backfill was
+            # successful or not
+            await self.hs.get_federation_handler().maybe_backfill(
                 room_id,
                 curr_topo,
                 limit=pagin_config.limit,
             )
 
-            # If we did backfill something, refetch the events from the database to
-            # catch anything new that might have been added since we last fetched.
-            if did_backfill:
-                (
-                    events,
-                    next_key,
-                    _,
-                ) = await self.store.paginate_room_events_by_topological_ordering(
-                    room_id=room_id,
-                    from_key=from_token.room_key,
-                    to_key=to_room_key,
-                    direction=pagin_config.direction,
-                    limit=pagin_config.limit,
-                    event_filter=event_filter,
-                )
+            # Regardless if we backfilled or not, another worker or even a
+            # simultaneous request may have backfilled for us while we were held
+            # behind the linearizer. This should not have too much additional
+            # database load as it will only be triggered if a backfill *might* have
+            # been needed
+            (
+                events,
+                next_key,
+                _,
+            ) = await self.store.paginate_room_events_by_topological_ordering(
+                room_id=room_id,
+                from_key=from_token.room_key,
+                to_key=to_room_key,
+                direction=pagin_config.direction,
+                limit=pagin_config.limit,
+                event_filter=event_filter,
+            )
         else:
             # Otherwise, we can backfill in the background for eventual
             # consistency's sake but we don't need to block the client waiting
-- 
2.49.0