diff options
-rw-r--r-- | changelog.d/13249.bugfix | 1 | ||||
-rw-r--r-- | synapse/handlers/federation_event.py | 46 | ||||
-rw-r--r-- | synapse/storage/databases/main/events_worker.py | 6 |
3 files changed, 42 insertions, 11 deletions
diff --git a/changelog.d/13249.bugfix b/changelog.d/13249.bugfix new file mode 100644 index 0000000000..79ff97b0c5 --- /dev/null +++ b/changelog.d/13249.bugfix @@ -0,0 +1 @@ +Prevent `Failed to fetch missing state events...` warning being logged every time we tried to fetch `state_ids` for a missing `prev_event`. diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 9e065e1116..100785ebba 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1064,7 +1064,8 @@ class FederationEventHandler: ) logger.debug( - "state_ids returned %i state events, %i auth events", + "_get_state_ids_after_missing_prev_event(event_id=%s): state_ids returned %i state events, %i auth events", + event_id, len(state_event_ids), len(auth_event_ids), ) @@ -1072,12 +1073,17 @@ class FederationEventHandler: # Start by checking events we already have in the DB desired_events = set(state_event_ids) desired_events.add(event_id) - logger.debug("Fetching %i events from cache/store", len(desired_events)) + logger.debug( + "_get_state_ids_after_missing_prev_event(event_id=%s): Fetching %i events from cache/store", + event_id, + len(desired_events), + ) have_events = await self._store.have_seen_events(room_id, desired_events) missing_desired_event_ids = desired_events - have_events logger.debug( - "We are missing %i events (got %i)", + "_get_state_ids_after_missing_prev_event(event_id=%s): We are missing %i events (got %i)", + event_id, len(missing_desired_event_ids), len(have_events), ) @@ -1094,7 +1100,11 @@ class FederationEventHandler: missing_auth_event_ids.difference_update( await self._store.have_seen_events(room_id, missing_auth_event_ids) ) - logger.debug("We are also missing %i auth events", len(missing_auth_event_ids)) + logger.debug( + "_get_state_ids_after_missing_prev_event(event_id=%s): We are also missing %i auth events", + event_id, + len(missing_auth_event_ids), + ) missing_event_ids = missing_desired_event_ids | missing_auth_event_ids @@ -1125,10 +1135,17 @@ class FederationEventHandler: # TODO: might it be better to have an API which lets us do an aggregate event # request if (len(missing_event_ids) * 10) >= len(auth_event_ids) + len(state_event_ids): - logger.debug("Requesting complete state from remote") + logger.debug( + "_get_state_ids_after_missing_prev_event(event_id=%s): Requesting complete state from remote", + event_id, + ) await self._get_state_and_persist(destination, room_id, event_id) else: - logger.debug("Fetching %i events from remote", len(missing_event_ids)) + logger.debug( + "_get_state_ids_after_missing_prev_event(event_id=%s): Fetching %i events from remote", + event_id, + len(missing_event_ids), + ) await self._get_events_and_persist( destination=destination, room_id=room_id, event_ids=missing_event_ids ) @@ -1147,8 +1164,9 @@ class FederationEventHandler: # This can happen if a remote server claims that the state or # auth_events at an event in room A are actually events in room B logger.warning( - "Remote server %s claims event %s in room %s is an auth/state " + "_get_state_ids_after_missing_prev_event(event_id=%s): Remote server %s claims event %s in room %s is an auth/state " "event in room %s", + event_id, destination, state_event_id, metadata.room_id, @@ -1158,7 +1176,9 @@ class FederationEventHandler: if metadata.state_key is None: logger.warning( - "Remote server gave us non-state event in state: %s", state_event_id + "_get_state_ids_after_missing_prev_event(event_id=%s): Remote server gave us non-state event in state: %s", + event_id, + state_event_id, ) continue @@ -1174,6 +1194,14 @@ class FederationEventHandler: if not remote_event: raise Exception("Unable to get missing prev_event %s" % (event_id,)) + # Figure out which events we failed to fetch + failed_to_fetch = desired_events - event_metadata.keys() + # The event_id is part of the `desired_events` but isn't fetched as part + # of the `event_metadata` so we remove it here separately if we did find it. + have_event_id = await self._store.have_seen_event(room_id, event_id) + if have_event_id: + failed_to_fetch = failed_to_fetch - {event_id} + # missing state at that event is a warning, not a blocker # XXX: this doesn't sound right? it means that we'll end up with incomplete # state. @@ -1183,7 +1211,7 @@ class FederationEventHandler: failed_to_fetch.discard(event_id) if failed_to_fetch: logger.warning( - "Failed to fetch missing state events for %s %s", + "_get_state_ids_after_missing_prev_event(event_id=%s): Failed to fetch missing state events %s", event_id, failed_to_fetch, ) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 52914febf9..4f1dd6d6b6 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -1482,7 +1482,8 @@ class EventsWorkerStore(SQLBaseStore): return results - @cachedList(cached_method_name="have_seen_event", list_name="keys") + # TODO: The cache is giving us stale results for the `failed_to_fetch` stuff. + # @cachedList(cached_method_name="have_seen_event", list_name="keys") async def _have_seen_events_dict( self, keys: Collection[Tuple[str, str]] ) -> Dict[Tuple[str, str], bool]: @@ -1524,7 +1525,8 @@ class EventsWorkerStore(SQLBaseStore): await self.db_pool.runInteraction("have_seen_events", have_seen_events_txn) return results - @cached(max_entries=100000, tree=True) + # TODO: The cache is giving us stale results for the `failed_to_fetch` stuff. + # @cached(max_entries=100000, tree=True) async def have_seen_event(self, room_id: str, event_id: str) -> bool: res = await self._have_seen_events_dict(((room_id, event_id),)) return res[(room_id, event_id)] |