summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--changelog.d/13249.bugfix1
-rw-r--r--synapse/handlers/federation_event.py46
-rw-r--r--synapse/storage/databases/main/events_worker.py6
3 files changed, 42 insertions, 11 deletions
diff --git a/changelog.d/13249.bugfix b/changelog.d/13249.bugfix
new file mode 100644
index 0000000000..79ff97b0c5
--- /dev/null
+++ b/changelog.d/13249.bugfix
@@ -0,0 +1 @@
+Prevent `Failed to fetch missing state events...` warning being logged every time we tried to fetch `state_ids` for a missing `prev_event`.
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 9e065e1116..100785ebba 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -1064,7 +1064,8 @@ class FederationEventHandler:
         )
 
         logger.debug(
-            "state_ids returned %i state events, %i auth events",
+            "_get_state_ids_after_missing_prev_event(event_id=%s): state_ids returned %i state events, %i auth events",
+            event_id,
             len(state_event_ids),
             len(auth_event_ids),
         )
@@ -1072,12 +1073,17 @@ class FederationEventHandler:
         # Start by checking events we already have in the DB
         desired_events = set(state_event_ids)
         desired_events.add(event_id)
-        logger.debug("Fetching %i events from cache/store", len(desired_events))
+        logger.debug(
+            "_get_state_ids_after_missing_prev_event(event_id=%s): Fetching %i events from cache/store",
+            event_id,
+            len(desired_events),
+        )
         have_events = await self._store.have_seen_events(room_id, desired_events)
 
         missing_desired_event_ids = desired_events - have_events
         logger.debug(
-            "We are missing %i events (got %i)",
+            "_get_state_ids_after_missing_prev_event(event_id=%s): We are missing %i events (got %i)",
+            event_id,
             len(missing_desired_event_ids),
             len(have_events),
         )
@@ -1094,7 +1100,11 @@ class FederationEventHandler:
         missing_auth_event_ids.difference_update(
             await self._store.have_seen_events(room_id, missing_auth_event_ids)
         )
-        logger.debug("We are also missing %i auth events", len(missing_auth_event_ids))
+        logger.debug(
+            "_get_state_ids_after_missing_prev_event(event_id=%s): We are also missing %i auth events",
+            event_id,
+            len(missing_auth_event_ids),
+        )
 
         missing_event_ids = missing_desired_event_ids | missing_auth_event_ids
 
@@ -1125,10 +1135,17 @@ class FederationEventHandler:
         # TODO: might it be better to have an API which lets us do an aggregate event
         #   request
         if (len(missing_event_ids) * 10) >= len(auth_event_ids) + len(state_event_ids):
-            logger.debug("Requesting complete state from remote")
+            logger.debug(
+                "_get_state_ids_after_missing_prev_event(event_id=%s): Requesting complete state from remote",
+                event_id,
+            )
             await self._get_state_and_persist(destination, room_id, event_id)
         else:
-            logger.debug("Fetching %i events from remote", len(missing_event_ids))
+            logger.debug(
+                "_get_state_ids_after_missing_prev_event(event_id=%s): Fetching %i events from remote",
+                event_id,
+                len(missing_event_ids),
+            )
             await self._get_events_and_persist(
                 destination=destination, room_id=room_id, event_ids=missing_event_ids
             )
@@ -1147,8 +1164,9 @@ class FederationEventHandler:
                 # This can happen if a remote server claims that the state or
                 # auth_events at an event in room A are actually events in room B
                 logger.warning(
-                    "Remote server %s claims event %s in room %s is an auth/state "
+                    "_get_state_ids_after_missing_prev_event(event_id=%s):  Remote server %s claims event %s in room %s is an auth/state "
                     "event in room %s",
+                    event_id,
                     destination,
                     state_event_id,
                     metadata.room_id,
@@ -1158,7 +1176,9 @@ class FederationEventHandler:
 
             if metadata.state_key is None:
                 logger.warning(
-                    "Remote server gave us non-state event in state: %s", state_event_id
+                    "_get_state_ids_after_missing_prev_event(event_id=%s): Remote server gave us non-state event in state: %s",
+                    event_id,
+                    state_event_id,
                 )
                 continue
 
@@ -1174,6 +1194,14 @@ class FederationEventHandler:
         if not remote_event:
             raise Exception("Unable to get missing prev_event %s" % (event_id,))
 
+        # Figure out which events we failed to fetch
+        failed_to_fetch = desired_events - event_metadata.keys()
+        # The event_id is part of the `desired_events` but isn't fetched as part
+        # of the `event_metadata` so we remove it here separately if we did find it.
+        have_event_id = await self._store.have_seen_event(room_id, event_id)
+        if have_event_id:
+            failed_to_fetch = failed_to_fetch - {event_id}
+
         # missing state at that event is a warning, not a blocker
         # XXX: this doesn't sound right? it means that we'll end up with incomplete
         #   state.
@@ -1183,7 +1211,7 @@ class FederationEventHandler:
         failed_to_fetch.discard(event_id)
         if failed_to_fetch:
             logger.warning(
-                "Failed to fetch missing state events for %s %s",
+                "_get_state_ids_after_missing_prev_event(event_id=%s): Failed to fetch missing state events %s",
                 event_id,
                 failed_to_fetch,
             )
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index 52914febf9..4f1dd6d6b6 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -1482,7 +1482,8 @@ class EventsWorkerStore(SQLBaseStore):
 
         return results
 
-    @cachedList(cached_method_name="have_seen_event", list_name="keys")
+    # TODO: The cache is giving us stale results for the `failed_to_fetch` stuff.
+    # @cachedList(cached_method_name="have_seen_event", list_name="keys")
     async def _have_seen_events_dict(
         self, keys: Collection[Tuple[str, str]]
     ) -> Dict[Tuple[str, str], bool]:
@@ -1524,7 +1525,8 @@ class EventsWorkerStore(SQLBaseStore):
         await self.db_pool.runInteraction("have_seen_events", have_seen_events_txn)
         return results
 
-    @cached(max_entries=100000, tree=True)
+    # TODO: The cache is giving us stale results for the `failed_to_fetch` stuff.
+    # @cached(max_entries=100000, tree=True)
     async def have_seen_event(self, room_id: str, event_id: str) -> bool:
         res = await self._have_seen_events_dict(((room_id, event_id),))
         return res[(room_id, event_id)]