diff --git a/changelog.d/13249.bugfix b/changelog.d/13249.bugfix
new file mode 100644
index 0000000000..79ff97b0c5
--- /dev/null
+++ b/changelog.d/13249.bugfix
@@ -0,0 +1 @@
+Prevent `Failed to fetch missing state events...` warning being logged every time we tried to fetch `state_ids` for a missing `prev_event`.
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index 9e065e1116..100785ebba 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -1064,7 +1064,8 @@ class FederationEventHandler:
)
logger.debug(
- "state_ids returned %i state events, %i auth events",
+ "_get_state_ids_after_missing_prev_event(event_id=%s): state_ids returned %i state events, %i auth events",
+ event_id,
len(state_event_ids),
len(auth_event_ids),
)
@@ -1072,12 +1073,17 @@ class FederationEventHandler:
# Start by checking events we already have in the DB
desired_events = set(state_event_ids)
desired_events.add(event_id)
- logger.debug("Fetching %i events from cache/store", len(desired_events))
+ logger.debug(
+ "_get_state_ids_after_missing_prev_event(event_id=%s): Fetching %i events from cache/store",
+ event_id,
+ len(desired_events),
+ )
have_events = await self._store.have_seen_events(room_id, desired_events)
missing_desired_event_ids = desired_events - have_events
logger.debug(
- "We are missing %i events (got %i)",
+ "_get_state_ids_after_missing_prev_event(event_id=%s): We are missing %i events (got %i)",
+ event_id,
len(missing_desired_event_ids),
len(have_events),
)
@@ -1094,7 +1100,11 @@ class FederationEventHandler:
missing_auth_event_ids.difference_update(
await self._store.have_seen_events(room_id, missing_auth_event_ids)
)
- logger.debug("We are also missing %i auth events", len(missing_auth_event_ids))
+ logger.debug(
+ "_get_state_ids_after_missing_prev_event(event_id=%s): We are also missing %i auth events",
+ event_id,
+ len(missing_auth_event_ids),
+ )
missing_event_ids = missing_desired_event_ids | missing_auth_event_ids
@@ -1125,10 +1135,17 @@ class FederationEventHandler:
# TODO: might it be better to have an API which lets us do an aggregate event
# request
if (len(missing_event_ids) * 10) >= len(auth_event_ids) + len(state_event_ids):
- logger.debug("Requesting complete state from remote")
+ logger.debug(
+ "_get_state_ids_after_missing_prev_event(event_id=%s): Requesting complete state from remote",
+ event_id,
+ )
await self._get_state_and_persist(destination, room_id, event_id)
else:
- logger.debug("Fetching %i events from remote", len(missing_event_ids))
+ logger.debug(
+ "_get_state_ids_after_missing_prev_event(event_id=%s): Fetching %i events from remote",
+ event_id,
+ len(missing_event_ids),
+ )
await self._get_events_and_persist(
destination=destination, room_id=room_id, event_ids=missing_event_ids
)
@@ -1147,8 +1164,9 @@ class FederationEventHandler:
# This can happen if a remote server claims that the state or
# auth_events at an event in room A are actually events in room B
logger.warning(
- "Remote server %s claims event %s in room %s is an auth/state "
+ "_get_state_ids_after_missing_prev_event(event_id=%s): Remote server %s claims event %s in room %s is an auth/state "
"event in room %s",
+ event_id,
destination,
state_event_id,
metadata.room_id,
@@ -1158,7 +1176,9 @@ class FederationEventHandler:
if metadata.state_key is None:
logger.warning(
- "Remote server gave us non-state event in state: %s", state_event_id
+ "_get_state_ids_after_missing_prev_event(event_id=%s): Remote server gave us non-state event in state: %s",
+ event_id,
+ state_event_id,
)
continue
@@ -1174,6 +1194,14 @@ class FederationEventHandler:
if not remote_event:
raise Exception("Unable to get missing prev_event %s" % (event_id,))
+ # Figure out which events we failed to fetch
+ failed_to_fetch = desired_events - event_metadata.keys()
+ # The event_id is part of the `desired_events` but isn't fetched as part
+ # of the `event_metadata` so we remove it here separately if we did find it.
+ have_event_id = await self._store.have_seen_event(room_id, event_id)
+ if have_event_id:
+ failed_to_fetch = failed_to_fetch - {event_id}
+
# missing state at that event is a warning, not a blocker
# XXX: this doesn't sound right? it means that we'll end up with incomplete
# state.
@@ -1183,7 +1211,7 @@ class FederationEventHandler:
failed_to_fetch.discard(event_id)
if failed_to_fetch:
logger.warning(
- "Failed to fetch missing state events for %s %s",
+ "_get_state_ids_after_missing_prev_event(event_id=%s): Failed to fetch missing state events %s",
event_id,
failed_to_fetch,
)
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py
index 52914febf9..4f1dd6d6b6 100644
--- a/synapse/storage/databases/main/events_worker.py
+++ b/synapse/storage/databases/main/events_worker.py
@@ -1482,7 +1482,8 @@ class EventsWorkerStore(SQLBaseStore):
return results
- @cachedList(cached_method_name="have_seen_event", list_name="keys")
+ # TODO: The cache is giving us stale results for the `failed_to_fetch` stuff.
+ # @cachedList(cached_method_name="have_seen_event", list_name="keys")
async def _have_seen_events_dict(
self, keys: Collection[Tuple[str, str]]
) -> Dict[Tuple[str, str], bool]:
@@ -1524,7 +1525,8 @@ class EventsWorkerStore(SQLBaseStore):
await self.db_pool.runInteraction("have_seen_events", have_seen_events_txn)
return results
- @cached(max_entries=100000, tree=True)
+ # TODO: The cache is giving us stale results for the `failed_to_fetch` stuff.
+ # @cached(max_entries=100000, tree=True)
async def have_seen_event(self, room_id: str, event_id: str) -> bool:
res = await self._have_seen_events_dict(((room_id, event_id),))
return res[(room_id, event_id)]
|