diff options
author | Eric Eastwood <erice@element.io> | 2022-09-14 13:57:50 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-14 13:57:50 -0500 |
commit | 957e3d74fc70f92bb9ed3c709f87752bf77a8c90 (patch) | |
tree | cf177f3d8a17f7bade6c9da71ddc14239f02d196 /synapse/storage/databases | |
parent | Update event push action and receipt tables to support threads. (#13753) (diff) | |
download | synapse-957e3d74fc70f92bb9ed3c709f87752bf77a8c90.tar.xz |
Keep track when we try and fail to process a pulled event (#13589)
We can follow-up this PR with: 1. Only try to backfill from an event if we haven't tried recently -> https://github.com/matrix-org/synapse/issues/13622 1. When we decide to backfill that event again, process it in the background so it doesn't block and make `/messages` slow when we know it will probably fail again -> https://github.com/matrix-org/synapse/issues/13623 1. Generally track failures everywhere we try and fail to pull an event over federation -> https://github.com/matrix-org/synapse/issues/13700 Fix https://github.com/matrix-org/synapse/issues/13621 Part of https://github.com/matrix-org/synapse/issues/13356 Mentioned in [internal doc](https://docs.google.com/document/d/1lvUoVfYUiy6UaHB6Rb4HicjaJAU40-APue9Q4vzuW3c/edit#bookmark=id.qv7cj51sv9i5)
Diffstat (limited to 'synapse/storage/databases')
-rw-r--r-- | synapse/storage/databases/main/event_federation.py | 45 | ||||
-rw-r--r-- | synapse/storage/databases/main/events.py | 32 |
2 files changed, 68 insertions, 9 deletions
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index ca47a22bf1..ef477978ed 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1294,6 +1294,51 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas return event_id_results + @trace + async def record_event_failed_pull_attempt( + self, room_id: str, event_id: str, cause: str + ) -> None: + """ + Record when we fail to pull an event over federation. + + This information allows us to be more intelligent when we decide to + retry (we don't need to fail over and over) and we can process that + event in the background so we don't block on it each time. + + Args: + room_id: The room where the event failed to pull from + event_id: The event that failed to be fetched or processed + cause: The error message or reason that we failed to pull the event + """ + await self.db_pool.runInteraction( + "record_event_failed_pull_attempt", + self._record_event_failed_pull_attempt_upsert_txn, + room_id, + event_id, + cause, + db_autocommit=True, # Safe as it's a single upsert + ) + + def _record_event_failed_pull_attempt_upsert_txn( + self, + txn: LoggingTransaction, + room_id: str, + event_id: str, + cause: str, + ) -> None: + sql = """ + INSERT INTO event_failed_pull_attempts ( + room_id, event_id, num_attempts, last_attempt_ts, last_cause + ) + VALUES (?, ?, ?, ?, ?) + ON CONFLICT (room_id, event_id) DO UPDATE SET + num_attempts=event_failed_pull_attempts.num_attempts + 1, + last_attempt_ts=EXCLUDED.last_attempt_ts, + last_cause=EXCLUDED.last_cause; + """ + + txn.execute(sql, (room_id, event_id, 1, self._clock.time_msec(), cause)) + async def get_missing_events( self, room_id: str, diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index c0b4080e4b..1b54a2eb57 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -2435,17 +2435,31 @@ class PersistEventsStore: "DELETE FROM event_backward_extremities" " WHERE event_id = ? AND room_id = ?" ) + backward_extremity_tuples_to_remove = [ + (ev.event_id, ev.room_id) + for ev in events + if not ev.internal_metadata.is_outlier() + # If we encountered an event with no prev_events, then we might + # as well remove it now because it won't ever have anything else + # to backfill from. + or len(ev.prev_event_ids()) == 0 + ] txn.execute_batch( query, - [ - (ev.event_id, ev.room_id) - for ev in events - if not ev.internal_metadata.is_outlier() - # If we encountered an event with no prev_events, then we might - # as well remove it now because it won't ever have anything else - # to backfill from. - or len(ev.prev_event_ids()) == 0 - ], + backward_extremity_tuples_to_remove, + ) + + # Clear out the failed backfill attempts after we successfully pulled + # the event. Since we no longer need these events as backward + # extremities, it also means that they won't be backfilled from again so + # we no longer need to store the backfill attempts around it. + query = """ + DELETE FROM event_failed_pull_attempts + WHERE event_id = ? and room_id = ? + """ + txn.execute_batch( + query, + backward_extremity_tuples_to_remove, ) |