Don't lock up when joining large rooms (#16903)
Co-authored-by: Andrew Morgan <andrew@amorgan.xyz>
2 files changed, 18 insertions, 9 deletions
diff --git a/changelog.d/16903.bugfix b/changelog.d/16903.bugfix
new file mode 100644
index 0000000000..85a909b681
--- /dev/null
+++ b/changelog.d/16903.bugfix
@@ -0,0 +1 @@
+Fix performance issue when joining very large rooms that can cause the server to lock up. Introduced in v1.100.0.
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index bde45308d4..83f6a25981 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -1757,17 +1757,25 @@ class FederationEventHandler:
events_and_contexts_to_persist.append((event, context))
- for event in sorted_auth_events:
+ for i, event in enumerate(sorted_auth_events):
await prep(event)
- await self.persist_events_and_notify(
- room_id,
- events_and_contexts_to_persist,
- # Mark these events backfilled as they're historic events that will
- # eventually be backfilled. For example, missing events we fetch
- # during backfill should be marked as backfilled as well.
- backfilled=True,
- )
+ # The above function is typically not async, and so won't yield to
+ # the reactor. For large rooms let's yield to the reactor
+ # occasionally to ensure we don't block other work.
+ if (i + 1) % 1000 == 0:
+ await self._clock.sleep(0)
+
+    # Also persist the new events in batches for similar reasons as above.
+ for batch in batch_iter(events_and_contexts_to_persist, 1000):
+ await self.persist_events_and_notify(
+ room_id,
+ batch,
+ # Mark these events as backfilled as they're historic events that will
+ # eventually be backfilled. For example, missing events we fetch
+ # during backfill should be marked as backfilled as well.
+ backfilled=True,
+ )
@trace
async def _check_event_auth(
|