Don't lock up when joining large rooms (#16903)
Co-authored-by: Andrew Morgan <andrew@amorgan.xyz>
2 files changed, 18 insertions, 9 deletions
diff --git a/changelog.d/16903.bugfix b/changelog.d/16903.bugfix
new file mode 100644
index 0000000000..85a909b681
--- /dev/null
+++ b/changelog.d/16903.bugfix
@@ -0,0 +1 @@
+Fix performance issue when joining very large rooms that can cause the server to lock up. Introduced in v1.100.0.
diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index bde45308d4..83f6a25981 100644
--- a/synapse/handlers/federation_event.py
+++ b/synapse/handlers/federation_event.py
@@ -1757,17 +1757,25 @@ class FederationEventHandler:
events_and_contexts_to_persist.append((event, context))
- for event in sorted_auth_events:
+ for i, event in enumerate(sorted_auth_events):
await prep(event)
- await self.persist_events_and_notify(
- room_id,
- events_and_contexts_to_persist,
- # Mark these events backfilled as they're historic events that will
- # eventually be backfilled. For example, missing events we fetch
- # during backfill should be marked as backfilled as well.
- backfilled=True,
- )
+ # The above function is typically not async, and so won't yield to
+ # the reactor. For large rooms let's yield to the reactor
+ # occasionally to ensure we don't block other work.
+ if (i + 1) % 1000 == 0:
+ await self._clock.sleep(0)
+
+    # Also persist the new events in batches for similar reasons as above.
+ for batch in batch_iter(events_and_contexts_to_persist, 1000):
+ await self.persist_events_and_notify(
+ room_id,
+ batch,
+ # Mark these events as backfilled as they're historic events that will
+ # eventually be backfilled. For example, missing events we fetch
+ # during backfill should be marked as backfilled as well.
+ backfilled=True,
+ )
@trace
async def _check_event_auth(
|