summary refs log tree commit diff
diff options
context:
space:
mode:
authorErik Johnston <erik@matrix.org>2020-02-19 09:39:27 +0000
committerGitHub <noreply@github.com>2020-02-19 09:39:26 +0000
commit5a5abd55e8b47a7c1620c298a72817ccf73f90b0 (patch)
treece5208b306ead08a47b21bb96c46546bc3432ff7
parentImplement GET /_matrix/client/r0/rooms/{roomId}/aliases (#6939) (diff)
downloadsynapse-5a5abd55e8b47a7c1620c298a72817ccf73f90b0.tar.xz
Limit size of get_auth_chain_ids query (#6947)
-rw-r--r--changelog.d/6947.misc1
-rw-r--r--synapse/storage/data_stores/main/event_federation.py41
2 files changed, 24 insertions, 18 deletions
diff --git a/changelog.d/6947.misc b/changelog.d/6947.misc
new file mode 100644
index 0000000000..6d00e58654
--- /dev/null
+++ b/changelog.d/6947.misc
@@ -0,0 +1 @@
+Increase perf of `get_auth_chain_ids` used in state res v2.
diff --git a/synapse/storage/data_stores/main/event_federation.py b/synapse/storage/data_stores/main/event_federation.py
index 1746f40adf..dcc375b840 100644
--- a/synapse/storage/data_stores/main/event_federation.py
+++ b/synapse/storage/data_stores/main/event_federation.py
@@ -62,32 +62,37 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
         )
 
     def _get_auth_chain_ids_txn(self, txn, event_ids, include_given):
+        if include_given:
+            results = set(event_ids)
+        else:
+            results = set()
+
         if isinstance(self.database_engine, PostgresEngine):
             # For efficiency we make the database do this if we can.
-            sql = """
-                WITH RECURSIVE auth_chain(event_id) AS (
-                    SELECT auth_id FROM event_auth WHERE event_id = ANY(?)
-                    UNION
-                    SELECT auth_id FROM event_auth
-                    INNER JOIN auth_chain USING (event_id)
-                )
-                SELECT event_id FROM auth_chain
-            """
-            txn.execute(sql, (list(event_ids),))
-
-            results = set(event_id for event_id, in txn)
 
-            if include_given:
-                results.update(event_ids)
+            # We need to be a little careful with querying large amounts at
+            # once, for some reason postgres really doesn't like it. We do this
+            # by only asking for auth chain of 500 events at a time.
+            event_ids = list(event_ids)
+            chunks = [event_ids[x : x + 500] for x in range(0, len(event_ids), 500)]
+            for chunk in chunks:
+                sql = """
+                    WITH RECURSIVE auth_chain(event_id) AS (
+                        SELECT auth_id FROM event_auth WHERE event_id = ANY(?)
+                        UNION
+                        SELECT auth_id FROM event_auth
+                        INNER JOIN auth_chain USING (event_id)
+                    )
+                    SELECT event_id FROM auth_chain
+                """
+                txn.execute(sql, (chunk,))
+
+                results.update(event_id for event_id, in txn)
 
             return list(results)
 
         # Database doesn't necessarily support recursive CTE, so we fall
         # back to do doing it manually.
-        if include_given:
-            results = set(event_ids)
-        else:
-            results = set()
 
         base_sql = "SELECT auth_id FROM event_auth WHERE "