author     Richard van der Hoff <1389908+richvdh@users.noreply.github.com>  2020-04-15 10:16:35 +0100
committer  GitHub <noreply@github.com>  2020-04-15 10:16:35 +0100
commit     f2049a8d21f1ea97085f500000865200da6d3016
tree       c8edb768cc785e17e92d22ca6ec00f1e5c8c6bd2
parent     Fix the parameters of a test fixture (#7243)
download   synapse-f2049a8d21f1ea97085f500000865200da6d3016.tar.xz
Fix a potentially-huge sql query (#7274)
We could end up looking up tens of thousands of events, which could cause large
amounts of data to be logged to the postgres log.
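The fix replaces one enormous IN (...) lookup with batches of at most 1,000 event IDs. Below is a minimal, self-contained sketch of that pattern; batch_iter and make_in_list_sql_clause here are simplified stand-ins for Synapse's real helpers (the real make_in_list_sql_clause also takes the database engine, so it can emit "= ANY(?)" on postgres instead of a long IN list).

from itertools import islice
from typing import Iterable, Iterator, List, Tuple

def batch_iter(iterable, size):
    # type: (Iterable[str], int) -> Iterator[Tuple[str, ...]]
    """Yield successive chunks of at most `size` items from `iterable`."""
    it = iter(iterable)
    # tuple(islice(it, size)) returns () once `it` is exhausted, which is
    # exactly the sentinel that stops this iterator.
    return iter(lambda: tuple(islice(it, size)), ())

def make_in_list_sql_clause(column, iterable):
    # type: (str, Iterable[str]) -> Tuple[str, List[str]]
    """Build a "<column> IN (?, ?, ...)" clause plus its argument list."""
    values = list(iterable)
    return "%s IN (%s)" % (column, ", ".join("?" * len(values))), values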
-rw-r--r--  changelog.d/7274.bugfix                                |  1
-rw-r--r--  synapse/storage/data_stores/main/event_federation.py  | 23
2 files changed, 17 insertions, 7 deletions
diff --git a/changelog.d/7274.bugfix b/changelog.d/7274.bugfix
new file mode 100644
index 0000000000..211a38befc
--- /dev/null
+++ b/changelog.d/7274.bugfix
@@ -0,0 +1 @@
+Fix a sql query introduced in Synapse 1.12.0 which could cause large amounts of logging to the postgres slow-query log.
diff --git a/synapse/storage/data_stores/main/event_federation.py b/synapse/storage/data_stores/main/event_federation.py
index 62d4e9f599..b99439cc37 100644
--- a/synapse/storage/data_stores/main/event_federation.py
+++ b/synapse/storage/data_stores/main/event_federation.py
@@ -173,19 +173,28 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
             for event_id in initial_events
         }
 
+        # The sorted list of events whose auth chains we should walk.
+        search = []  # type: List[Tuple[int, str]]
+
         # We need to get the depth of the initial events for sorting purposes.
         sql = """
             SELECT depth, event_id FROM events
             WHERE %s
-            ORDER BY depth ASC
         """
-        clause, args = make_in_list_sql_clause(
-            txn.database_engine, "event_id", initial_events
-        )
-        txn.execute(sql % (clause,), args)
+        # the list can be huge, so let's avoid looking them all up in one massive
+        # query.
+        for batch in batch_iter(initial_events, 1000):
+            clause, args = make_in_list_sql_clause(
+                txn.database_engine, "event_id", batch
+            )
+            txn.execute(sql % (clause,), args)
 
-        # The sorted list of events whose auth chains we should walk.
-        search = txn.fetchall()  # type: List[Tuple[int, str]]
+            # I think building a temporary list with fetchall is more efficient than
+            # just `search.extend(txn)`, but this is unconfirmed
+            search.extend(txn.fetchall())
+
+        # sort by depth
+        search.sort()
 
         # Map from event to its auth events
         event_to_auth_events = {}  # type: Dict[str, Set[str]]
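As a usage sketch, the batched loop from the patch can be exercised end to end against an in-memory SQLite database, reusing the simplified batch_iter and make_in_list_sql_clause above. The table contents here are made up; in Synapse the loop runs inside a transaction wrapper, and a batch size of 500 is used below (rather than the patch's 1000) to stay under older SQLite's default 999-bound-parameter limit.

import sqlite3

conn = sqlite3.connect(":memory:")
txn = conn.cursor()
txn.execute("CREATE TABLE events (event_id TEXT PRIMARY KEY, depth INTEGER)")
txn.executemany(
    "INSERT INTO events VALUES (?, ?)",
    [("$ev%d" % i, i % 7) for i in range(2500)],
)

initial_events = ["$ev%d" % i for i in range(2500)]

sql = """
    SELECT depth, event_id FROM events
    WHERE %s
"""

search = []  # type: List[Tuple[int, str]]

# Look the events up in batches rather than binding thousands of
# parameters to a single statement.
for batch in batch_iter(initial_events, 500):
    clause, args = make_in_list_sql_clause("event_id", batch)
    txn.execute(sql % (clause,), args)
    search.extend(txn.fetchall())

# The one-shot query could ORDER BY depth; with batching we sort afterwards.
search.sort()

assert len(search) == 2500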