summary refs log tree commit diff
diff options
context:
space:
mode:
authorBrendan Abolivier <babolivier@matrix.org>2019-11-07 11:49:37 +0000
committerBrendan Abolivier <babolivier@matrix.org>2019-11-07 11:51:11 +0000
commit3f9b61ff9504dd88cac17fb3cb097e319babd2a3 (patch)
tree470d8df6be60f7f6b3879a980acf0098a553072a
parentFix bug which caused rejected events to be stored with the wrong room state ... (diff)
downloadsynapse-3f9b61ff9504dd88cac17fb3cb097e319babd2a3.tar.xz
Fix the SQL SELECT query in _paginate_room_events_txn
Doing a SELECT DISTINCT when paginating is quite expensive, because it requires the engine to sort the entire events table. However, we only need it when we're filtering on 2+ labels, so this PR changes the query so that DISTINCT is only used in that case.
-rw-r--r--synapse/storage/data_stores/main/stream.py15
1 files changed, 13 insertions, 2 deletions
diff --git a/synapse/storage/data_stores/main/stream.py b/synapse/storage/data_stores/main/stream.py
index 616ef91d4e..ef0b1426d1 100644
--- a/synapse/storage/data_stores/main/stream.py
+++ b/synapse/storage/data_stores/main/stream.py
@@ -871,14 +871,25 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
 
         args.append(int(limit))
 
+        # Using DISTINCT in this SELECT query is quite expensive, because it requires the
+        # engine to sort on the entire (not limited) result set, i.e. the entire events
+        # table. We only need to use it when we're filtering on two or more labels,
+        # because that's the only scenario in which we can possibly get the same event
+        # ID multiple times in the results.
+        if event_filter.labels and len(event_filter.labels) > 1:
+            select_keywords = "SELECT DISTINCT"
+
+        else:
+            select_keywords = "SELECT"
+
         sql = (
-            "SELECT DISTINCT event_id, topological_ordering, stream_ordering"
+            "%(select_keywords)s event_id, topological_ordering, stream_ordering"
             " FROM events"
             " LEFT JOIN event_labels USING (event_id, room_id, topological_ordering)"
             " WHERE outlier = ? AND room_id = ? AND %(bounds)s"
             " ORDER BY topological_ordering %(order)s,"
             " stream_ordering %(order)s LIMIT ?"
-        ) % {"bounds": bounds, "order": order}
+        ) % {"select_keywords": select_keywords, "bounds": bounds, "order": order}
 
         txn.execute(sql, args)