summary refs log tree commit diff
path: root/packages/overlays/matrix-synapse/patches/synapse-fast-links.patch
blob: c35ba879a646c324eec521ceabe7e69dae84daf4 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index cb4a585..1196781 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -2503,6 +2503,49 @@ class DatabasePool:
 
         return txn.fetchall()
 
+# Only the array (UNNEST) form needs database_engine.supports_using_any_list; other engines use IN.
+def make_select_id_if_found_sql_clause(
+    database_engine: BaseDatabaseEngine,
+    column: str,
+    table: str,
+    iterable: Collection[Any],
+    *,
+    negative: bool = False,
+) -> Tuple[str, list]:
+    """Build the tail of a `SELECT` listing which of the given values are in a table.
+
+    The leading `SELECT` is left to the caller. Engines that support array
+    arguments get `UNNEST(?::bigint[])` filtered by `EXISTS` against `table`;
+    the others get `DISTINCT column FROM table WHERE column IN (?, ?, ...)`.
+
+    Args:
+        database_engine: Its `supports_using_any_list` flag selects the form.
+        column: Name of the column; interpolated into the SQL as-is.
+        table: Name of the table; interpolated into the SQL as-is.
+        iterable: The values to look up (cast to `bigint[]` in the array form).
+        negative: Negate the test. NOTE(review): the array form then yields given
+            values absent from `table`, the `IN` form table values not given.
+
+    Returns:
+        A tuple of the SQL fragment and the args to execute it with.
+    """
+    # One array argument keeps the statement text identical for every batch
+    # size, which makes postgres query stats easier to read (and is hopefully faster).
+    if database_engine.supports_using_any_list:
+        if not negative:
+            clause = f"{column}_lookup AS {column} FROM UNNEST(?::bigint[]) {column}_lookup WHERE EXISTS(SELECT FROM {table} WHERE {column}={column}_lookup)"
+        else:
+            clause = f"{column}_lookup AS {column} FROM UNNEST(?::bigint[]) {column}_lookup WHERE NOT EXISTS(SELECT FROM {table} WHERE {column}={column}_lookup)"
+
+        return clause, [list(iterable)]
+    else:
+        params = ",".join("?" for _ in iterable)
+        if not negative:
+            clause = f"DISTINCT {column} FROM {table} WHERE {column} IN ({params})"
+        else:
+            clause = f"DISTINCT {column} FROM {table} WHERE {column} NOT IN ({params})"
+        return clause, list(iterable)
+
 
 def make_in_list_sql_clause(
     database_engine: BaseDatabaseEngine,
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index 46aa590..026f011 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -52,6 +52,7 @@ from synapse.storage.database import (
     DatabasePool,
     LoggingDatabaseConnection,
     LoggingTransaction,
+    make_select_id_if_found_sql_clause,
 )
 from synapse.storage.databases.main.events_worker import EventsWorkerStore
 from synapse.storage.databases.main.signatures import SignatureWorkerStore
@@ -362,8 +363,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
         sql = """
             WITH RECURSIVE links(chain_id) AS (
                 SELECT
-                    DISTINCT origin_chain_id
-                FROM event_auth_chain_links WHERE %s
+                    %s
                 UNION
                 SELECT
                     target_chain_id
@@ -380,8 +380,8 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
         while chains_to_fetch:
             batch2 = tuple(itertools.islice(chains_to_fetch, 1000))
             chains_to_fetch.difference_update(batch2)
-            clause, args = make_in_list_sql_clause(
-                txn.database_engine, "origin_chain_id", batch2
+            clause, args = make_select_id_if_found_sql_clause(
+                txn.database_engine, "origin_chain_id", "event_auth_chain_links", batch2
             )
             txn.execute(sql % (clause,), args)