1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
|
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index cb4a585..1196781 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -2503,6 +2503,49 @@ class DatabasePool:
return txn.fetchall()
+# NOTE: the array form below casts the values to bigint[], so they must be integers there.
+def make_select_id_if_found_sql_clause(
+    database_engine: BaseDatabaseEngine,
+    column: str,
+    table: str,
+    iterable: Collection[Any],
+    *,
+    negative: bool = False,
+) -> Tuple[str, list]:
+    """Returns the body of a SELECT (the text after the `SELECT` keyword) that
+    yields, as `column`, the values of `iterable` found in `table.column`.
+
+    On engines with `supports_using_any_list` (Postgres) the values travel as
+    one `bigint[]` parameter that is unnested and probed with `EXISTS`, so
+    queries with different length iterables look the same in the query stats.
+    Otherwise this expands to `DISTINCT column FROM table WHERE column IN (?, ...)`.
+
+    NOTE(review): with `negative=True` the two forms disagree: the array form
+    returns the supplied values missing from the table, while the `NOT IN` form
+    returns the table's values that were not supplied. Confirm before relying on it.
+
+    Args:
+        database_engine: Its `supports_using_any_list` flag selects the form.
+        column: Name of the column (interpolated into the SQL as-is).
+        table: Name of the table (interpolated into the SQL as-is).
+        iterable: The values to look up.
+        negative: Whether to negate the check (`NOT EXISTS` / `NOT IN`).
+
+    Returns:
+        A tuple of the SQL fragment and the args
+    """
+    negation = "NOT " if negative else ""
+    if database_engine.supports_using_any_list:
+        # One array parameter keeps the statement text the same for any length.
+        lookup = f"{column}_lookup"
+        clause = f"{lookup} AS {column} FROM UNNEST(?::bigint[]) {lookup} WHERE {negation}EXISTS(SELECT FROM {table} WHERE {column}={lookup})"
+        return clause, [list(iterable)]
+
+    values = list(iterable)
+    params = ",".join("?" * len(values))
+    clause = f"DISTINCT {column} FROM {table} WHERE {column} {negation}IN ({params})"
+    return clause, values
+
def make_in_list_sql_clause(
database_engine: BaseDatabaseEngine,
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index 46aa590..026f011 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -52,6 +52,7 @@ from synapse.storage.database import (
DatabasePool,
LoggingDatabaseConnection,
LoggingTransaction,
+ make_select_id_if_found_sql_clause,
)
from synapse.storage.databases.main.events_worker import EventsWorkerStore
from synapse.storage.databases.main.signatures import SignatureWorkerStore
@@ -362,8 +363,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
sql = """
WITH RECURSIVE links(chain_id) AS (
SELECT
- DISTINCT origin_chain_id
- FROM event_auth_chain_links WHERE %s
+ %s
UNION
SELECT
target_chain_id
@@ -380,8 +380,8 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas
while chains_to_fetch:
batch2 = tuple(itertools.islice(chains_to_fetch, 1000))
chains_to_fetch.difference_update(batch2)
- clause, args = make_in_list_sql_clause(
- txn.database_engine, "origin_chain_id", batch2
+ clause, args = make_select_id_if_found_sql_clause(
+ txn.database_engine, "origin_chain_id", "event_auth_chain_links", batch2
)
txn.execute(sql % (clause,), args)
|