1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
|
From 064fbcdd10c15eea7f695b693c7ee1ef99b6b606 Mon Sep 17 00:00:00 2001
From: Rory& <root@rory.gay>
Date: Tue, 22 Jul 2025 05:07:01 +0200
Subject: [PATCH 08/19] Fast auth links
Signed-off-by: Rory& <root@rory.gay>
---
synapse/storage/database.py | 43 +++++++++++++++++++
.../databases/main/event_federation.py | 8 ++--
2 files changed, 47 insertions(+), 4 deletions(-)
diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 6e38b55686..2bab1e53c5 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -2608,6 +2608,49 @@ class DatabasePool:
return txn.fetchall()
+# The UNNEST form requires database_engine.supports_using_any_list; otherwise `IN (...)` is used
+def make_select_id_if_found_sql_clause(
+ database_engine: BaseDatabaseEngine,
+ column: str,
+ table: str,
+ iterable: Collection[Any],
+ *,
+ negative: bool = False,
+) -> tuple[str, list]:
+ """Returns the body of a `SELECT` listing the given values found in a table.
+
+ The returned SQL omits the leading `SELECT`, which the caller supplies. With
+ `supports_using_any_list` it selects from `UNNEST(?::bigint[])` (one array
+ arg) filtered by `EXISTS`; otherwise `DISTINCT column ... IN (?, ?, ...)`.
+ NOTE(review): with `negative` the array form yields supplied values missing
+ from the table, but the fallback yields table values not in `iterable`.
+
+ Args:
+ database_engine: Its `supports_using_any_list` flag selects the form.
+ column: Name of the column (interpolated into the SQL as-is)
+ table: Name of the table (interpolated into the SQL as-is)
+ iterable: The values to look up; cast to `bigint[]` in the array form.
+ negative: Whether to use `NOT EXISTS` / `NOT IN` instead.
+
+ Returns:
+ A tuple of SQL query fragment and the args
+ """
+ # Hopefully faster, and also makes postgres query stats easier to understand.
+ if database_engine.supports_using_any_list:
+ if not negative:
+ clause = f"{column}_lookup AS {column} FROM UNNEST(?::bigint[]) {column}_lookup WHERE EXISTS(SELECT FROM {table} WHERE {column}={column}_lookup)"
+ else:
+ clause = f"{column}_lookup AS {column} FROM UNNEST(?::bigint[]) {column}_lookup WHERE NOT EXISTS(SELECT FROM {table} WHERE {column}={column}_lookup)"
+
+ return clause, [list(iterable)]
+ else:
+ params = ",".join("?" for _ in iterable)
+ if not negative:
+ clause = f"DISTINCT {column} FROM {table} WHERE {column} IN ({params})"
+ else:
+ clause = f"DISTINCT {column} FROM {table} WHERE {column} NOT IN ({params})"
+ return clause, list(iterable)
+
def make_in_list_sql_clause(
database_engine: BaseDatabaseEngine,
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index 415926eb0a..0e34a3ffc3 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -47,6 +47,7 @@ from synapse.storage.database import (
DatabasePool,
LoggingDatabaseConnection,
LoggingTransaction,
+ make_select_id_if_found_sql_clause,
)
from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore
from synapse.storage.databases.main.events_worker import EventsWorkerStore
@@ -384,8 +385,7 @@ class EventFederationWorkerStore(
sql = """
WITH RECURSIVE links(chain_id) AS (
SELECT
- DISTINCT origin_chain_id
- FROM event_auth_chain_links WHERE %s
+ %s
UNION
SELECT
target_chain_id
@@ -402,8 +402,8 @@ class EventFederationWorkerStore(
while chains_to_fetch:
batch2 = tuple(itertools.islice(chains_to_fetch, 1000))
chains_to_fetch.difference_update(batch2)
- clause, args = make_in_list_sql_clause(
- txn.database_engine, "origin_chain_id", batch2
+ clause, args = make_select_id_if_found_sql_clause(
+ txn.database_engine, "origin_chain_id", "event_auth_chain_links", batch2
)
txn.execute(sql % (clause,), args)
--
2.53.0
|