From 064fbcdd10c15eea7f695b693c7ee1ef99b6b606 Mon Sep 17 00:00:00 2001
From: Rory&
Date: Tue, 22 Jul 2025 05:07:01 +0200
Subject: [PATCH 08/19] Fast auth links

Signed-off-by: Rory&
---
 synapse/storage/database.py                   | 57 +++++++++++++++++++
 .../databases/main/event_federation.py        |  8 ++--
 2 files changed, 61 insertions(+), 4 deletions(-)

diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 6e38b55686..2bab1e53c5 100644
--- a/synapse/storage/database.py
+++ b/synapse/storage/database.py
@@ -2608,6 +2608,63 @@ class DatabasePool:
         return txn.fetchall()
 
 
+def make_select_id_if_found_sql_clause(
+    database_engine: BaseDatabaseEngine,
+    column: str,
+    table: str,
+    iterable: Collection[Any],
+    *,
+    negative: bool = False,
+) -> tuple[str, list]:
+    """Returns the body of a `SELECT` that yields the given IDs found in a table.
+
+    The returned SQL omits the leading `SELECT` keyword, so that callers can
+    splice it in directly after their own `SELECT` (e.g. inside a CTE).
+
+    On Postgres the IDs are passed as a single `bigint[]` parameter which is
+    `UNNEST`ed and probed against the table with `EXISTS`. Queries with
+    different length iterables therefore look the same to postgres, helping
+    the query stats. This only works for integer columns.
+
+    Otherwise (i.e. on SQLite) this expands to `DISTINCT column FROM table
+    WHERE column IN (?, ?, ...)`.
+
+    Note that the two forms only agree when `negative` is false. With
+    `negative` set, Postgres returns the values of `iterable` that are missing
+    from the table, whereas SQLite returns the values of the table that are
+    missing from `iterable`.
+
+    Args:
+        database_engine
+        column: Name of the column. Interpolated into the SQL as-is, so must
+            be a trusted identifier.
+        table: Name of the table. Interpolated into the SQL as-is, so must
+            be a trusted identifier.
+        iterable: The IDs to look for in the column.
+        negative: Whether we should invert the check, i.e. `NOT EXISTS` /
+            `NOT IN`.
+
+    Returns:
+        A tuple of SQL fragment (to be placed after `SELECT`) and the args
+    """
+    maybe_not = "NOT " if negative else ""
+
+    if database_engine.supports_using_any_list:
+        # This should hopefully be faster, but also makes postgres query
+        # stats easier to understand.
+        lookup = f"{column}_lookup"
+        clause = (
+            f"{lookup} AS {column} FROM UNNEST(?::bigint[]) {lookup} "
+            f"WHERE {maybe_not}EXISTS(SELECT FROM {table} WHERE {column}={lookup})"
+        )
+        return clause, [list(iterable)]
+
+    # Materialise once so the placeholders and the args always line up.
+    args = list(iterable)
+    params = ",".join("?" for _ in args)
+    clause = f"DISTINCT {column} FROM {table} WHERE {column} {maybe_not}IN ({params})"
+    return clause, args
+
 
 def make_in_list_sql_clause(
     database_engine: BaseDatabaseEngine,
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index 415926eb0a..0e34a3ffc3 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -47,6 +47,7 @@ from synapse.storage.database import (
     DatabasePool,
     LoggingDatabaseConnection,
     LoggingTransaction,
+    make_select_id_if_found_sql_clause,
 )
 from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore
 from synapse.storage.databases.main.events_worker import EventsWorkerStore
@@ -384,8 +385,7 @@ class EventFederationWorkerStore(
         sql = """
             WITH RECURSIVE links(chain_id) AS (
                 SELECT
-                    DISTINCT origin_chain_id
-                FROM event_auth_chain_links WHERE %s
+                    %s
                 UNION
                 SELECT
                     target_chain_id
@@ -402,8 +402,8 @@ class EventFederationWorkerStore(
         while chains_to_fetch:
             batch2 = tuple(itertools.islice(chains_to_fetch, 1000))
             chains_to_fetch.difference_update(batch2)
-            clause, args = make_in_list_sql_clause(
-                txn.database_engine, "origin_chain_id", batch2
+            clause, args = make_select_id_if_found_sql_clause(
+                txn.database_engine, "origin_chain_id", "event_auth_chain_links", batch2
             )
             txn.execute(sql % (clause,), args)
 
-- 
2.53.0