def make_select_id_if_found_sql_clause(
    database_engine: BaseDatabaseEngine,
    column: str,
    table: str,
    iterable: Collection[Any],
    *,
    negative: bool = False,
) -> Tuple[str, list]:
    """Build the body of a `SELECT` reporting which of the given IDs are in a table.

    The returned string is everything that follows the `SELECT` keyword, so
    callers use it as `"SELECT %s" % (clause,)`. The query yields a single
    column named `column` containing the values from `iterable` that appear
    in `table.column` (or, with `negative=True`, the values from `iterable`
    that do *not* appear there).

    On Postgres the values are passed as one array parameter and expanded
    with `UNNEST(?::bigint[])`, so queries with different numbers of values
    share a single statement shape in the query stats. Because of the
    `bigint[]` cast, that form only works for integer IDs. On SQLite one `?`
    placeholder is emitted per value.

    Note that the Postgres form emits one row per matching input value, so
    duplicate inputs produce duplicate rows; the SQLite form is `DISTINCT`.

    `column` and `table` are interpolated into the SQL verbatim and must be
    trusted identifiers, never user-supplied input.

    Args:
        database_engine: The engine the query will be run against; only its
            `supports_using_any_list` flag is consulted.
        column: Name of the ID column, also used as the result column name.
        table: Name of the table to look the IDs up in.
        iterable: The candidate IDs to look up.
        negative: If true, select the candidates that are missing from the
            table rather than the ones that are present.

    Returns:
        A tuple of the SQL fragment and the list of query arguments.
    """
    values = list(iterable)
    lookup = f"{column}_lookup"

    if database_engine.supports_using_any_list:
        # A single array argument keeps the statement text independent of
        # the number of values.
        exists = "NOT EXISTS" if negative else "EXISTS"
        clause = (
            f"{lookup} AS {column} FROM UNNEST(?::bigint[]) {lookup} "
            f"WHERE {exists}(SELECT FROM {table} WHERE {column}={lookup})"
        )
        return clause, [values]

    if not negative:
        params = ",".join("?" for _ in values)
        clause = f"DISTINCT {column} FROM {table} WHERE {column} IN ({params})"
        return clause, values

    if not values:
        # No candidates were supplied, so none can be missing. `VALUES`
        # needs at least one row, so emit a query that returns nothing.
        return f"DISTINCT {column} FROM {table} WHERE 0", []

    # `NOT IN` against the table would return the table's *other* IDs, which
    # is not what the Postgres branch does. Select from the candidates
    # instead. A multi-row VALUES is used rather than a UNION ALL chain as
    # the latter is capped by SQLite's compound-select limit.
    rows = ",".join("(?)" for _ in values)
    clause = (
        f"DISTINCT {lookup} AS {column} FROM "
        f"(SELECT column1 AS {lookup} FROM (VALUES {rows})) "
        f"WHERE NOT EXISTS(SELECT 1 FROM {table} WHERE {column}={lookup})"
    )
    return clause, values