diff options
author | Richard van der Hoff <1389908+richvdh@users.noreply.github.com> | 2020-04-15 10:16:35 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-04-15 10:16:35 +0100 |
commit | f2049a8d21f1ea97085f500000865200da6d3016 (patch) | |
tree | c8edb768cc785e17e92d22ca6ec00f1e5c8c6bd2 /synapse/storage/data_stores | |
parent | Fix the parameters of a test fixture (#7243) (diff) | |
download | synapse-f2049a8d21f1ea97085f500000865200da6d3016.tar.xz |
Fix a potentially-huge sql query (#7274)
We could end up looking up tens of thousands of events, which could cause large amounts of data to be logged to the postgres log.
Diffstat (limited to 'synapse/storage/data_stores')
-rw-r--r-- | synapse/storage/data_stores/main/event_federation.py | 23 |
1 file changed, 16 insertions, 7 deletions
```diff
diff --git a/synapse/storage/data_stores/main/event_federation.py b/synapse/storage/data_stores/main/event_federation.py
index 62d4e9f599..b99439cc37 100644
--- a/synapse/storage/data_stores/main/event_federation.py
+++ b/synapse/storage/data_stores/main/event_federation.py
@@ -173,19 +173,28 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas
             for event_id in initial_events
         }
 
+        # The sorted list of events whose auth chains we should walk.
+        search = []  # type: List[Tuple[int, str]]
+
         # We need to get the depth of the initial events for sorting purposes.
         sql = """
             SELECT depth, event_id FROM events
             WHERE %s
-            ORDER BY depth ASC
         """
-        clause, args = make_in_list_sql_clause(
-            txn.database_engine, "event_id", initial_events
-        )
-        txn.execute(sql % (clause,), args)
+        # the list can be huge, so let's avoid looking them all up in one massive
+        # query.
+        for batch in batch_iter(initial_events, 1000):
+            clause, args = make_in_list_sql_clause(
+                txn.database_engine, "event_id", batch
+            )
+            txn.execute(sql % (clause,), args)
 
-        # The sorted list of events whose auth chains we should walk.
-        search = txn.fetchall()  # type: List[Tuple[int, str]]
+            # I think building a temporary list with fetchall is more efficient than
+            # just `search.extend(txn)`, but this is unconfirmed
+            search.extend(txn.fetchall())
+
+        # sort by depth
+        search.sort()
 
         # Map from event to its auth events
         event_to_auth_events = {}  # type: Dict[str, Set[str]]
```