diff options
author | Erik Johnston <erikj@jki.re> | 2018-09-13 16:14:46 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-09-13 16:14:46 +0100 |
commit | 6c0f8d9d5058e65ad33b535f14b883a167841369 (patch) | |
tree | a92c752232af4479e15b78fa4b4aa848aa9c53f4 /synapse | |
parent | Merge pull request #3859 from matrix-org/erikj/add_iterkeys (diff) | |
parent | comment (diff) | |
download | synapse-6c0f8d9d5058e65ad33b535f14b883a167841369.tar.xz |
Merge pull request #3856 from matrix-org/erikj/speed_up_purge hhs-8
Make purge history slightly faster
Diffstat (limited to 'synapse')
-rw-r--r-- | synapse/storage/events.py | 44 |
1 files changed, 28 insertions, 16 deletions
diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 30ff87a4c4..e7487311ce 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1890,20 +1890,6 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore ")" ) - # create an index on should_delete because later we'll be looking for - # the should_delete / shouldn't_delete subsets - txn.execute( - "CREATE INDEX events_to_purge_should_delete" - " ON events_to_purge(should_delete)", - ) - - # We do joins against events_to_purge for e.g. calculating state - # groups to purge, etc., so lets make an index. - txn.execute( - "CREATE INDEX events_to_purge_id" - " ON events_to_purge(event_id)", - ) - # First ensure that we're not about to delete all the forward extremeties txn.execute( "SELECT e.event_id, e.depth FROM events as e " @@ -1930,19 +1916,45 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore should_delete_params = () if not delete_local_events: should_delete_expr += " AND event_id NOT LIKE ?" - should_delete_params += ("%:" + self.hs.hostname, ) + + # We include the parameter twice since we use the expression twice + should_delete_params += ( + "%:" + self.hs.hostname, + "%:" + self.hs.hostname, + ) should_delete_params += (room_id, token.topological) + # Note that we insert events that are outliers and aren't going to be + # deleted, as nothing will happen to them. txn.execute( "INSERT INTO events_to_purge" " SELECT event_id, %s" " FROM events AS e LEFT JOIN state_events USING (event_id)" - " WHERE e.room_id = ? AND topological_ordering < ?" % ( + " WHERE (NOT outlier OR (%s)) AND e.room_id = ? AND topological_ordering < ?" + % ( + should_delete_expr, should_delete_expr, ), should_delete_params, ) + + # We create the indices *after* insertion as that's a lot faster. + + # create an index on should_delete because later we'll be looking for + # the should_delete / shouldn't_delete subsets + txn.execute( + "CREATE INDEX events_to_purge_should_delete" + " ON events_to_purge(should_delete)", + ) + + # We do joins against events_to_purge for e.g. calculating state + # groups to purge, etc., so lets make an index. + txn.execute( + "CREATE INDEX events_to_purge_id" + " ON events_to_purge(event_id)", + ) + txn.execute( "SELECT event_id, should_delete FROM events_to_purge" ) |