diff --git a/changelog.d/3856.misc b/changelog.d/3856.misc
new file mode 100644
index 0000000000..36c311eb3d
--- /dev/null
+++ b/changelog.d/3856.misc
@@ -0,0 +1 @@
+Speed up purge history for rooms that have been previously purged.
diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index 30ff87a4c4..e7487311ce 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -1890,20 +1890,6 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
")"
)
- # create an index on should_delete because later we'll be looking for
- # the should_delete / shouldn't_delete subsets
- txn.execute(
- "CREATE INDEX events_to_purge_should_delete"
- " ON events_to_purge(should_delete)",
- )
-
- # We do joins against events_to_purge for e.g. calculating state
- # groups to purge, etc., so lets make an index.
- txn.execute(
- "CREATE INDEX events_to_purge_id"
- " ON events_to_purge(event_id)",
- )
-
# First ensure that we're not about to delete all the forward extremeties
txn.execute(
"SELECT e.event_id, e.depth FROM events as e "
@@ -1930,19 +1916,45 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
should_delete_params = ()
if not delete_local_events:
should_delete_expr += " AND event_id NOT LIKE ?"
- should_delete_params += ("%:" + self.hs.hostname, )
+
+ # We include the parameter twice since we use the expression twice
+ should_delete_params += (
+ "%:" + self.hs.hostname,
+ "%:" + self.hs.hostname,
+ )
should_delete_params += (room_id, token.topological)
+ # Note that we don't insert events that are outliers and aren't going to be
+ # deleted, as nothing will happen to them.
txn.execute(
"INSERT INTO events_to_purge"
" SELECT event_id, %s"
" FROM events AS e LEFT JOIN state_events USING (event_id)"
- " WHERE e.room_id = ? AND topological_ordering < ?" % (
+ " WHERE (NOT outlier OR (%s)) AND e.room_id = ? AND topological_ordering < ?"
+ % (
+ should_delete_expr,
should_delete_expr,
),
should_delete_params,
)
+
+ # We create the indices *after* insertion as that's a lot faster.
+
+ # create an index on should_delete because later we'll be looking for
+ # the should_delete / shouldn't_delete subsets
+ txn.execute(
+ "CREATE INDEX events_to_purge_should_delete"
+ " ON events_to_purge(should_delete)",
+ )
+
+ # We do joins against events_to_purge for e.g. calculating state
+ # groups to purge, etc., so let's make an index.
+ txn.execute(
+ "CREATE INDEX events_to_purge_id"
+ " ON events_to_purge(event_id)",
+ )
+
txn.execute(
"SELECT event_id, should_delete FROM events_to_purge"
)
|