diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index e7487311ce..0fb190530a 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -2025,6 +2025,7 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
logger.info("[purge] finding state groups which depend on redundant"
" state groups")
remaining_state_groups = []
+ unreferenced_state_groups = 0
for i in range(0, len(state_rows), 100):
chunk = [sg for sg, in state_rows[i:i + 100]]
# look for state groups whose prev_state_group is one we are about
@@ -2037,13 +2038,33 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
retcols=["state_group"],
keyvalues={},
)
- remaining_state_groups.extend(
- row["state_group"] for row in rows
- # exclude state groups we are about to delete: no point in
- # updating them
- if row["state_group"] not in state_groups_to_delete
- )
+ for row in rows:
+ sg = row["state_group"]
+
+ if sg in state_groups_to_delete:
+ # exclude state groups we are about to delete: no point in
+ # updating them
+ continue
+
+ if not self._is_state_group_referenced(txn, sg):
+ # Let's also delete unreferenced state groups while we're
+ # here, since otherwise we'd need to de-delta them
+ state_groups_to_delete.add(sg)
+ unreferenced_state_groups += 1
+ continue
+
+ remaining_state_groups.append(sg)
+
+ logger.info(
+ "[purge] found %i extra unreferenced state groups to delete",
+ unreferenced_state_groups,
+ )
+
+ logger.info(
+ "[purge] de-delta-ing %i remaining state groups",
+ len(remaining_state_groups),
+ )
# Now we turn the state groups that reference to-be-deleted state
# groups to non delta versions.
diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index 3f4cbd61c4..b88c7dc091 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -1041,6 +1041,56 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
return count
+ def _is_state_group_referenced(self, txn, state_group):
+ """Checks if a given state group is referenced, or is safe to delete.
+
+ A state groups is referenced if it or any of its descendants are
+ pointed at by an event. (A descendant is a group which has the given
+ state_group as a prev group)
+ """
+
+ # We check this by doing a depth first search to look for any
+ # descendant referenced by `event_to_state_groups`.
+
+ # State groups we need to check, contains state groups that are
+ # descendants of `state_group`
+ state_groups_to_search = [state_group]
+
+ # Set of state groups we've already checked
+ state_groups_searched = set()
+
+ while state_groups_to_search:
+ state_group = state_groups_to_search.pop() # Next state group to check
+
+ is_referenced = self._simple_select_one_onecol_txn(
+ txn,
+ table="event_to_state_groups",
+ keyvalues={"state_group": state_group},
+ retcol="event_id",
+ allow_none=True,
+ )
+ if is_referenced:
+ # A descendant is referenced by event_to_state_groups, so
+ # original state group is referenced.
+ return True
+
+ state_groups_searched.add(state_group)
+
+ # Find all children of current state group and add to search
+ references = self._simple_select_onecol_txn(
+ txn,
+ table="state_group_edges",
+ keyvalues={"prev_state_group": state_group},
+ retcol="state_group",
+ )
+ state_groups_to_search.extend(references)
+
+ # Lets be paranoid and check for cycles
+ if state_groups_searched.intersection(references):
+ raise Exception("State group %s has cyclic dependency", state_group)
+
+ return False
+
class StateStore(StateGroupWorkerStore, BackgroundUpdateStore):
""" Keeps track of the state at a given event.
|