summary refs log tree commit diff
diff options
context:
space:
mode:
authorErik Johnston <erik@matrix.org>2018-10-04 15:18:52 +0100
committerErik Johnston <erik@matrix.org>2018-10-04 16:03:06 +0100
commit17d585753f1df2b2c2b13ddb8171e174cef97aac (patch)
tree125422c61e14f088219ea86d0b990ef7132e207d
parentMerge branch 'master' into develop (diff)
downloadsynapse-17d585753f1df2b2c2b13ddb8171e174cef97aac.tar.xz
Delete unreferened state groups during purge
-rw-r--r--synapse/storage/events.py33
-rw-r--r--synapse/storage/state.py50
2 files changed, 77 insertions, 6 deletions
diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index e7487311ce..0fb190530a 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -2025,6 +2025,7 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
         logger.info("[purge] finding state groups which depend on redundant"
                     " state groups")
         remaining_state_groups = []
+        unreferenced_state_groups = 0
         for i in range(0, len(state_rows), 100):
             chunk = [sg for sg, in state_rows[i:i + 100]]
             # look for state groups whose prev_state_group is one we are about
@@ -2037,13 +2038,33 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
                 retcols=["state_group"],
                 keyvalues={},
             )
-            remaining_state_groups.extend(
-                row["state_group"] for row in rows
 
-                # exclude state groups we are about to delete: no point in
-                # updating them
-                if row["state_group"] not in state_groups_to_delete
-            )
+            for row in rows:
+                sg = row["state_group"]
+
+                if sg in state_groups_to_delete:
+                    # exclude state groups we are about to delete: no point in
+                    # updating them
+                    continue
+
+                if not self._is_state_group_referenced(txn, sg):
+                    # Let's also delete unreferenced state groups while we're
+                    # here, since otherwise we'd need to de-delta them
+                    state_groups_to_delete.add(sg)
+                    unreferenced_state_groups += 1
+                    continue
+
+                remaining_state_groups.append(sg)
+
+        logger.info(
+            "[purge] found %i extra unreferenced state groups to delete",
+            unreferenced_state_groups,
+        )
+
+        logger.info(
+            "[purge] de-delta-ing %i remaining state groups",
+            len(remaining_state_groups),
+        )
 
         # Now we turn the state groups that reference to-be-deleted state
         # groups to non delta versions.
diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index 3f4cbd61c4..b88c7dc091 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -1041,6 +1041,56 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
 
             return count
 
+    def _is_state_group_referenced(self, txn, state_group):
+        """Checks if a given state group is referenced, or is safe to delete.
+
+        A state groups is referenced if it or any of its descendants are
+        pointed at by an event. (A descendant is a group which has the given
+        state_group as a prev group)
+        """
+
+        # We check this by doing a depth first search to look for any
+        # descendant referenced by `event_to_state_groups`.
+
+        # State groups we need to check, contains state groups that are
+        # descendants of `state_group`
+        state_groups_to_search = [state_group]
+
+        # Set of state groups we've already checked
+        state_groups_searched = set()
+
+        while state_groups_to_search:
+            state_group = state_groups_to_search.pop()  # Next state group to check
+
+            is_referenced = self._simple_select_one_onecol_txn(
+                txn,
+                table="event_to_state_groups",
+                keyvalues={"state_group": state_group},
+                retcol="event_id",
+                allow_none=True,
+            )
+            if is_referenced:
+                # A descendant is referenced by event_to_state_groups, so
+                # original state group is referenced.
+                return True
+
+            state_groups_searched.add(state_group)
+
+            # Find all children of current state group and add to search
+            references = self._simple_select_onecol_txn(
+                txn,
+                table="state_group_edges",
+                keyvalues={"prev_state_group": state_group},
+                retcol="state_group",
+            )
+            state_groups_to_search.extend(references)
+
+            # Lets be paranoid and check for cycles
+            if state_groups_searched.intersection(references):
+                raise Exception("State group %s has cyclic dependency", state_group)
+
+        return False
+
 
 class StateStore(StateGroupWorkerStore, BackgroundUpdateStore):
     """ Keeps track of the state at a given event.