summary refs log tree commit diff
path: root/synapse/storage
diff options
context:
space:
mode:
author: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>, 2020-07-15 18:33:03 +0100
committer: GitHub <noreply@github.com>, 2020-07-15 18:33:03 +0100
commit: a57df9b82750a10391811a6de974093cd2b53387 (patch)
tree: 583a045fbd7319846f587bea6e8ef499e87f11e3 /synapse/storage
parent: Merge branch 'erikj/faster_typing' of github.com:matrix-org/synapse into develop (diff)
download: synapse-a57df9b82750a10391811a6de974093cd2b53387.tar.xz
Avoid brand new rooms in `delete_old_current_state_events` (#7854)
When considering rooms to clean up in `delete_old_current_state_events`, skip
rooms which we are creating, which otherwise look a bit like rooms we have
left.

Fixes #7834.
Diffstat (limited to 'synapse/storage')
-rw-r--r-- synapse/storage/data_stores/main/state.py | 65
1 files changed, 55 insertions, 10 deletions
diff --git a/synapse/storage/data_stores/main/state.py b/synapse/storage/data_stores/main/state.py
index 347cc50778..bb38a04ede 100644
--- a/synapse/storage/data_stores/main/state.py
+++ b/synapse/storage/data_stores/main/state.py
@@ -353,6 +353,7 @@ class MainStateBackgroundUpdateStore(RoomMemberWorkerStore):
         last_room_id = progress.get("last_room_id", "")
 
         def _background_remove_left_rooms_txn(txn):
+            # get a batch of room ids to consider
             sql = """
                 SELECT DISTINCT room_id FROM current_state_events
                 WHERE room_id > ? ORDER BY room_id LIMIT ?
@@ -363,24 +364,68 @@ class MainStateBackgroundUpdateStore(RoomMemberWorkerStore):
             if not room_ids:
                 return True, set()
 
+            ###########################################################################
+            #
+            # exclude rooms where we have active members
+
             sql = """
                 SELECT room_id
-                FROM current_state_events
+                FROM local_current_membership
                 WHERE
                     room_id > ? AND room_id <= ?
-                    AND type = 'm.room.member'
                     AND membership = 'join'
-                    AND state_key LIKE ?
                 GROUP BY room_id
             """
 
-            txn.execute(sql, (last_room_id, room_ids[-1], "%:" + self.server_name))
-
+            txn.execute(sql, (last_room_id, room_ids[-1]))
             joined_room_ids = {row[0] for row in txn}
+            to_delete = set(room_ids) - joined_room_ids
+
+            ###########################################################################
+            #
+            # exclude rooms which we are in the process of constructing; these otherwise
+            # qualify as "rooms with no local users", and would have their
+            # forward extremities cleaned up.
+
+            # the following query will return a list of rooms which have forward
+            # extremities that are *not* also the create event in the room - ie
+            # those that are not being created currently.
+
+            sql = """
+                SELECT DISTINCT efe.room_id
+                FROM event_forward_extremities efe
+                LEFT JOIN current_state_events cse ON
+                    cse.event_id = efe.event_id
+                    AND cse.type = 'm.room.create'
+                    AND cse.state_key = ''
+                WHERE
+                    cse.event_id IS NULL
+                    AND efe.room_id > ? AND efe.room_id <= ?
+            """
+
+            txn.execute(sql, (last_room_id, room_ids[-1]))
+
+            # build a set of those rooms within `to_delete` that do not appear in
+            # the above, leaving us with the rooms in `to_delete` that *are* being
+            # created.
+            creating_rooms = to_delete.difference(row[0] for row in txn)
+            logger.info("skipping rooms which are being created: %s", creating_rooms)
+
+            # now remove the rooms being created from the list of those to delete.
+            #
+            # (we could have just taken the intersection of `to_delete` with the result
+            # of the sql query, but it's useful to be able to log `creating_rooms`; and
+            # having done so, it's quicker to remove the (few) creating rooms from
+            # `to_delete` than it is to form the intersection with the (larger) list of
+            # not-creating-rooms)
+
+            to_delete -= creating_rooms
 
-            left_rooms = set(room_ids) - joined_room_ids
+            ###########################################################################
+            #
+            # now clear the state for the rooms
 
-            logger.info("Deleting current state left rooms: %r", left_rooms)
+            logger.info("Deleting current state left rooms: %r", to_delete)
 
             # First we get all users that we still think were joined to the
             # room. This is so that we can mark those device lists as
@@ -391,7 +436,7 @@ class MainStateBackgroundUpdateStore(RoomMemberWorkerStore):
                 txn,
                 table="current_state_events",
                 column="room_id",
-                iterable=left_rooms,
+                iterable=to_delete,
                 keyvalues={"type": EventTypes.Member, "membership": Membership.JOIN},
                 retcols=("state_key",),
             )
@@ -403,7 +448,7 @@ class MainStateBackgroundUpdateStore(RoomMemberWorkerStore):
                 txn,
                 table="current_state_events",
                 column="room_id",
-                iterable=left_rooms,
+                iterable=to_delete,
                 keyvalues={},
             )
 
@@ -411,7 +456,7 @@ class MainStateBackgroundUpdateStore(RoomMemberWorkerStore):
                 txn,
                 table="event_forward_extremities",
                 column="room_id",
-                iterable=left_rooms,
+                iterable=to_delete,
                 keyvalues={},
             )