summary refs log tree commit diff
path: root/synapse
diff options
context:
space:
mode:
authorreivilibre <oliverw@matrix.org>2022-02-02 11:37:18 +0000
committerGitHub <noreply@github.com>2022-02-02 11:37:18 +0000
commitaf795173bec447da44b65b739d05d0083b02229d (patch)
tree4b423cfa066215aff63aa36048da9d1ba8024d1c /synapse
parentExpose the registered device ID from the `register_appservice_user` test help... (diff)
downloadsynapse-af795173bec447da44b65b739d05d0083b02229d.tar.xz
Add a background database update to purge account data for deactivated users. (#11655)
Diffstat (limited to 'synapse')
-rw-r--r--synapse/storage/databases/main/account_data.py164
-rw-r--r--synapse/storage/schema/main/delta/68/03_delete_account_data_for_deactivated_accounts.sql20
2 files changed, 129 insertions, 55 deletions
diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py
index 5bfa408f74..52146aacc8 100644
--- a/synapse/storage/databases/main/account_data.py
+++ b/synapse/storage/databases/main/account_data.py
@@ -106,6 +106,11 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
             "AccountDataAndTagsChangeCache", account_max
         )
 
+        self.db_pool.updates.register_background_update_handler(
+            "delete_account_data_for_deactivated_users",
+            self._delete_account_data_for_deactivated_users,
+        )
+
     def get_max_account_data_stream_id(self) -> int:
         """Get the current max stream ID for account data stream
 
@@ -549,72 +554,121 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
 
     async def purge_account_data_for_user(self, user_id: str) -> None:
         """
-        Removes the account data for a user.
+        Removes ALL the account data for a user.
+        Intended to be used upon user deactivation.
 
-        This is intended to be used upon user deactivation and also removes any
-        derived information from account data (e.g. push rules and ignored users).
+        Also purges the user from the ignored_users cache table
+        and the push_rules cache tables.
+        """
 
-        Args:
-            user_id: The user ID to remove data for.
+        await self.db_pool.runInteraction(
+            "purge_account_data_for_user_txn",
+            self._purge_account_data_for_user_txn,
+            user_id,
+        )
+
+    def _purge_account_data_for_user_txn(
+        self, txn: LoggingTransaction, user_id: str
+    ) -> None:
         """
+        See `purge_account_data_for_user`.
+        """
+        # Purge from the primary account_data tables.
+        self.db_pool.simple_delete_txn(
+            txn, table="account_data", keyvalues={"user_id": user_id}
+        )
 
-        def purge_account_data_for_user_txn(txn: LoggingTransaction) -> None:
-            # Purge from the primary account_data tables.
-            self.db_pool.simple_delete_txn(
-                txn, table="account_data", keyvalues={"user_id": user_id}
-            )
+        self.db_pool.simple_delete_txn(
+            txn, table="room_account_data", keyvalues={"user_id": user_id}
+        )
 
-            self.db_pool.simple_delete_txn(
-                txn, table="room_account_data", keyvalues={"user_id": user_id}
-            )
+        # Purge from ignored_users where this user is the ignorer.
+        # N.B. We don't purge where this user is the ignoree, because that
+        #      interferes with other users' account data.
+        #      It's also not this user's data to delete!
+        self.db_pool.simple_delete_txn(
+            txn, table="ignored_users", keyvalues={"ignorer_user_id": user_id}
+        )
 
-            # Purge from ignored_users where this user is the ignorer.
-            # N.B. We don't purge where this user is the ignoree, because that
-            #      interferes with other users' account data.
-            #      It's also not this user's data to delete!
-            self.db_pool.simple_delete_txn(
-                txn, table="ignored_users", keyvalues={"ignorer_user_id": user_id}
-            )
+        # Remove the push rules
+        self.db_pool.simple_delete_txn(
+            txn, table="push_rules", keyvalues={"user_name": user_id}
+        )
+        self.db_pool.simple_delete_txn(
+            txn, table="push_rules_enable", keyvalues={"user_name": user_id}
+        )
+        self.db_pool.simple_delete_txn(
+            txn, table="push_rules_stream", keyvalues={"user_id": user_id}
+        )
 
-            # Remove the push rules
-            self.db_pool.simple_delete_txn(
-                txn, table="push_rules", keyvalues={"user_name": user_id}
-            )
-            self.db_pool.simple_delete_txn(
-                txn, table="push_rules_enable", keyvalues={"user_name": user_id}
-            )
-            self.db_pool.simple_delete_txn(
-                txn, table="push_rules_stream", keyvalues={"user_id": user_id}
-            )
+        # Invalidate caches as appropriate
+        self._invalidate_cache_and_stream(
+            txn, self.get_account_data_for_room_and_type, (user_id,)
+        )
+        self._invalidate_cache_and_stream(
+            txn, self.get_account_data_for_user, (user_id,)
+        )
+        self._invalidate_cache_and_stream(
+            txn, self.get_global_account_data_by_type_for_user, (user_id,)
+        )
+        self._invalidate_cache_and_stream(
+            txn, self.get_account_data_for_room, (user_id,)
+        )
+        self._invalidate_cache_and_stream(txn, self.get_push_rules_for_user, (user_id,))
+        self._invalidate_cache_and_stream(
+            txn, self.get_push_rules_enabled_for_user, (user_id,)
+        )
+        # This user might be contained in the ignored_by cache for other users,
+        # so we have to invalidate it all.
+        self._invalidate_all_cache_and_stream(txn, self.ignored_by)
 
-            # Invalidate caches as appropriate
-            self._invalidate_cache_and_stream(
-                txn, self.get_account_data_for_room_and_type, (user_id,)
-            )
-            self._invalidate_cache_and_stream(
-                txn, self.get_account_data_for_user, (user_id,)
-            )
-            self._invalidate_cache_and_stream(
-                txn, self.get_global_account_data_by_type_for_user, (user_id,)
-            )
-            self._invalidate_cache_and_stream(
-                txn, self.get_account_data_for_room, (user_id,)
-            )
-            self._invalidate_cache_and_stream(
-                txn, self.get_push_rules_for_user, (user_id,)
-            )
-            self._invalidate_cache_and_stream(
-                txn, self.get_push_rules_enabled_for_user, (user_id,)
-            )
-            # This user might be contained in the ignored_by cache for other users,
-            # so we have to invalidate it all.
-            self._invalidate_all_cache_and_stream(txn, self.ignored_by)
+    async def _delete_account_data_for_deactivated_users(
+        self, progress: dict, batch_size: int
+    ) -> int:
+        """
+        Retroactively purges account data for users that have already been deactivated.
+        Gets run as a background update caused by a schema delta.
+        """
 
-        await self.db_pool.runInteraction(
-            "purge_account_data_for_user_txn",
-            purge_account_data_for_user_txn,
+        last_user: str = progress.get("last_user", "")
+
+        def _delete_account_data_for_deactivated_users_txn(
+            txn: LoggingTransaction,
+        ) -> int:
+            sql = """
+                SELECT name FROM users
+                WHERE deactivated = ? and name > ?
+                ORDER BY name ASC
+                LIMIT ?
+            """
+
+            txn.execute(sql, (1, last_user, batch_size))
+            users = [row[0] for row in txn]
+
+            for user in users:
+                self._purge_account_data_for_user_txn(txn, user_id=user)
+
+            if users:
+                self.db_pool.updates._background_update_progress_txn(
+                    txn,
+                    "delete_account_data_for_deactivated_users",
+                    {"last_user": users[-1]},
+                )
+
+            return len(users)
+
+        number_deleted = await self.db_pool.runInteraction(
+            "_delete_account_data_for_deactivated_users",
+            _delete_account_data_for_deactivated_users_txn,
         )
 
+        if number_deleted < batch_size:
+            await self.db_pool.updates._end_background_update(
+                "delete_account_data_for_deactivated_users"
+            )
+
+        return number_deleted
+
 
 class AccountDataStore(AccountDataWorkerStore):
     pass
diff --git a/synapse/storage/schema/main/delta/68/03_delete_account_data_for_deactivated_accounts.sql b/synapse/storage/schema/main/delta/68/03_delete_account_data_for_deactivated_accounts.sql
new file mode 100644
index 0000000000..e124933843
--- /dev/null
+++ b/synapse/storage/schema/main/delta/68/03_delete_account_data_for_deactivated_accounts.sql
@@ -0,0 +1,20 @@
+/* Copyright 2021 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+-- We want to retroactively delete account data for users that were already
+-- deactivated.
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+  (6803, 'delete_account_data_for_deactivated_users', '{}');