summary refs log tree commit diff
diff options
context:
space:
mode:
authorreivilibre <oliverw@matrix.org>2022-02-02 11:37:18 +0000
committerGitHub <noreply@github.com>2022-02-02 11:37:18 +0000
commitaf795173bec447da44b65b739d05d0083b02229d (patch)
tree4b423cfa066215aff63aa36048da9d1ba8024d1c
parentExpose the registered device ID from the `register_appservice_user` test help... (diff)
downloadsynapse-af795173bec447da44b65b739d05d0083b02229d.tar.xz
Add a background database update to purge account data for deactivated users. (#11655)
-rw-r--r--changelog.d/11655.feature1
-rwxr-xr-xscripts/synapse_port_db4
-rw-r--r--synapse/storage/databases/main/account_data.py164
-rw-r--r--synapse/storage/schema/main/delta/68/03_delete_account_data_for_deactivated_accounts.sql20
-rw-r--r--tests/handlers/test_deactivate_account.py106
5 files changed, 240 insertions, 55 deletions
diff --git a/changelog.d/11655.feature b/changelog.d/11655.feature
new file mode 100644
index 0000000000..dc426fb658
--- /dev/null
+++ b/changelog.d/11655.feature
@@ -0,0 +1 @@
+Remove account data (including client config, push rules and ignored users) upon user deactivation.
\ No newline at end of file
diff --git a/scripts/synapse_port_db b/scripts/synapse_port_db
index 640ff15277..70ee4e5c7f 100755
--- a/scripts/synapse_port_db
+++ b/scripts/synapse_port_db
@@ -36,6 +36,8 @@ from synapse.logging.context import (
     run_in_background,
 )
 from synapse.storage.database import DatabasePool, make_conn
+from synapse.storage.databases.main import PushRuleStore
+from synapse.storage.databases.main.account_data import AccountDataWorkerStore
 from synapse.storage.databases.main.client_ips import ClientIpBackgroundUpdateStore
 from synapse.storage.databases.main.deviceinbox import DeviceInboxBackgroundUpdateStore
 from synapse.storage.databases.main.devices import DeviceBackgroundUpdateStore
@@ -180,6 +182,8 @@ class Store(
     UserDirectoryBackgroundUpdateStore,
     EndToEndKeyBackgroundStore,
     StatsStore,
+    AccountDataWorkerStore,
+    PushRuleStore,
     PusherWorkerStore,
     PresenceBackgroundUpdateStore,
     GroupServerWorkerStore,
diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py
index 5bfa408f74..52146aacc8 100644
--- a/synapse/storage/databases/main/account_data.py
+++ b/synapse/storage/databases/main/account_data.py
@@ -106,6 +106,11 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
             "AccountDataAndTagsChangeCache", account_max
         )
 
+        self.db_pool.updates.register_background_update_handler(
+            "delete_account_data_for_deactivated_users",
+            self._delete_account_data_for_deactivated_users,
+        )
+
     def get_max_account_data_stream_id(self) -> int:
         """Get the current max stream ID for account data stream
 
@@ -549,72 +554,121 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore)
 
     async def purge_account_data_for_user(self, user_id: str) -> None:
         """
-        Removes the account data for a user.
+        Removes ALL the account data for a user.
+        Intended to be used upon user deactivation.
 
-        This is intended to be used upon user deactivation and also removes any
-        derived information from account data (e.g. push rules and ignored users).
+        Also purges the user from the ignored_users cache table
+        and the push_rules cache tables.
+        """
 
-        Args:
-            user_id: The user ID to remove data for.
+        await self.db_pool.runInteraction(
+            "purge_account_data_for_user_txn",
+            self._purge_account_data_for_user_txn,
+            user_id,
+        )
+
+    def _purge_account_data_for_user_txn(
+        self, txn: LoggingTransaction, user_id: str
+    ) -> None:
         """
+        See `purge_account_data_for_user`.
+        """
+        # Purge from the primary account_data tables.
+        self.db_pool.simple_delete_txn(
+            txn, table="account_data", keyvalues={"user_id": user_id}
+        )
 
-        def purge_account_data_for_user_txn(txn: LoggingTransaction) -> None:
-            # Purge from the primary account_data tables.
-            self.db_pool.simple_delete_txn(
-                txn, table="account_data", keyvalues={"user_id": user_id}
-            )
+        self.db_pool.simple_delete_txn(
+            txn, table="room_account_data", keyvalues={"user_id": user_id}
+        )
 
-            self.db_pool.simple_delete_txn(
-                txn, table="room_account_data", keyvalues={"user_id": user_id}
-            )
+        # Purge from ignored_users where this user is the ignorer.
+        # N.B. We don't purge where this user is the ignoree, because that
+        #      interferes with other users' account data.
+        #      It's also not this user's data to delete!
+        self.db_pool.simple_delete_txn(
+            txn, table="ignored_users", keyvalues={"ignorer_user_id": user_id}
+        )
 
-            # Purge from ignored_users where this user is the ignorer.
-            # N.B. We don't purge where this user is the ignoree, because that
-            #      interferes with other users' account data.
-            #      It's also not this user's data to delete!
-            self.db_pool.simple_delete_txn(
-                txn, table="ignored_users", keyvalues={"ignorer_user_id": user_id}
-            )
+        # Remove the push rules
+        self.db_pool.simple_delete_txn(
+            txn, table="push_rules", keyvalues={"user_name": user_id}
+        )
+        self.db_pool.simple_delete_txn(
+            txn, table="push_rules_enable", keyvalues={"user_name": user_id}
+        )
+        self.db_pool.simple_delete_txn(
+            txn, table="push_rules_stream", keyvalues={"user_id": user_id}
+        )
 
-            # Remove the push rules
-            self.db_pool.simple_delete_txn(
-                txn, table="push_rules", keyvalues={"user_name": user_id}
-            )
-            self.db_pool.simple_delete_txn(
-                txn, table="push_rules_enable", keyvalues={"user_name": user_id}
-            )
-            self.db_pool.simple_delete_txn(
-                txn, table="push_rules_stream", keyvalues={"user_id": user_id}
-            )
+        # Invalidate caches as appropriate
+        self._invalidate_cache_and_stream(
+            txn, self.get_account_data_for_room_and_type, (user_id,)
+        )
+        self._invalidate_cache_and_stream(
+            txn, self.get_account_data_for_user, (user_id,)
+        )
+        self._invalidate_cache_and_stream(
+            txn, self.get_global_account_data_by_type_for_user, (user_id,)
+        )
+        self._invalidate_cache_and_stream(
+            txn, self.get_account_data_for_room, (user_id,)
+        )
+        self._invalidate_cache_and_stream(txn, self.get_push_rules_for_user, (user_id,))
+        self._invalidate_cache_and_stream(
+            txn, self.get_push_rules_enabled_for_user, (user_id,)
+        )
+        # This user might be contained in the ignored_by cache for other users,
+        # so we have to invalidate it all.
+        self._invalidate_all_cache_and_stream(txn, self.ignored_by)
 
-            # Invalidate caches as appropriate
-            self._invalidate_cache_and_stream(
-                txn, self.get_account_data_for_room_and_type, (user_id,)
-            )
-            self._invalidate_cache_and_stream(
-                txn, self.get_account_data_for_user, (user_id,)
-            )
-            self._invalidate_cache_and_stream(
-                txn, self.get_global_account_data_by_type_for_user, (user_id,)
-            )
-            self._invalidate_cache_and_stream(
-                txn, self.get_account_data_for_room, (user_id,)
-            )
-            self._invalidate_cache_and_stream(
-                txn, self.get_push_rules_for_user, (user_id,)
-            )
-            self._invalidate_cache_and_stream(
-                txn, self.get_push_rules_enabled_for_user, (user_id,)
-            )
-            # This user might be contained in the ignored_by cache for other users,
-            # so we have to invalidate it all.
-            self._invalidate_all_cache_and_stream(txn, self.ignored_by)
+    async def _delete_account_data_for_deactivated_users(
+        self, progress: dict, batch_size: int
+    ) -> int:
+        """
+        Retroactively purges account data for users that have already been deactivated.
+        Gets run as a background update caused by a schema delta.
+        """
 
-        await self.db_pool.runInteraction(
-            "purge_account_data_for_user_txn",
-            purge_account_data_for_user_txn,
+        last_user: str = progress.get("last_user", "")
+
+        def _delete_account_data_for_deactivated_users_txn(
+            txn: LoggingTransaction,
+        ) -> int:
+            sql = """
+                SELECT name FROM users
+                WHERE deactivated = ? and name > ?
+                ORDER BY name ASC
+                LIMIT ?
+            """
+
+            txn.execute(sql, (1, last_user, batch_size))
+            users = [row[0] for row in txn]
+
+            for user in users:
+                self._purge_account_data_for_user_txn(txn, user_id=user)
+
+            if users:
+                self.db_pool.updates._background_update_progress_txn(
+                    txn,
+                    "delete_account_data_for_deactivated_users",
+                    {"last_user": users[-1]},
+                )
+
+            return len(users)
+
+        number_deleted = await self.db_pool.runInteraction(
+            "_delete_account_data_for_deactivated_users",
+            _delete_account_data_for_deactivated_users_txn,
         )
 
+        if number_deleted < batch_size:
+            await self.db_pool.updates._end_background_update(
+                "delete_account_data_for_deactivated_users"
+            )
+
+        return number_deleted
+
 
 class AccountDataStore(AccountDataWorkerStore):
     pass
diff --git a/synapse/storage/schema/main/delta/68/03_delete_account_data_for_deactivated_accounts.sql b/synapse/storage/schema/main/delta/68/03_delete_account_data_for_deactivated_accounts.sql
new file mode 100644
index 0000000000..e124933843
--- /dev/null
+++ b/synapse/storage/schema/main/delta/68/03_delete_account_data_for_deactivated_accounts.sql
@@ -0,0 +1,20 @@
+/* Copyright 2021 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+-- We want to retroactively delete account data for users that were already
+-- deactivated.
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+  (6803, 'delete_account_data_for_deactivated_users', '{}');
diff --git a/tests/handlers/test_deactivate_account.py b/tests/handlers/test_deactivate_account.py
index 3da597768c..01096a1581 100644
--- a/tests/handlers/test_deactivate_account.py
+++ b/tests/handlers/test_deactivate_account.py
@@ -217,3 +217,109 @@ class DeactivateAccountTestCase(HomeserverTestCase):
         self.assertEqual(
             self.get_success(self._store.ignored_by("@sheltie:test")), set()
         )
+
+    def _rerun_retroactive_account_data_deletion_update(self) -> None:
+        # Reset the 'all done' flag
+        self._store.db_pool.updates._all_done = False
+
+        self.get_success(
+            self._store.db_pool.simple_insert(
+                "background_updates",
+                {
+                    "update_name": "delete_account_data_for_deactivated_users",
+                    "progress_json": "{}",
+                },
+            )
+        )
+
+        self.wait_for_background_updates()
+
+    def test_account_data_deleted_retroactively_by_background_update_if_deactivated(
+        self,
+    ) -> None:
+        """
+        Tests that a user, who deactivated their account before account data was
+        deleted automatically upon deactivation, has their account data retroactively
+        scrubbed by the background update.
+        """
+
+        # Request the deactivation of our account
+        self._deactivate_my_account()
+
+        # Add some account data
+        # (we do this after the deactivation so that the act of deactivating doesn't
+        # clear it out. This emulates a user that was deactivated before this was cleared
+        # upon deactivation.)
+        self.get_success(
+            self._store.add_account_data_for_user(
+                self.user,
+                AccountDataTypes.DIRECT,
+                {"@someone:remote": ["!somewhere:remote"]},
+            )
+        )
+
+        # Check that the account data is there.
+        self.assertIsNotNone(
+            self.get_success(
+                self._store.get_global_account_data_by_type_for_user(
+                    self.user,
+                    AccountDataTypes.DIRECT,
+                )
+            ),
+        )
+
+        # Re-run the retroactive deletion update
+        self._rerun_retroactive_account_data_deletion_update()
+
+        # Check that the account data was cleared.
+        self.assertIsNone(
+            self.get_success(
+                self._store.get_global_account_data_by_type_for_user(
+                    self.user,
+                    AccountDataTypes.DIRECT,
+                )
+            ),
+        )
+
+    def test_account_data_preserved_by_background_update_if_not_deactivated(
+        self,
+    ) -> None:
+        """
+        Tests that the background update does not scrub account data for users that have
+        not been deactivated.
+        """
+
+        # Add some account data
+        # (we do this after the deactivation so that the act of deactivating doesn't
+        # clear it out. This emulates a user that was deactivated before this was cleared
+        # upon deactivation.)
+        self.get_success(
+            self._store.add_account_data_for_user(
+                self.user,
+                AccountDataTypes.DIRECT,
+                {"@someone:remote": ["!somewhere:remote"]},
+            )
+        )
+
+        # Check that the account data is there.
+        self.assertIsNotNone(
+            self.get_success(
+                self._store.get_global_account_data_by_type_for_user(
+                    self.user,
+                    AccountDataTypes.DIRECT,
+                )
+            ),
+        )
+
+        # Re-run the retroactive deletion update
+        self._rerun_retroactive_account_data_deletion_update()
+
+        # Check that the account data was NOT cleared.
+        self.assertIsNotNone(
+            self.get_success(
+                self._store.get_global_account_data_by_type_for_user(
+                    self.user,
+                    AccountDataTypes.DIRECT,
+                )
+            ),
+        )