From 6d47b7e32589e816eb766446cc1ff19ea73fc7c1 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 22 Nov 2022 14:08:04 -0500 Subject: Add a type hint for `get_device_handler()` and fix incorrect types. (#14055) This was the last untyped handler from the HomeServer object. Since it was being treated as Any (and thus unchecked) it was being used incorrectly in a few places. --- tests/handlers/test_device.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'tests/handlers/test_device.py') diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py index b8b465d35b..ce7525e29c 100644 --- a/tests/handlers/test_device.py +++ b/tests/handlers/test_device.py @@ -19,7 +19,7 @@ from typing import Optional from twisted.test.proto_helpers import MemoryReactor from synapse.api.errors import NotFoundError, SynapseError -from synapse.handlers.device import MAX_DEVICE_DISPLAY_NAME_LEN +from synapse.handlers.device import MAX_DEVICE_DISPLAY_NAME_LEN, DeviceHandler from synapse.server import HomeServer from synapse.util import Clock @@ -32,7 +32,9 @@ user2 = "@theresa:bbb" class DeviceTestCase(unittest.HomeserverTestCase): def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: hs = self.setup_test_homeserver("server", federation_http_client=None) - self.handler = hs.get_device_handler() + handler = hs.get_device_handler() + assert isinstance(handler, DeviceHandler) + self.handler = handler self.store = hs.get_datastores().main return hs @@ -61,6 +63,7 @@ class DeviceTestCase(unittest.HomeserverTestCase): self.assertEqual(res, "fco") dev = self.get_success(self.handler.store.get_device("@boris:foo", "fco")) + assert dev is not None self.assertEqual(dev["display_name"], "display name") def test_device_is_preserved_if_exists(self) -> None: @@ -83,6 +86,7 @@ class DeviceTestCase(unittest.HomeserverTestCase): self.assertEqual(res2, "fco") dev = self.get_success(self.handler.store.get_device("@boris:foo", "fco")) + assert dev is not None self.assertEqual(dev["display_name"], "display name") def test_device_id_is_made_up_if_unspecified(self) -> None: @@ -95,6 +99,7 @@ class DeviceTestCase(unittest.HomeserverTestCase): ) dev = self.get_success(self.handler.store.get_device("@theresa:foo", device_id)) + assert dev is not None self.assertEqual(dev["display_name"], "display") def test_get_devices_by_user(self) -> None: @@ -264,7 +269,9 @@ class DeviceTestCase(unittest.HomeserverTestCase): class DehydrationTestCase(unittest.HomeserverTestCase): def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: hs = self.setup_test_homeserver("server", federation_http_client=None) - self.handler = hs.get_device_handler() + handler = hs.get_device_handler() + assert isinstance(handler, DeviceHandler) + self.handler = handler self.registration = hs.get_registration_handler() self.auth = hs.get_auth() self.store = hs.get_datastores().main @@ -284,9 +291,9 @@ class DehydrationTestCase(unittest.HomeserverTestCase): ) ) - retrieved_device_id, device_data = self.get_success( - self.handler.get_dehydrated_device(user_id=user_id) - ) + result = self.get_success(self.handler.get_dehydrated_device(user_id=user_id)) + assert result is not None + retrieved_device_id, device_data = result self.assertEqual(retrieved_device_id, stored_dehydrated_device_id) self.assertEqual(device_data, {"device_data": {"foo": "bar"}}) -- cgit 1.5.1 From c7e29ca277cf60bfdc488b93f4321b046fa6b46f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 29 Nov 2022 10:36:41 +0000 Subject: POC delete stale non-e2e devices for users (#14038) This should help reduce the number of devices e.g. simple bots the repeatedly login rack up. We only delete non-e2e devices as they should be safe to delete, whereas if we delete e2e devices for a user we may accidentally break their ability to receive e2e keys for a message. Co-authored-by: Patrick Cloke Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> --- changelog.d/14038.misc | 1 + synapse/handlers/device.py | 13 +++++- synapse/storage/databases/main/devices.py | 67 ++++++++++++++++++++++++++++++- tests/handlers/test_device.py | 2 +- tests/storage/test_client_ips.py | 4 +- 5 files changed, 83 insertions(+), 4 deletions(-) create mode 100644 changelog.d/14038.misc (limited to 'tests/handlers/test_device.py') diff --git a/changelog.d/14038.misc b/changelog.d/14038.misc new file mode 100644 index 0000000000..f9bfc581ad --- /dev/null +++ b/changelog.d/14038.misc @@ -0,0 +1 @@ +Prune user's old devices on login if they have too many. diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index b1e55e1b9e..7c4dd8cf5a 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -421,6 +421,9 @@ class DeviceHandler(DeviceWorkerHandler): self._check_device_name_length(initial_device_display_name) + # Prune the user's device list if they already have a lot of devices. + await self._prune_too_many_devices(user_id) + if device_id is not None: new_device = await self.store.store_device( user_id=user_id, @@ -452,6 +455,14 @@ class DeviceHandler(DeviceWorkerHandler): raise errors.StoreError(500, "Couldn't generate a device ID.") + async def _prune_too_many_devices(self, user_id: str) -> None: + """Delete any excess old devices this user may have.""" + device_ids = await self.store.check_too_many_devices_for_user(user_id) + if not device_ids: + return + + await self.delete_devices(user_id, device_ids) + async def _delete_stale_devices(self) -> None: """Background task that deletes devices which haven't been accessed for more than a configured time period. @@ -481,7 +492,7 @@ class DeviceHandler(DeviceWorkerHandler): device_ids = [d for d in device_ids if d != except_device_id] await self.delete_devices(user_id, device_ids) - async def delete_devices(self, user_id: str, device_ids: List[str]) -> None: + async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> None: """Delete several devices Args: diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 534f7fc04a..1e83c62753 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1533,6 +1533,70 @@ class DeviceBackgroundUpdateStore(SQLBaseStore): return rows + async def check_too_many_devices_for_user(self, user_id: str) -> Collection[str]: + """Check if the user has a lot of devices, and if so return the set of + devices we can prune. + + This does *not* return hidden devices or devices with E2E keys. + """ + + num_devices = await self.db_pool.simple_select_one_onecol( + table="devices", + keyvalues={"user_id": user_id, "hidden": False}, + retcol="COALESCE(COUNT(*), 0)", + desc="count_devices", + ) + + # We let users have up to ten devices without pruning. + if num_devices <= 10: + return () + + # We prune everything older than N days. + max_last_seen = self._clock.time_msec() - 14 * 24 * 60 * 60 * 1000 + + if num_devices > 50: + # If the user has more than 50 devices, then we chose a last seen + # that ensures we keep at most 50 devices. + sql = """ + SELECT last_seen FROM devices + WHERE + user_id = ? + AND NOT hidden + AND last_seen IS NOT NULL + AND key_json IS NULL + ORDER BY last_seen DESC + LIMIT 1 + OFFSET 50 + """ + + rows = await self.db_pool.execute( + "check_too_many_devices_for_user_last_seen", None, sql, (user_id,) + ) + if rows: + max_last_seen = max(rows[0][0], max_last_seen) + + # Now fetch the devices to delete. + sql = """ + SELECT DISTINCT device_id FROM devices + LEFT JOIN e2e_device_keys_json USING (user_id, device_id) + WHERE + user_id = ? + AND NOT hidden + AND last_seen < ? + AND key_json IS NULL + """ + + def check_too_many_devices_for_user_txn( + txn: LoggingTransaction, + ) -> Collection[str]: + txn.execute(sql, (user_id, max_last_seen)) + return {device_id for device_id, in txn} + + return await self.db_pool.runInteraction( + "check_too_many_devices_for_user", + check_too_many_devices_for_user_txn, + ) + class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): # Because we have write access, this will be a StreamIdGenerator @@ -1591,6 +1655,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): values={}, insertion_values={ "display_name": initial_device_display_name, + "last_seen": self._clock.time_msec(), "hidden": False, }, desc="store_device", @@ -1636,7 +1701,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): ) raise StoreError(500, "Problem storing device.") - async def delete_devices(self, user_id: str, device_ids: List[str]) -> None: + async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> None: """Deletes several devices. Args: diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py index ce7525e29c..a456bffd63 100644 --- a/tests/handlers/test_device.py +++ b/tests/handlers/test_device.py @@ -115,7 +115,7 @@ class DeviceTestCase(unittest.HomeserverTestCase): "device_id": "xyz", "display_name": "display 0", "last_seen_ip": None, - "last_seen_ts": None, + "last_seen_ts": 1000000, }, device_map["xyz"], ) diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py index 49ad3c1324..a9af1babed 100644 --- a/tests/storage/test_client_ips.py +++ b/tests/storage/test_client_ips.py @@ -169,6 +169,8 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): ) ) + last_seen = self.clock.time_msec() + if after_persisting: # Trigger the storage loop self.reactor.advance(10) @@ -189,7 +191,7 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): "device_id": device_id, "ip": None, "user_agent": None, - "last_seen": None, + "last_seen": last_seen, }, ], ) -- cgit 1.5.1 From c29e2c630624beb0b5557aa0f7ccdcedbe62def1 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 29 Nov 2022 17:48:48 +0000 Subject: Revert "POC delete stale non-e2e devices for users (#14038)" (#14582) --- changelog.d/14582.bugfix | 1 + synapse/handlers/device.py | 13 +----- synapse/storage/databases/main/devices.py | 68 +------------------------------ tests/handlers/test_device.py | 2 +- tests/storage/test_client_ips.py | 4 +- 5 files changed, 5 insertions(+), 83 deletions(-) create mode 100644 changelog.d/14582.bugfix (limited to 'tests/handlers/test_device.py') diff --git a/changelog.d/14582.bugfix b/changelog.d/14582.bugfix new file mode 100644 index 0000000000..caad468e70 --- /dev/null +++ b/changelog.d/14582.bugfix @@ -0,0 +1 @@ +Fix a regression in Synapse 1.73.0rc1 where Synapse's main process would stop responding to HTTP requests when a user with a large number of devices logs in. diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 7c4dd8cf5a..b1e55e1b9e 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -421,9 +421,6 @@ class DeviceHandler(DeviceWorkerHandler): self._check_device_name_length(initial_device_display_name) - # Prune the user's device list if they already have a lot of devices. - await self._prune_too_many_devices(user_id) - if device_id is not None: new_device = await self.store.store_device( user_id=user_id, @@ -455,14 +452,6 @@ class DeviceHandler(DeviceWorkerHandler): raise errors.StoreError(500, "Couldn't generate a device ID.") - async def _prune_too_many_devices(self, user_id: str) -> None: - """Delete any excess old devices this user may have.""" - device_ids = await self.store.check_too_many_devices_for_user(user_id) - if not device_ids: - return - - await self.delete_devices(user_id, device_ids) - async def _delete_stale_devices(self) -> None: """Background task that deletes devices which haven't been accessed for more than a configured time period. @@ -492,7 +481,7 @@ class DeviceHandler(DeviceWorkerHandler): device_ids = [d for d in device_ids if d != except_device_id] await self.delete_devices(user_id, device_ids) - async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> None: + async def delete_devices(self, user_id: str, device_ids: List[str]) -> None: """Delete several devices Args: diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 0378035cff..534f7fc04a 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1533,71 +1533,6 @@ class DeviceBackgroundUpdateStore(SQLBaseStore): return rows - async def check_too_many_devices_for_user(self, user_id: str) -> Collection[str]: - """Check if the user has a lot of devices, and if so return the set of - devices we can prune. - - This does *not* return hidden devices or devices with E2E keys. - """ - - num_devices = await self.db_pool.simple_select_one_onecol( - table="devices", - keyvalues={"user_id": user_id, "hidden": False}, - retcol="COALESCE(COUNT(*), 0)", - desc="count_devices", - ) - - # We let users have up to ten devices without pruning. - if num_devices <= 10: - return () - - # We prune everything older than N days. - max_last_seen = self._clock.time_msec() - 14 * 24 * 60 * 60 * 1000 - - if num_devices > 50: - # If the user has more than 50 devices, then we chose a last seen - # that ensures we keep at most 50 devices. - sql = """ - SELECT last_seen FROM devices - LEFT JOIN e2e_device_keys_json USING (user_id, device_id) - WHERE - user_id = ? - AND NOT hidden - AND last_seen IS NOT NULL - AND key_json IS NULL - ORDER BY last_seen DESC - LIMIT 1 - OFFSET 50 - """ - - rows = await self.db_pool.execute( - "check_too_many_devices_for_user_last_seen", None, sql, (user_id,) - ) - if rows: - max_last_seen = max(rows[0][0], max_last_seen) - - # Now fetch the devices to delete. - sql = """ - SELECT DISTINCT device_id FROM devices - LEFT JOIN e2e_device_keys_json USING (user_id, device_id) - WHERE - user_id = ? - AND NOT hidden - AND last_seen < ? - AND key_json IS NULL - """ - - def check_too_many_devices_for_user_txn( - txn: LoggingTransaction, - ) -> Collection[str]: - txn.execute(sql, (user_id, max_last_seen)) - return {device_id for device_id, in txn} - - return await self.db_pool.runInteraction( - "check_too_many_devices_for_user", - check_too_many_devices_for_user_txn, - ) - class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): # Because we have write access, this will be a StreamIdGenerator @@ -1656,7 +1591,6 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): values={}, insertion_values={ "display_name": initial_device_display_name, - "last_seen": self._clock.time_msec(), "hidden": False, }, desc="store_device", @@ -1702,7 +1636,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): ) raise StoreError(500, "Problem storing device.") - async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> None: + async def delete_devices(self, user_id: str, device_ids: List[str]) -> None: """Deletes several devices. Args: diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py index a456bffd63..ce7525e29c 100644 --- a/tests/handlers/test_device.py +++ b/tests/handlers/test_device.py @@ -115,7 +115,7 @@ class DeviceTestCase(unittest.HomeserverTestCase): "device_id": "xyz", "display_name": "display 0", "last_seen_ip": None, - "last_seen_ts": 1000000, + "last_seen_ts": None, }, device_map["xyz"], ) diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py index a9af1babed..49ad3c1324 100644 --- a/tests/storage/test_client_ips.py +++ b/tests/storage/test_client_ips.py @@ -169,8 +169,6 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): ) ) - last_seen = self.clock.time_msec() - if after_persisting: # Trigger the storage loop self.reactor.advance(10) @@ -191,7 +189,7 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): "device_id": device_id, "ip": None, "user_agent": None, - "last_seen": last_seen, + "last_seen": None, }, ], ) -- cgit 1.5.1 From c2de2ca63060324cf2f80ddf3289b0fd7a4d861b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 9 Dec 2022 09:37:07 +0000 Subject: Delete stale non-e2e devices for users, take 2 (#14595) This should help reduce the number of devices e.g. simple bots the repeatedly login rack up. We only delete non-e2e devices as they should be safe to delete, whereas if we delete e2e devices for a user we may accidentally break their ability to receive e2e keys for a message. --- changelog.d/14595.misc | 1 + synapse/handlers/device.py | 31 +++++++++++- synapse/storage/databases/main/devices.py | 79 ++++++++++++++++++++++++++++++- tests/handlers/test_device.py | 2 +- tests/storage/test_client_ips.py | 4 +- 5 files changed, 113 insertions(+), 4 deletions(-) create mode 100644 changelog.d/14595.misc (limited to 'tests/handlers/test_device.py') diff --git a/changelog.d/14595.misc b/changelog.d/14595.misc new file mode 100644 index 0000000000..f9bfc581ad --- /dev/null +++ b/changelog.d/14595.misc @@ -0,0 +1 @@ +Prune user's old devices on login if they have too many. diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index d4750a32e6..7674c187ef 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -52,6 +52,7 @@ from synapse.util import stringutils from synapse.util.async_helpers import Linearizer from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.cancellation import cancellable +from synapse.util.iterutils import batch_iter from synapse.util.metrics import measure_func from synapse.util.retryutils import NotRetryingDestination @@ -421,6 +422,9 @@ class DeviceHandler(DeviceWorkerHandler): self._check_device_name_length(initial_device_display_name) + # Prune the user's device list if they already have a lot of devices. + await self._prune_too_many_devices(user_id) + if device_id is not None: new_device = await self.store.store_device( user_id=user_id, @@ -452,6 +456,31 @@ class DeviceHandler(DeviceWorkerHandler): raise errors.StoreError(500, "Couldn't generate a device ID.") + async def _prune_too_many_devices(self, user_id: str) -> None: + """Delete any excess old devices this user may have.""" + device_ids = await self.store.check_too_many_devices_for_user(user_id) + if not device_ids: + return + + # We don't want to block and try and delete tonnes of devices at once, + # so we cap the number of devices we delete synchronously. + first_batch, remaining_device_ids = device_ids[:10], device_ids[10:] + await self.delete_devices(user_id, first_batch) + + if not remaining_device_ids: + return + + # Now spawn a background loop that deletes the rest. + async def _prune_too_many_devices_loop() -> None: + for batch in batch_iter(remaining_device_ids, 10): + await self.delete_devices(user_id, batch) + + await self.clock.sleep(1) + + run_as_background_process( + "_prune_too_many_devices_loop", _prune_too_many_devices_loop + ) + async def _delete_stale_devices(self) -> None: """Background task that deletes devices which haven't been accessed for more than a configured time period. @@ -481,7 +510,7 @@ class DeviceHandler(DeviceWorkerHandler): device_ids = [d for d in device_ids if d != except_device_id] await self.delete_devices(user_id, device_ids) - async def delete_devices(self, user_id: str, device_ids: List[str]) -> None: + async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> None: """Delete several devices Args: diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index a5bb4d404e..08ccd46a2b 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1569,6 +1569,72 @@ class DeviceBackgroundUpdateStore(SQLBaseStore): return rows + async def check_too_many_devices_for_user(self, user_id: str) -> List[str]: + """Check if the user has a lot of devices, and if so return the set of + devices we can prune. + + This does *not* return hidden devices or devices with E2E keys. + """ + + num_devices = await self.db_pool.simple_select_one_onecol( + table="devices", + keyvalues={"user_id": user_id, "hidden": False}, + retcol="COALESCE(COUNT(*), 0)", + desc="count_devices", + ) + + # We let users have up to ten devices without pruning. + if num_devices <= 10: + return [] + + # We prune everything older than N days. + max_last_seen = self._clock.time_msec() - 14 * 24 * 60 * 60 * 1000 + + if num_devices > 50: + # If the user has more than 50 devices, then we chose a last seen + # that ensures we keep at most 50 devices. + sql = """ + SELECT last_seen FROM devices + LEFT JOIN e2e_device_keys_json USING (user_id, device_id) + WHERE + user_id = ? + AND NOT hidden + AND last_seen IS NOT NULL + AND key_json IS NULL + ORDER BY last_seen DESC + LIMIT 1 + OFFSET 50 + """ + + rows = await self.db_pool.execute( + "check_too_many_devices_for_user_last_seen", None, sql, (user_id,) + ) + if rows: + max_last_seen = max(rows[0][0], max_last_seen) + + # Now fetch the devices to delete. + sql = """ + SELECT DISTINCT device_id FROM devices + LEFT JOIN e2e_device_keys_json USING (user_id, device_id) + WHERE + user_id = ? + AND NOT hidden + AND last_seen < ? + AND key_json IS NULL + ORDER BY last_seen + """ + + def check_too_many_devices_for_user_txn( + txn: LoggingTransaction, + ) -> List[str]: + txn.execute(sql, (user_id, max_last_seen)) + return [device_id for device_id, in txn] + + return await self.db_pool.runInteraction( + "check_too_many_devices_for_user", + check_too_many_devices_for_user_txn, + ) + class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): # Because we have write access, this will be a StreamIdGenerator @@ -1627,6 +1693,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): values={}, insertion_values={ "display_name": initial_device_display_name, + "last_seen": self._clock.time_msec(), "hidden": False, }, desc="store_device", @@ -1672,7 +1739,15 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): ) raise StoreError(500, "Problem storing device.") - async def delete_devices(self, user_id: str, device_ids: List[str]) -> None: + @cached(max_entries=0) + async def delete_device(self, user_id: str, device_id: str) -> None: + raise NotImplementedError() + + # Note: sometimes deleting rows out of `device_inbox` can take a long time, + # so we use a cache so that we deduplicate in flight requests to delete + # devices. + @cachedList(cached_method_name="delete_device", list_name="device_ids") + async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> dict: """Deletes several devices. Args: @@ -1709,6 +1784,8 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): for device_id in device_ids: self.device_id_exists_cache.invalidate((user_id, device_id)) + return {} + async def update_device( self, user_id: str, device_id: str, new_display_name: Optional[str] = None ) -> None: diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py index ce7525e29c..a456bffd63 100644 --- a/tests/handlers/test_device.py +++ b/tests/handlers/test_device.py @@ -115,7 +115,7 @@ class DeviceTestCase(unittest.HomeserverTestCase): "device_id": "xyz", "display_name": "display 0", "last_seen_ip": None, - "last_seen_ts": None, + "last_seen_ts": 1000000, }, device_map["xyz"], ) diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py index 49ad3c1324..a9af1babed 100644 --- a/tests/storage/test_client_ips.py +++ b/tests/storage/test_client_ips.py @@ -169,6 +169,8 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): ) ) + last_seen = self.clock.time_msec() + if after_persisting: # Trigger the storage loop self.reactor.advance(10) @@ -189,7 +191,7 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): "device_id": device_id, "ip": None, "user_agent": None, - "last_seen": None, + "last_seen": last_seen, }, ], ) -- cgit 1.5.1 From 94bc21e69f89ad873ad7a0deb6d9c4ff3cb480ef Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 9 Dec 2022 13:31:32 +0000 Subject: Limit the number of devices we delete at once (#14649) --- changelog.d/14649.misc | 1 + synapse/handlers/device.py | 4 +++- synapse/storage/databases/main/devices.py | 11 ++++++++--- tests/handlers/test_device.py | 31 +++++++++++++++++++++++++++++++ 4 files changed, 43 insertions(+), 4 deletions(-) create mode 100644 changelog.d/14649.misc (limited to 'tests/handlers/test_device.py') diff --git a/changelog.d/14649.misc b/changelog.d/14649.misc new file mode 100644 index 0000000000..f9bfc581ad --- /dev/null +++ b/changelog.d/14649.misc @@ -0,0 +1 @@ +Prune user's old devices on login if they have too many. diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 7674c187ef..c935c7be90 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -458,10 +458,12 @@ class DeviceHandler(DeviceWorkerHandler): async def _prune_too_many_devices(self, user_id: str) -> None: """Delete any excess old devices this user may have.""" - device_ids = await self.store.check_too_many_devices_for_user(user_id) + device_ids = await self.store.check_too_many_devices_for_user(user_id, 100) if not device_ids: return + logger.info("Pruning %d old devices for user %s", len(device_ids), user_id) + # We don't want to block and try and delete tonnes of devices at once, # so we cap the number of devices we delete synchronously. first_batch, remaining_device_ids = device_ids[:10], device_ids[10:] diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 08ccd46a2b..95d4c0622d 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1569,11 +1569,15 @@ class DeviceBackgroundUpdateStore(SQLBaseStore): return rows - async def check_too_many_devices_for_user(self, user_id: str) -> List[str]: + async def check_too_many_devices_for_user( + self, user_id: str, limit: int + ) -> List[str]: """Check if the user has a lot of devices, and if so return the set of devices we can prune. This does *not* return hidden devices or devices with E2E keys. + + Returns at most `limit` number of devices, ordered by last seen. """ num_devices = await self.db_pool.simple_select_one_onecol( @@ -1614,7 +1618,7 @@ class DeviceBackgroundUpdateStore(SQLBaseStore): # Now fetch the devices to delete. sql = """ - SELECT DISTINCT device_id FROM devices + SELECT device_id FROM devices LEFT JOIN e2e_device_keys_json USING (user_id, device_id) WHERE user_id = ? @@ -1622,12 +1626,13 @@ class DeviceBackgroundUpdateStore(SQLBaseStore): AND last_seen < ? AND key_json IS NULL ORDER BY last_seen + LIMIT ? """ def check_too_many_devices_for_user_txn( txn: LoggingTransaction, ) -> List[str]: - txn.execute(sql, (user_id, max_last_seen)) + txn.execute(sql, (user_id, max_last_seen, limit)) return [device_id for device_id, in txn] return await self.db_pool.runInteraction( diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py index a456bffd63..e51cac9b33 100644 --- a/tests/handlers/test_device.py +++ b/tests/handlers/test_device.py @@ -20,6 +20,8 @@ from twisted.test.proto_helpers import MemoryReactor from synapse.api.errors import NotFoundError, SynapseError from synapse.handlers.device import MAX_DEVICE_DISPLAY_NAME_LEN, DeviceHandler +from synapse.rest import admin +from synapse.rest.client import account, login from synapse.server import HomeServer from synapse.util import Clock @@ -30,6 +32,12 @@ user2 = "@theresa:bbb" class DeviceTestCase(unittest.HomeserverTestCase): + servlets = [ + login.register_servlets, + admin.register_servlets, + account.register_servlets, + ] + def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: hs = self.setup_test_homeserver("server", federation_http_client=None) handler = hs.get_device_handler() @@ -229,6 +237,29 @@ class DeviceTestCase(unittest.HomeserverTestCase): NotFoundError, ) + def test_login_delete_old_devices(self) -> None: + """Delete old devices if the user already has too many.""" + + user_id = self.register_user("user", "pass") + + # Create a bunch of devices + for _ in range(50): + self.login("user", "pass") + self.reactor.advance(1) + + # Advance the clock for ages (as we only delete old devices) + self.reactor.advance(60 * 60 * 24 * 300) + + # Log in again to start the pruning + self.login("user", "pass") + + # Give the background job time to do its thing + self.reactor.pump([1.0] * 100) + + # We should now only have the most recent device. + devices = self.get_success(self.handler.get_devices_by_user(user_id)) + self.assertEqual(len(devices), 1) + def _record_users(self) -> None: # check this works for both devices which have a recorded client_ip, # and those which don't. -- cgit 1.5.1 From 74b89c27613a34ec9b291ad3066db7ce0adff1db Mon Sep 17 00:00:00 2001 From: reivilibre Date: Mon, 12 Dec 2022 13:55:23 +0000 Subject: Revert the deletion of stale devices due to performance issues. (#14662) --- changelog.d/14595.misc | 1 - changelog.d/14649.misc | 1 - changelog.d/14662.removal | 1 + synapse/handlers/device.py | 33 +----------- synapse/storage/databases/main/devices.py | 84 +------------------------------ tests/handlers/test_device.py | 33 +----------- tests/storage/test_client_ips.py | 4 +- 7 files changed, 5 insertions(+), 152 deletions(-) delete mode 100644 changelog.d/14595.misc delete mode 100644 changelog.d/14649.misc create mode 100644 changelog.d/14662.removal (limited to 'tests/handlers/test_device.py') diff --git a/changelog.d/14595.misc b/changelog.d/14595.misc deleted file mode 100644 index f9bfc581ad..0000000000 --- a/changelog.d/14595.misc +++ /dev/null @@ -1 +0,0 @@ -Prune user's old devices on login if they have too many. diff --git a/changelog.d/14649.misc b/changelog.d/14649.misc deleted file mode 100644 index f9bfc581ad..0000000000 --- a/changelog.d/14649.misc +++ /dev/null @@ -1 +0,0 @@ -Prune user's old devices on login if they have too many. diff --git a/changelog.d/14662.removal b/changelog.d/14662.removal new file mode 100644 index 0000000000..19a387bbb4 --- /dev/null +++ b/changelog.d/14662.removal @@ -0,0 +1 @@ +(remove from changelog: unreleased) Revert the deletion of stale devices due to performance issues. \ No newline at end of file diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index c935c7be90..d4750a32e6 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -52,7 +52,6 @@ from synapse.util import stringutils from synapse.util.async_helpers import Linearizer from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.cancellation import cancellable -from synapse.util.iterutils import batch_iter from synapse.util.metrics import measure_func from synapse.util.retryutils import NotRetryingDestination @@ -422,9 +421,6 @@ class DeviceHandler(DeviceWorkerHandler): self._check_device_name_length(initial_device_display_name) - # Prune the user's device list if they already have a lot of devices. - await self._prune_too_many_devices(user_id) - if device_id is not None: new_device = await self.store.store_device( user_id=user_id, @@ -456,33 +452,6 @@ class DeviceHandler(DeviceWorkerHandler): raise errors.StoreError(500, "Couldn't generate a device ID.") - async def _prune_too_many_devices(self, user_id: str) -> None: - """Delete any excess old devices this user may have.""" - device_ids = await self.store.check_too_many_devices_for_user(user_id, 100) - if not device_ids: - return - - logger.info("Pruning %d old devices for user %s", len(device_ids), user_id) - - # We don't want to block and try and delete tonnes of devices at once, - # so we cap the number of devices we delete synchronously. - first_batch, remaining_device_ids = device_ids[:10], device_ids[10:] - await self.delete_devices(user_id, first_batch) - - if not remaining_device_ids: - return - - # Now spawn a background loop that deletes the rest. - async def _prune_too_many_devices_loop() -> None: - for batch in batch_iter(remaining_device_ids, 10): - await self.delete_devices(user_id, batch) - - await self.clock.sleep(1) - - run_as_background_process( - "_prune_too_many_devices_loop", _prune_too_many_devices_loop - ) - async def _delete_stale_devices(self) -> None: """Background task that deletes devices which haven't been accessed for more than a configured time period. @@ -512,7 +481,7 @@ class DeviceHandler(DeviceWorkerHandler): device_ids = [d for d in device_ids if d != except_device_id] await self.delete_devices(user_id, device_ids) - async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> None: + async def delete_devices(self, user_id: str, device_ids: List[str]) -> None: """Delete several devices Args: diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 95d4c0622d..a5bb4d404e 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1569,77 +1569,6 @@ class DeviceBackgroundUpdateStore(SQLBaseStore): return rows - async def check_too_many_devices_for_user( - self, user_id: str, limit: int - ) -> List[str]: - """Check if the user has a lot of devices, and if so return the set of - devices we can prune. - - This does *not* return hidden devices or devices with E2E keys. - - Returns at most `limit` number of devices, ordered by last seen. - """ - - num_devices = await self.db_pool.simple_select_one_onecol( - table="devices", - keyvalues={"user_id": user_id, "hidden": False}, - retcol="COALESCE(COUNT(*), 0)", - desc="count_devices", - ) - - # We let users have up to ten devices without pruning. - if num_devices <= 10: - return [] - - # We prune everything older than N days. - max_last_seen = self._clock.time_msec() - 14 * 24 * 60 * 60 * 1000 - - if num_devices > 50: - # If the user has more than 50 devices, then we chose a last seen - # that ensures we keep at most 50 devices. - sql = """ - SELECT last_seen FROM devices - LEFT JOIN e2e_device_keys_json USING (user_id, device_id) - WHERE - user_id = ? - AND NOT hidden - AND last_seen IS NOT NULL - AND key_json IS NULL - ORDER BY last_seen DESC - LIMIT 1 - OFFSET 50 - """ - - rows = await self.db_pool.execute( - "check_too_many_devices_for_user_last_seen", None, sql, (user_id,) - ) - if rows: - max_last_seen = max(rows[0][0], max_last_seen) - - # Now fetch the devices to delete. - sql = """ - SELECT device_id FROM devices - LEFT JOIN e2e_device_keys_json USING (user_id, device_id) - WHERE - user_id = ? - AND NOT hidden - AND last_seen < ? - AND key_json IS NULL - ORDER BY last_seen - LIMIT ? - """ - - def check_too_many_devices_for_user_txn( - txn: LoggingTransaction, - ) -> List[str]: - txn.execute(sql, (user_id, max_last_seen, limit)) - return [device_id for device_id, in txn] - - return await self.db_pool.runInteraction( - "check_too_many_devices_for_user", - check_too_many_devices_for_user_txn, - ) - class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): # Because we have write access, this will be a StreamIdGenerator @@ -1698,7 +1627,6 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): values={}, insertion_values={ "display_name": initial_device_display_name, - "last_seen": self._clock.time_msec(), "hidden": False, }, desc="store_device", @@ -1744,15 +1672,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): ) raise StoreError(500, "Problem storing device.") - @cached(max_entries=0) - async def delete_device(self, user_id: str, device_id: str) -> None: - raise NotImplementedError() - - # Note: sometimes deleting rows out of `device_inbox` can take a long time, - # so we use a cache so that we deduplicate in flight requests to delete - # devices. - @cachedList(cached_method_name="delete_device", list_name="device_ids") - async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> dict: + async def delete_devices(self, user_id: str, device_ids: List[str]) -> None: """Deletes several devices. Args: @@ -1789,8 +1709,6 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): for device_id in device_ids: self.device_id_exists_cache.invalidate((user_id, device_id)) - return {} - async def update_device( self, user_id: str, device_id: str, new_display_name: Optional[str] = None ) -> None: diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py index e51cac9b33..ce7525e29c 100644 --- a/tests/handlers/test_device.py +++ b/tests/handlers/test_device.py @@ -20,8 +20,6 @@ from twisted.test.proto_helpers import MemoryReactor from synapse.api.errors import NotFoundError, SynapseError from synapse.handlers.device import MAX_DEVICE_DISPLAY_NAME_LEN, DeviceHandler -from synapse.rest import admin -from synapse.rest.client import account, login from synapse.server import HomeServer from synapse.util import Clock @@ -32,12 +30,6 @@ user2 = "@theresa:bbb" class DeviceTestCase(unittest.HomeserverTestCase): - servlets = [ - login.register_servlets, - admin.register_servlets, - account.register_servlets, - ] - def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: hs = self.setup_test_homeserver("server", federation_http_client=None) handler = hs.get_device_handler() @@ -123,7 +115,7 @@ class DeviceTestCase(unittest.HomeserverTestCase): "device_id": "xyz", "display_name": "display 0", "last_seen_ip": None, - "last_seen_ts": 1000000, + "last_seen_ts": None, }, device_map["xyz"], ) @@ -237,29 +229,6 @@ class DeviceTestCase(unittest.HomeserverTestCase): NotFoundError, ) - def test_login_delete_old_devices(self) -> None: - """Delete old devices if the user already has too many.""" - - user_id = self.register_user("user", "pass") - - # Create a bunch of devices - for _ in range(50): - self.login("user", "pass") - self.reactor.advance(1) - - # Advance the clock for ages (as we only delete old devices) - self.reactor.advance(60 * 60 * 24 * 300) - - # Log in again to start the pruning - self.login("user", "pass") - - # Give the background job time to do its thing - self.reactor.pump([1.0] * 100) - - # We should now only have the most recent device. - devices = self.get_success(self.handler.get_devices_by_user(user_id)) - self.assertEqual(len(devices), 1) - def _record_users(self) -> None: # check this works for both devices which have a recorded client_ip, # and those which don't. diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py index 81e4e596e4..7f7f4ef892 100644 --- a/tests/storage/test_client_ips.py +++ b/tests/storage/test_client_ips.py @@ -170,8 +170,6 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): ) ) - last_seen = self.clock.time_msec() - if after_persisting: # Trigger the storage loop self.reactor.advance(10) @@ -192,7 +190,7 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): "device_id": device_id, "ip": None, "user_agent": None, - "last_seen": last_seen, + "last_seen": None, }, ], ) -- cgit 1.5.1