From a19d01c3d95f5dbd3a4bb181cb70dacd44135a8b Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 9 Nov 2021 08:10:58 -0500 Subject: Support filtering by relations per MSC3440 (#11236) Adds experimental support for `relation_types` and `relation_senders` fields for filters. --- tests/storage/test_stream.py | 207 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 207 insertions(+) create mode 100644 tests/storage/test_stream.py (limited to 'tests/storage') diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py new file mode 100644 index 0000000000..ce782c7e1d --- /dev/null +++ b/tests/storage/test_stream.py @@ -0,0 +1,207 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import List + +from synapse.api.constants import EventTypes, RelationTypes +from synapse.api.filtering import Filter +from synapse.events import EventBase +from synapse.rest import admin +from synapse.rest.client import login, room +from synapse.types import JsonDict + +from tests.unittest import HomeserverTestCase + + +class PaginationTestCase(HomeserverTestCase): + """ + Test the pre-filtering done in the pagination code. + + This is similar to some of the tests in tests.rest.client.test_rooms but here + we ensure that the filtering done in the database is applied successfully. + """ + + servlets = [ + admin.register_servlets_for_client_rest_resource, + room.register_servlets, + login.register_servlets, + ] + + def default_config(self): + config = super().default_config() + config["experimental_features"] = {"msc3440_enabled": True} + return config + + def prepare(self, reactor, clock, homeserver): + self.user_id = self.register_user("test", "test") + self.tok = self.login("test", "test") + self.room_id = self.helper.create_room_as(self.user_id, tok=self.tok) + + self.second_user_id = self.register_user("second", "test") + self.second_tok = self.login("second", "test") + self.helper.join( + room=self.room_id, user=self.second_user_id, tok=self.second_tok + ) + + self.third_user_id = self.register_user("third", "test") + self.third_tok = self.login("third", "test") + self.helper.join(room=self.room_id, user=self.third_user_id, tok=self.third_tok) + + # An initial event with a relation from second user. + res = self.helper.send_event( + room_id=self.room_id, + type=EventTypes.Message, + content={"msgtype": "m.text", "body": "Message 1"}, + tok=self.tok, + ) + self.event_id_1 = res["event_id"] + self.helper.send_event( + room_id=self.room_id, + type="m.reaction", + content={ + "m.relates_to": { + "rel_type": RelationTypes.ANNOTATION, + "event_id": self.event_id_1, + "key": "👍", + } + }, + tok=self.second_tok, + ) + + # Another event with a relation from third user. 
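+        # (This one is an m.reference relation, rather than an annotation.)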
+ res = self.helper.send_event( + room_id=self.room_id, + type=EventTypes.Message, + content={"msgtype": "m.text", "body": "Message 2"}, + tok=self.tok, + ) + self.event_id_2 = res["event_id"] + self.helper.send_event( + room_id=self.room_id, + type="m.reaction", + content={ + "m.relates_to": { + "rel_type": RelationTypes.REFERENCE, + "event_id": self.event_id_2, + } + }, + tok=self.third_tok, + ) + + # An event with no relations. + self.helper.send_event( + room_id=self.room_id, + type=EventTypes.Message, + content={"msgtype": "m.text", "body": "No relations"}, + tok=self.tok, + ) + + def _filter_messages(self, filter: JsonDict) -> List[EventBase]: + """Make a request to /messages with a filter, returns the chunk of events.""" + + from_token = self.get_success( + self.hs.get_event_sources().get_current_token_for_pagination() + ) + + events, next_key = self.get_success( + self.hs.get_datastore().paginate_room_events( + room_id=self.room_id, + from_key=from_token.room_key, + to_key=None, + direction="b", + limit=10, + event_filter=Filter(self.hs, filter), + ) + ) + + return events + + def test_filter_relation_senders(self): + # Messages which second user reacted to. + filter = {"io.element.relation_senders": [self.second_user_id]} + chunk = self._filter_messages(filter) + self.assertEqual(len(chunk), 1, chunk) + self.assertEqual(chunk[0].event_id, self.event_id_1) + + # Messages which third user reacted to. + filter = {"io.element.relation_senders": [self.third_user_id]} + chunk = self._filter_messages(filter) + self.assertEqual(len(chunk), 1, chunk) + self.assertEqual(chunk[0].event_id, self.event_id_2) + + # Messages which either user reacted to. + filter = { + "io.element.relation_senders": [self.second_user_id, self.third_user_id] + } + chunk = self._filter_messages(filter) + self.assertEqual(len(chunk), 2, chunk) + self.assertCountEqual( + [c.event_id for c in chunk], [self.event_id_1, self.event_id_2] + ) + + def test_filter_relation_type(self): + # Messages which have annotations. + filter = {"io.element.relation_types": [RelationTypes.ANNOTATION]} + chunk = self._filter_messages(filter) + self.assertEqual(len(chunk), 1, chunk) + self.assertEqual(chunk[0].event_id, self.event_id_1) + + # Messages which have references. + filter = {"io.element.relation_types": [RelationTypes.REFERENCE]} + chunk = self._filter_messages(filter) + self.assertEqual(len(chunk), 1, chunk) + self.assertEqual(chunk[0].event_id, self.event_id_2) + + # Messages which have either annotations or references. + filter = { + "io.element.relation_types": [ + RelationTypes.ANNOTATION, + RelationTypes.REFERENCE, + ] + } + chunk = self._filter_messages(filter) + self.assertEqual(len(chunk), 2, chunk) + self.assertCountEqual( + [c.event_id for c in chunk], [self.event_id_1, self.event_id_2] + ) + + def test_filter_relation_senders_and_type(self): + # Messages which second user reacted to. 
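+        # Both filter fields are given here, so an event must satisfy both.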
+        filter = {
+            "io.element.relation_senders": [self.second_user_id],
+            "io.element.relation_types": [RelationTypes.ANNOTATION],
+        }
+        chunk = self._filter_messages(filter)
+        self.assertEqual(len(chunk), 1, chunk)
+        self.assertEqual(chunk[0].event_id, self.event_id_1)
+
+    def test_duplicate_relation(self):
+        """An event should only be returned once if there are multiple relations to it."""
+        self.helper.send_event(
+            room_id=self.room_id,
+            type="m.reaction",
+            content={
+                "m.relates_to": {
+                    "rel_type": RelationTypes.ANNOTATION,
+                    "event_id": self.event_id_1,
+                    "key": "A",
+                }
+            },
+            tok=self.second_tok,
+        )
+
+        filter = {"io.element.relation_senders": [self.second_user_id]}
+        chunk = self._filter_messages(filter)
+        self.assertEqual(len(chunk), 1, chunk)
+        self.assertEqual(chunk[0].event_id, self.event_id_1)
-- cgit 1.5.1


From 0bcae8ad56a64da72f278b4ec425d89c068b5df0 Mon Sep 17 00:00:00 2001
From: Shay
Date: Fri, 12 Nov 2021 10:38:24 -0800
Subject: Change display names/avatar URLs to None if they contain null bytes before storing in DB (#11230)

* change display names/avatar URLs to None if they contain null bytes
* add changelog
* add POC test, requested changes
* add a saner test and remove old one
* update test to verify that display name has been changed to None
* make test less fragile
---
 changelog.d/11230.bugfix                 |  2 ++
 synapse/storage/databases/main/events.py | 10 ++++---
 tests/storage/test_roommember.py         | 48 ++++++++++++++++++++++++++++++++
 3 files changed, 56 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/11230.bugfix

(limited to 'tests/storage')

diff --git a/changelog.d/11230.bugfix b/changelog.d/11230.bugfix
new file mode 100644
index 0000000000..b2d6d4d024
--- /dev/null
+++ b/changelog.d/11230.bugfix
@@ -0,0 +1,2 @@
+Fix a long-standing bug wherein display names or avatar URLs containing null bytes cause an internal server error
+when stored in the DB.
\ No newline at end of file diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 596275c23c..120e4807d1 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1641,8 +1641,8 @@ class PersistEventsStore: def _store_room_members_txn(self, txn, events, backfilled): """Store a room member in the database.""" - def str_or_none(val: Any) -> Optional[str]: - return val if isinstance(val, str) else None + def non_null_str_or_none(val: Any) -> Optional[str]: + return val if isinstance(val, str) and "\u0000" not in val else None self.db_pool.simple_insert_many_txn( txn, @@ -1654,8 +1654,10 @@ class PersistEventsStore: "sender": event.user_id, "room_id": event.room_id, "membership": event.membership, - "display_name": str_or_none(event.content.get("displayname")), - "avatar_url": str_or_none(event.content.get("avatar_url")), + "display_name": non_null_str_or_none( + event.content.get("displayname") + ), + "avatar_url": non_null_str_or_none(event.content.get("avatar_url")), } for event in events ], diff --git a/tests/storage/test_roommember.py b/tests/storage/test_roommember.py index 2873e22ccf..fccab733c0 100644 --- a/tests/storage/test_roommember.py +++ b/tests/storage/test_roommember.py @@ -161,6 +161,54 @@ class RoomMemberStoreTestCase(unittest.HomeserverTestCase): ) self.assertEqual(users.keys(), {self.u_alice, self.u_bob}) + def test__null_byte_in_display_name_properly_handled(self): + room = self.helper.create_room_as(self.u_alice, tok=self.t_alice) + + res = self.get_success( + self.store.db_pool.simple_select_list( + "room_memberships", + {"user_id": "@alice:test"}, + ["display_name", "event_id"], + ) + ) + # Check that we only got one result back + self.assertEqual(len(res), 1) + + # Check that alice's display name is "alice" + self.assertEqual(res[0]["display_name"], "alice") + + # Grab the event_id to use later + event_id = res[0]["event_id"] + + # Create a profile with the offending null byte in the display name + new_profile = {"displayname": "ali\u0000ce"} + + # Ensure that the change goes smoothly and does not fail due to the null byte + self.helper.change_membership( + room, + self.u_alice, + self.u_alice, + "join", + extra_data=new_profile, + tok=self.t_alice, + ) + + res2 = self.get_success( + self.store.db_pool.simple_select_list( + "room_memberships", + {"user_id": "@alice:test"}, + ["display_name", "event_id"], + ) + ) + # Check that we only have two results + self.assertEqual(len(res2), 2) + + # Filter out the previous event using the event_id we grabbed above + row = [row for row in res2 if row["event_id"] != event_id] + + # Check that alice's display name is now None + self.assertEqual(row[0]["display_name"], None) + class CurrentStateMembershipUpdateTestCase(unittest.HomeserverTestCase): def prepare(self, reactor, clock, homeserver): -- cgit 1.5.1 From e605e4b8f2447f0b6afa9acc104ae1882a732090 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Mon, 15 Nov 2021 12:59:33 +0000 Subject: Database storage profile passes mypy (#11342) It already seems to pass mypy. I wonder what changed, given that it was on the exclusion list. So this commit consists of me ensuring `--disallow-untyped-defs` passes and a minor fixup to a function that returned either `True` or `None`. 
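
The `True`-or-`None` pattern in question looks roughly like this (an
illustrative sketch, not the exact Synapse code; `lookup_group_membership`
is a hypothetical stand-in for the real `simple_select_one_onecol` queries):

    from typing import Optional

    async def is_subscribed_remote_profile_for_user(user_id: str) -> bool:
        """Check whether we are interested in a remote user's profile."""
        res: Optional[str] = await lookup_group_membership(user_id)  # hypothetical helper
        if res:
            return True
        # The function previously fell off the end here, implicitly returning
        # None; the explicit `return False` satisfies the `-> bool` annotation.
        return False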
--- changelog.d/11342.misc | 1 + mypy.ini | 7 ++++++- synapse/storage/databases/main/profile.py | 12 ++++++++---- tests/storage/test_profile.py | 9 ++++++--- 4 files changed, 21 insertions(+), 8 deletions(-) create mode 100644 changelog.d/11342.misc (limited to 'tests/storage') diff --git a/changelog.d/11342.misc b/changelog.d/11342.misc new file mode 100644 index 0000000000..86594a332d --- /dev/null +++ b/changelog.d/11342.misc @@ -0,0 +1 @@ +Add type hints to storage classes. diff --git a/mypy.ini b/mypy.ini index 710b1f3a4b..b2953974ea 100644 --- a/mypy.ini +++ b/mypy.ini @@ -38,7 +38,6 @@ exclude = (?x) |synapse/storage/databases/main/metrics.py |synapse/storage/databases/main/monthly_active_users.py |synapse/storage/databases/main/presence.py - |synapse/storage/databases/main/profile.py |synapse/storage/databases/main/purge_events.py |synapse/storage/databases/main/push_rule.py |synapse/storage/databases/main/receipts.py @@ -182,6 +181,9 @@ disallow_untyped_defs = True [mypy-synapse.storage.databases.main.room_batch] disallow_untyped_defs = True +[mypy-synapse.storage.databases.main.profile] +disallow_untyped_defs = True + [mypy-synapse.storage.databases.main.state_deltas] disallow_untyped_defs = True @@ -284,6 +286,9 @@ disallow_untyped_defs = True [mypy-tests.handlers.test_user_directory] disallow_untyped_defs = True +[mypy-tests.storage.test_profile] +disallow_untyped_defs = True + [mypy-tests.storage.test_user_directory] disallow_untyped_defs = True diff --git a/synapse/storage/databases/main/profile.py b/synapse/storage/databases/main/profile.py index dd8e27e226..e197b7203e 100644 --- a/synapse/storage/databases/main/profile.py +++ b/synapse/storage/databases/main/profile.py @@ -15,6 +15,7 @@ from typing import Any, Dict, List, Optional from synapse.api.errors import StoreError from synapse.storage._base import SQLBaseStore +from synapse.storage.database import LoggingTransaction from synapse.storage.databases.main.roommember import ProfileInfo @@ -104,7 +105,7 @@ class ProfileWorkerStore(SQLBaseStore): desc="update_remote_profile_cache", ) - async def maybe_delete_remote_profile_cache(self, user_id): + async def maybe_delete_remote_profile_cache(self, user_id: str) -> None: """Check if we still care about the remote user's profile, and if we don't then remove their profile from the cache """ @@ -116,9 +117,9 @@ class ProfileWorkerStore(SQLBaseStore): desc="delete_remote_profile_cache", ) - async def is_subscribed_remote_profile_for_user(self, user_id): + async def is_subscribed_remote_profile_for_user(self, user_id: str) -> bool: """Check whether we are interested in a remote user's profile.""" - res = await self.db_pool.simple_select_one_onecol( + res: Optional[str] = await self.db_pool.simple_select_one_onecol( table="group_users", keyvalues={"user_id": user_id}, retcol="user_id", @@ -139,13 +140,16 @@ class ProfileWorkerStore(SQLBaseStore): if res: return True + return False async def get_remote_profile_cache_entries_that_expire( self, last_checked: int ) -> List[Dict[str, str]]: """Get all users who haven't been checked since `last_checked`""" - def _get_remote_profile_cache_entries_that_expire_txn(txn): + def _get_remote_profile_cache_entries_that_expire_txn( + txn: LoggingTransaction, + ) -> List[Dict[str, str]]: sql = """ SELECT user_id, displayname, avatar_url FROM remote_profile_cache diff --git a/tests/storage/test_profile.py b/tests/storage/test_profile.py index a1ba99ff14..d37736edf8 100644 --- a/tests/storage/test_profile.py +++ b/tests/storage/test_profile.py @@ 
-11,19 +11,22 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from twisted.test.proto_helpers import MemoryReactor
 
+from synapse.server import HomeServer
 from synapse.types import UserID
+from synapse.util import Clock
 
 from tests import unittest
 
 
 class ProfileStoreTestCase(unittest.HomeserverTestCase):
-    def prepare(self, reactor, clock, hs):
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
         self.store = hs.get_datastore()
 
         self.u_frank = UserID.from_string("@frank:test")
 
-    def test_displayname(self):
+    def test_displayname(self) -> None:
         self.get_success(self.store.create_profile(self.u_frank.localpart))
 
         self.get_success(
@@ -48,7 +51,7 @@ class ProfileStoreTestCase(unittest.HomeserverTestCase):
             self.get_success(self.store.get_profile_displayname(self.u_frank.localpart))
         )
 
-    def test_avatar_url(self):
+    def test_avatar_url(self) -> None:
         self.get_success(self.store.create_profile(self.u_frank.localpart))
 
         self.get_success(
-- cgit 1.5.1


From 319dcb955ea2f8ecab4ad3924d0e2e1222ef89f7 Mon Sep 17 00:00:00 2001
From: Patrick Cloke
Date: Tue, 16 Nov 2021 11:36:46 -0500
Subject: Fix incorrect return value in tests. (#11359)

---
 changelog.d/11359.misc                |  1 +
 tests/storage/test_rollback_worker.py |  2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/11359.misc

(limited to 'tests/storage')

diff --git a/changelog.d/11359.misc b/changelog.d/11359.misc
new file mode 100644
index 0000000000..4720519cbc
--- /dev/null
+++ b/changelog.d/11359.misc
@@ -0,0 +1 @@
+Require all files in synapse/ and tests/ to pass mypy unless specifically excluded.
diff --git a/tests/storage/test_rollback_worker.py b/tests/storage/test_rollback_worker.py
index 0ce0892165..cfc8098af6 100644
--- a/tests/storage/test_rollback_worker.py
+++ b/tests/storage/test_rollback_worker.py
@@ -33,7 +33,7 @@ def fake_listdir(filepath: str) -> List[str]:
         A list of files and folders in the directory.
     """
     if filepath.endswith("full_schemas"):
-        return [SCHEMA_VERSION]
+        return [str(SCHEMA_VERSION)]
 
     return ["99_add_unicorn_to_database.sql"]
-- cgit 1.5.1


From 7f9841bdec349075eb424c943db5569439acb83c Mon Sep 17 00:00:00 2001
From: Brendan Abolivier
Date: Wed, 24 Nov 2021 20:21:44 +0100
Subject: Lower minimum batch size to 1 for background updates (#11422)

Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
---
 changelog.d/11422.bugfix                    |  1 +
 synapse/storage/background_updates.py       |  2 +-
 tests/rest/admin/test_background_updates.py | 25 +++++++++++++++++--------
 tests/storage/test_background_update.py     |  8 ++++----
 4 files changed, 23 insertions(+), 13 deletions(-)
 create mode 100644 changelog.d/11422.bugfix

(limited to 'tests/storage')

diff --git a/changelog.d/11422.bugfix b/changelog.d/11422.bugfix
new file mode 100644
index 0000000000..28ac65ea7c
--- /dev/null
+++ b/changelog.d/11422.bugfix
@@ -0,0 +1 @@
+Improve performance of various background database schema updates.
diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index b104f9032c..bc8364400d 100644
--- a/synapse/storage/background_updates.py
+++ b/synapse/storage/background_updates.py
@@ -82,7 +82,7 @@ class BackgroundUpdater:
     process and autotuning the batch size.
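+    (Roughly: if the last batch of 100 items took 1000 ms, that is 0.1 items/ms,
+    so a 100 ms target duration suggests a batch of about 10 items, floored at
+    the minimum batch size below.)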
""" - MINIMUM_BACKGROUND_BATCH_SIZE = 100 + MINIMUM_BACKGROUND_BATCH_SIZE = 1 DEFAULT_BACKGROUND_BATCH_SIZE = 100 BACKGROUND_UPDATE_INTERVAL_MS = 1000 BACKGROUND_UPDATE_DURATION_MS = 100 diff --git a/tests/rest/admin/test_background_updates.py b/tests/rest/admin/test_background_updates.py index 1786316763..cd5c60b65c 100644 --- a/tests/rest/admin/test_background_updates.py +++ b/tests/rest/admin/test_background_updates.py @@ -20,6 +20,7 @@ import synapse.rest.admin from synapse.api.errors import Codes from synapse.rest.client import login from synapse.server import HomeServer +from synapse.storage.background_updates import BackgroundUpdater from tests import unittest @@ -150,9 +151,11 @@ class BackgroundUpdatesTestCase(unittest.HomeserverTestCase): "current_updates": { "master": { "name": "test_update", - "average_items_per_ms": 0.1, + "average_items_per_ms": 0.001, "total_duration_ms": 1000.0, - "total_item_count": 100, + "total_item_count": ( + BackgroundUpdater.MINIMUM_BACKGROUND_BATCH_SIZE + ), } }, "enabled": True, @@ -203,9 +206,11 @@ class BackgroundUpdatesTestCase(unittest.HomeserverTestCase): "current_updates": { "master": { "name": "test_update", - "average_items_per_ms": 0.1, + "average_items_per_ms": 0.001, "total_duration_ms": 1000.0, - "total_item_count": 100, + "total_item_count": ( + BackgroundUpdater.MINIMUM_BACKGROUND_BATCH_SIZE + ), } }, "enabled": False, @@ -230,9 +235,11 @@ class BackgroundUpdatesTestCase(unittest.HomeserverTestCase): "current_updates": { "master": { "name": "test_update", - "average_items_per_ms": 0.1, + "average_items_per_ms": 0.001, "total_duration_ms": 1000.0, - "total_item_count": 100, + "total_item_count": ( + BackgroundUpdater.MINIMUM_BACKGROUND_BATCH_SIZE + ), } }, "enabled": False, @@ -267,9 +274,11 @@ class BackgroundUpdatesTestCase(unittest.HomeserverTestCase): "current_updates": { "master": { "name": "test_update", - "average_items_per_ms": 0.1, + "average_items_per_ms": 0.001, "total_duration_ms": 2000.0, - "total_item_count": 200, + "total_item_count": ( + 2 * BackgroundUpdater.MINIMUM_BACKGROUND_BATCH_SIZE + ), } }, "enabled": True, diff --git a/tests/storage/test_background_update.py b/tests/storage/test_background_update.py index 0da42b5ac5..a5f5ebad41 100644 --- a/tests/storage/test_background_update.py +++ b/tests/storage/test_background_update.py @@ -19,11 +19,11 @@ class BackgroundUpdateTestCase(unittest.HomeserverTestCase): ) def test_do_background_update(self): - # the time we claim each update takes - duration_ms = 42 + # the time we claim it takes to update one item when running the update + duration_ms = 4200 # the target runtime for each bg update - target_background_update_duration_ms = 50000 + target_background_update_duration_ms = 5000000 store = self.hs.get_datastore() self.get_success( @@ -57,7 +57,7 @@ class BackgroundUpdateTestCase(unittest.HomeserverTestCase): # on the first call, we should get run with the default background update size self.update_handler.assert_called_once_with( - {"my_key": 1}, self.updates.DEFAULT_BACKGROUND_BATCH_SIZE + {"my_key": 1}, self.updates.MINIMUM_BACKGROUND_BATCH_SIZE ) # second step: complete the update -- cgit 1.5.1 From 0d88c4f9030c50bb9e016d9a28f6db27f7913d0b Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 25 Nov 2021 16:14:54 +0100 Subject: Improve performance of `remove_{hidden,deleted}_devices_from_device_inbox` (#11421) Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- changelog.d/11421.bugfix | 1 + 
synapse/storage/databases/main/deviceinbox.py | 209 +++++++-------------- .../65/08_device_inbox_background_updates.sql | 18 ++ tests/storage/databases/main/test_deviceinbox.py | 4 +- 4 files changed, 84 insertions(+), 148 deletions(-) create mode 100644 changelog.d/11421.bugfix create mode 100644 synapse/storage/schema/main/delta/65/08_device_inbox_background_updates.sql (limited to 'tests/storage') diff --git a/changelog.d/11421.bugfix b/changelog.d/11421.bugfix new file mode 100644 index 0000000000..28ac65ea7c --- /dev/null +++ b/changelog.d/11421.bugfix @@ -0,0 +1 @@ +Improve performance of various background database schema updates. diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py index 7c0f953365..ab8766c75b 100644 --- a/synapse/storage/databases/main/deviceinbox.py +++ b/synapse/storage/databases/main/deviceinbox.py @@ -599,6 +599,7 @@ class DeviceInboxBackgroundUpdateStore(SQLBaseStore): DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop" REMOVE_DELETED_DEVICES = "remove_deleted_devices_from_device_inbox" REMOVE_HIDDEN_DEVICES = "remove_hidden_devices_from_device_inbox" + REMOVE_DEAD_DEVICES_FROM_INBOX = "remove_dead_devices_from_device_inbox" def __init__(self, database: DatabasePool, db_conn, hs: "HomeServer"): super().__init__(database, db_conn, hs) @@ -614,14 +615,18 @@ class DeviceInboxBackgroundUpdateStore(SQLBaseStore): self.DEVICE_INBOX_STREAM_ID, self._background_drop_index_device_inbox ) - self.db_pool.updates.register_background_update_handler( - self.REMOVE_DELETED_DEVICES, - self._remove_deleted_devices_from_device_inbox, + # Used to be a background update that deletes all device_inboxes for deleted + # devices. + self.db_pool.updates.register_noop_background_update( + self.REMOVE_DELETED_DEVICES ) + # Used to be a background update that deletes all device_inboxes for hidden + # devices. + self.db_pool.updates.register_noop_background_update(self.REMOVE_HIDDEN_DEVICES) self.db_pool.updates.register_background_update_handler( - self.REMOVE_HIDDEN_DEVICES, - self._remove_hidden_devices_from_device_inbox, + self.REMOVE_DEAD_DEVICES_FROM_INBOX, + self._remove_dead_devices_from_device_inbox, ) async def _background_drop_index_device_inbox(self, progress, batch_size): @@ -636,171 +641,83 @@ class DeviceInboxBackgroundUpdateStore(SQLBaseStore): return 1 - async def _remove_deleted_devices_from_device_inbox( - self, progress: JsonDict, batch_size: int + async def _remove_dead_devices_from_device_inbox( + self, + progress: JsonDict, + batch_size: int, ) -> int: - """A background update that deletes all device_inboxes for deleted devices. - - This should only need to be run once (when users upgrade to v1.47.0) + """A background update to remove devices that were either deleted or hidden from + the device_inbox table. Args: - progress: JsonDict used to store progress of this background update - batch_size: the maximum number of rows to retrieve in a single select query + progress: The update's progress dict. + batch_size: The batch size for this update. Returns: - The number of deleted rows + The number of rows deleted. 
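+            (In practice this is batch_size on each run until the update completes.)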
""" - def _remove_deleted_devices_from_device_inbox_txn( + def _remove_dead_devices_from_device_inbox_txn( txn: LoggingTransaction, - ) -> int: - """stream_id is not unique - we need to use an inclusive `stream_id >= ?` clause, - since we might not have deleted all dead device messages for the stream_id - returned from the previous query + ) -> Tuple[int, bool]: - Then delete only rows matching the `(user_id, device_id, stream_id)` tuple, - to avoid problems of deleting a large number of rows all at once - due to a single device having lots of device messages. - """ + if "max_stream_id" in progress: + max_stream_id = progress["max_stream_id"] + else: + txn.execute("SELECT max(stream_id) FROM device_inbox") + # There's a type mismatch here between how we want to type the row and + # what fetchone says it returns, but we silence it because we know that + # res can't be None. + res: Tuple[Optional[int]] = txn.fetchone() # type: ignore[assignment] + if res[0] is None: + # this can only happen if the `device_inbox` table is empty, in which + # case we have no work to do. + return 0, True + else: + max_stream_id = res[0] - last_stream_id = progress.get("stream_id", 0) + start = progress.get("stream_id", 0) + stop = start + batch_size + # delete rows in `device_inbox` which do *not* correspond to a known, + # unhidden device. sql = """ - SELECT device_id, user_id, stream_id - FROM device_inbox + DELETE FROM device_inbox WHERE - stream_id >= ? - AND (device_id, user_id) NOT IN ( - SELECT device_id, user_id FROM devices + stream_id >= ? AND stream_id < ? + AND NOT EXISTS ( + SELECT * FROM devices d + WHERE + d.device_id=device_inbox.device_id + AND d.user_id=device_inbox.user_id + AND NOT hidden ) - ORDER BY stream_id - LIMIT ? - """ - - txn.execute(sql, (last_stream_id, batch_size)) - rows = txn.fetchall() + """ - num_deleted = 0 - for row in rows: - num_deleted += self.db_pool.simple_delete_txn( - txn, - "device_inbox", - {"device_id": row[0], "user_id": row[1], "stream_id": row[2]}, - ) + txn.execute(sql, (start, stop)) - if rows: - # send more than stream_id to progress - # otherwise it can happen in large deployments that - # no change of status is visible in the log file - # it may be that the stream_id does not change in several runs - self.db_pool.updates._background_update_progress_txn( - txn, - self.REMOVE_DELETED_DEVICES, - { - "device_id": rows[-1][0], - "user_id": rows[-1][1], - "stream_id": rows[-1][2], - }, - ) - - return num_deleted - - number_deleted = await self.db_pool.runInteraction( - "_remove_deleted_devices_from_device_inbox", - _remove_deleted_devices_from_device_inbox_txn, - ) - - # The task is finished when no more lines are deleted. - if not number_deleted: - await self.db_pool.updates._end_background_update( - self.REMOVE_DELETED_DEVICES + self.db_pool.updates._background_update_progress_txn( + txn, + self.REMOVE_DEAD_DEVICES_FROM_INBOX, + { + "stream_id": stop, + "max_stream_id": max_stream_id, + }, ) - return number_deleted - - async def _remove_hidden_devices_from_device_inbox( - self, progress: JsonDict, batch_size: int - ) -> int: - """A background update that deletes all device_inboxes for hidden devices. 
- - This should only need to be run once (when users upgrade to v1.47.0) - - Args: - progress: JsonDict used to store progress of this background update - batch_size: the maximum number of rows to retrieve in a single select query - - Returns: - The number of deleted rows - """ - - def _remove_hidden_devices_from_device_inbox_txn( - txn: LoggingTransaction, - ) -> int: - """stream_id is not unique - we need to use an inclusive `stream_id >= ?` clause, - since we might not have deleted all hidden device messages for the stream_id - returned from the previous query - - Then delete only rows matching the `(user_id, device_id, stream_id)` tuple, - to avoid problems of deleting a large number of rows all at once - due to a single device having lots of device messages. - """ - - last_stream_id = progress.get("stream_id", 0) - - sql = """ - SELECT device_id, user_id, stream_id - FROM device_inbox - WHERE - stream_id >= ? - AND (device_id, user_id) IN ( - SELECT device_id, user_id FROM devices WHERE hidden = ? - ) - ORDER BY stream_id - LIMIT ? - """ - - txn.execute(sql, (last_stream_id, True, batch_size)) - rows = txn.fetchall() - - num_deleted = 0 - for row in rows: - num_deleted += self.db_pool.simple_delete_txn( - txn, - "device_inbox", - {"device_id": row[0], "user_id": row[1], "stream_id": row[2]}, - ) - - if rows: - # We don't just save the `stream_id` in progress as - # otherwise it can happen in large deployments that - # no change of status is visible in the log file, as - # it may be that the stream_id does not change in several runs - self.db_pool.updates._background_update_progress_txn( - txn, - self.REMOVE_HIDDEN_DEVICES, - { - "device_id": rows[-1][0], - "user_id": rows[-1][1], - "stream_id": rows[-1][2], - }, - ) - - return num_deleted + return stop > max_stream_id - number_deleted = await self.db_pool.runInteraction( - "_remove_hidden_devices_from_device_inbox", - _remove_hidden_devices_from_device_inbox_txn, + finished = await self.db_pool.runInteraction( + "_remove_devices_from_device_inbox_txn", + _remove_dead_devices_from_device_inbox_txn, ) - # The task is finished when no more lines are deleted. - if not number_deleted: + if finished: await self.db_pool.updates._end_background_update( - self.REMOVE_HIDDEN_DEVICES + self.REMOVE_DEAD_DEVICES_FROM_INBOX, ) - return number_deleted + return batch_size class DeviceInboxStore(DeviceInboxWorkerStore, DeviceInboxBackgroundUpdateStore): diff --git a/synapse/storage/schema/main/delta/65/08_device_inbox_background_updates.sql b/synapse/storage/schema/main/delta/65/08_device_inbox_background_updates.sql new file mode 100644 index 0000000000..d79455c2ce --- /dev/null +++ b/synapse/storage/schema/main/delta/65/08_device_inbox_background_updates.sql @@ -0,0 +1,18 @@ +/* Copyright 2021 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Background update to clear the inboxes of hidden and deleted devices. 
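+-- The empty progress_json means the update starts scanning from stream_id 0.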
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (6508, 'remove_dead_devices_from_device_inbox', '{}'); diff --git a/tests/storage/databases/main/test_deviceinbox.py b/tests/storage/databases/main/test_deviceinbox.py index 4b67bd15b7..36c933b9e9 100644 --- a/tests/storage/databases/main/test_deviceinbox.py +++ b/tests/storage/databases/main/test_deviceinbox.py @@ -66,7 +66,7 @@ class DeviceInboxBackgroundUpdateStoreTestCase(HomeserverTestCase): self.store.db_pool.simple_insert( "background_updates", { - "update_name": "remove_deleted_devices_from_device_inbox", + "update_name": "remove_dead_devices_from_device_inbox", "progress_json": "{}", }, ) @@ -140,7 +140,7 @@ class DeviceInboxBackgroundUpdateStoreTestCase(HomeserverTestCase): self.store.db_pool.simple_insert( "background_updates", { - "update_name": "remove_hidden_devices_from_device_inbox", + "update_name": "remove_dead_devices_from_device_inbox", "progress_json": "{}", }, ) -- cgit 1.5.1