From d2d48cce85556753f8443d72aafe697c477c217b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 16 May 2024 05:36:54 -0500 Subject: Refactor Sync handler to be able to return different sync responses (`SyncVersion`) (#17200) Refactor Sync handler to be able to be able to return different sync responses (`SyncVersion`). Preparation to be able support sync v2 and a new Sliding Sync `/sync/e2ee` endpoint which returns a subset of sync v2. Split upon request: https://github.com/element-hq/synapse/pull/17167#discussion_r1601497279 Split from https://github.com/element-hq/synapse/pull/17167 where we will add `SyncVersion.E2EE_SYNC` and a new type of sync response. --- synapse/handlers/sync.py | 65 ++++++++++++++++++++++++++++++++++++++++----- synapse/rest/client/sync.py | 2 ++ 2 files changed, 60 insertions(+), 7 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 0bef58351c..53fe2a6a53 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -20,6 +20,7 @@ # import itertools import logging +from enum import Enum from typing import ( TYPE_CHECKING, AbstractSet, @@ -112,6 +113,23 @@ LAZY_LOADED_MEMBERS_CACHE_MAX_SIZE = 100 SyncRequestKey = Tuple[Any, ...] +class SyncVersion(Enum): + """ + Enum for specifying the version of sync request. This is used to key which type of + sync response that we are generating. + + This is different than the `sync_type` you might see used in other code below; which + specifies the sub-type sync request (e.g. initial_sync, full_state_sync, + incremental_sync) and is really only relevant for the `/sync` v2 endpoint. + """ + + # These string values are semantically significant because they are used in the the + # metrics + + # Traditional `/sync` endpoint + SYNC_V2 = "sync_v2" + + @attr.s(slots=True, frozen=True, auto_attribs=True) class SyncConfig: user: UserID @@ -309,6 +327,7 @@ class SyncHandler: self, requester: Requester, sync_config: SyncConfig, + sync_version: SyncVersion, since_token: Optional[StreamToken] = None, timeout: int = 0, full_state: bool = False, @@ -316,6 +335,17 @@ class SyncHandler: """Get the sync for a client if we have new data for it now. Otherwise wait for new data to arrive on the server. If the timeout expires, then return an empty sync result. + + Args: + requester: The user requesting the sync response. + sync_config: Config/info necessary to process the sync request. + sync_version: Determines what kind of sync response to generate. + since_token: The point in the stream to sync from. + timeout: How long to wait for new data to arrive before giving up. + full_state: Whether to return the full state for each room. + + Returns: + When `SyncVersion.SYNC_V2`, returns a full `SyncResult`. """ # If the user is not part of the mau group, then check that limits have # not been exceeded (if not part of the group by this point, almost certain @@ -327,6 +357,7 @@ class SyncHandler: sync_config.request_key, self._wait_for_sync_for_user, sync_config, + sync_version, since_token, timeout, full_state, @@ -338,6 +369,7 @@ class SyncHandler: async def _wait_for_sync_for_user( self, sync_config: SyncConfig, + sync_version: SyncVersion, since_token: Optional[StreamToken], timeout: int, full_state: bool, @@ -363,9 +395,11 @@ class SyncHandler: else: sync_type = "incremental_sync" + sync_label = f"{sync_version}:{sync_type}" + context = current_context() if context: - context.tag = sync_type + context.tag = sync_label # if we have a since token, delete any to-device messages before that token # (since we now know that the device has received them) @@ -384,14 +418,16 @@ class SyncHandler: # we are going to return immediately, so don't bother calling # notifier.wait_for_events. result: SyncResult = await self.current_sync_for_user( - sync_config, since_token, full_state=full_state + sync_config, sync_version, since_token, full_state=full_state ) else: # Otherwise, we wait for something to happen and report it to the user. async def current_sync_callback( before_token: StreamToken, after_token: StreamToken ) -> SyncResult: - return await self.current_sync_for_user(sync_config, since_token) + return await self.current_sync_for_user( + sync_config, sync_version, since_token + ) result = await self.notifier.wait_for_events( sync_config.user.to_string(), @@ -416,13 +452,14 @@ class SyncHandler: lazy_loaded = "true" else: lazy_loaded = "false" - non_empty_sync_counter.labels(sync_type, lazy_loaded).inc() + non_empty_sync_counter.labels(sync_label, lazy_loaded).inc() return result async def current_sync_for_user( self, sync_config: SyncConfig, + sync_version: SyncVersion, since_token: Optional[StreamToken] = None, full_state: bool = False, ) -> SyncResult: @@ -431,12 +468,26 @@ class SyncHandler: This is a wrapper around `generate_sync_result` which starts an open tracing span to track the sync. See `generate_sync_result` for the next part of your indoctrination. + + Args: + sync_config: Config/info necessary to process the sync request. + sync_version: Determines what kind of sync response to generate. + since_token: The point in the stream to sync from.p. + full_state: Whether to return the full state for each room. + Returns: + When `SyncVersion.SYNC_V2`, returns a full `SyncResult`. """ with start_active_span("sync.current_sync_for_user"): log_kv({"since_token": since_token}) - sync_result = await self.generate_sync_result( - sync_config, since_token, full_state - ) + # Go through the `/sync` v2 path + if sync_version == SyncVersion.SYNC_V2: + sync_result: SyncResult = await self.generate_sync_result( + sync_config, since_token, full_state + ) + else: + raise Exception( + f"Unknown sync_version (this is a Synapse problem): {sync_version}" + ) set_tag(SynapseTags.SYNC_RESULT, bool(sync_result)) return sync_result diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index d19aaf0e22..d0713536e1 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -40,6 +40,7 @@ from synapse.handlers.sync import ( KnockedSyncResult, SyncConfig, SyncResult, + SyncVersion, ) from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_boolean, parse_integer, parse_string @@ -232,6 +233,7 @@ class SyncRestServlet(RestServlet): sync_result = await self.sync_handler.wait_for_sync_for_user( requester, sync_config, + SyncVersion.SYNC_V2, since_token=since_token, timeout=timeout, full_state=full_state, -- cgit 1.5.1 From 5e892671a74251109bf9cf4a78bebed9d8085979 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 16 May 2024 15:04:14 +0100 Subject: Fix bug where push rules would be empty in `/sync` (#17142) Fixes #16987 Some old accounts seem to have an entry in global account data table for push rules, which we should ignore --- changelog.d/17142.bugfix | 1 + synapse/handlers/sync.py | 20 ++++++++------------ tests/handlers/test_sync.py | 29 ++++++++++++++++++++++++++++- 3 files changed, 37 insertions(+), 13 deletions(-) create mode 100644 changelog.d/17142.bugfix (limited to 'synapse') diff --git a/changelog.d/17142.bugfix b/changelog.d/17142.bugfix new file mode 100644 index 0000000000..09b617aed1 --- /dev/null +++ b/changelog.d/17142.bugfix @@ -0,0 +1 @@ +Fix bug where push rules would be empty in `/sync` for some accounts. Introduced in v1.93.0. diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 53fe2a6a53..659499af75 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1971,23 +1971,19 @@ class SyncHandler: ) if push_rules_changed: - global_account_data = { - AccountDataTypes.PUSH_RULES: await self._push_rules_handler.push_rules_for_user( - sync_config.user - ), - **global_account_data, - } + global_account_data = dict(global_account_data) + global_account_data[AccountDataTypes.PUSH_RULES] = ( + await self._push_rules_handler.push_rules_for_user(sync_config.user) + ) else: all_global_account_data = await self.store.get_global_account_data_for_user( user_id ) - global_account_data = { - AccountDataTypes.PUSH_RULES: await self._push_rules_handler.push_rules_for_user( - sync_config.user - ), - **all_global_account_data, - } + global_account_data = dict(all_global_account_data) + global_account_data[AccountDataTypes.PUSH_RULES] = ( + await self._push_rules_handler.push_rules_for_user(sync_config.user) + ) account_data_for_user = ( await sync_config.filter_collection.filter_global_account_data( diff --git a/tests/handlers/test_sync.py b/tests/handlers/test_sync.py index 9c12a11e3a..0299113b95 100644 --- a/tests/handlers/test_sync.py +++ b/tests/handlers/test_sync.py @@ -24,7 +24,7 @@ from parameterized import parameterized from twisted.test.proto_helpers import MemoryReactor -from synapse.api.constants import EventTypes, JoinRules +from synapse.api.constants import AccountDataTypes, EventTypes, JoinRules from synapse.api.errors import Codes, ResourceLimitError from synapse.api.filtering import FilterCollection, Filtering from synapse.api.room_versions import RoomVersion, RoomVersions @@ -895,6 +895,33 @@ class SyncTestCase(tests.unittest.HomeserverTestCase): self.assertIn(private_call_event.event_id, priv_event_ids) + def test_push_rules_with_bad_account_data(self) -> None: + """Some old accounts have managed to set a `m.push_rules` account data, + which we should ignore in /sync response. + """ + + user = self.register_user("alice", "password") + + # Insert the bad account data. + self.get_success( + self.store.add_account_data_for_user(user, AccountDataTypes.PUSH_RULES, {}) + ) + + sync_result: SyncResult = self.get_success( + self.sync_handler.wait_for_sync_for_user( + create_requester(user), generate_sync_config(user) + ) + ) + + for account_dict in sync_result.account_data: + if account_dict["type"] == AccountDataTypes.PUSH_RULES: + # We should have lots of push rules here, rather than the bad + # empty data. + self.assertNotEqual(account_dict["content"], {}) + return + + self.fail("No push rules found") + _request_key = 0 -- cgit 1.5.1 From fd1200344112eb28486ee6f82ee341ada8bb4f06 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 16 May 2024 16:07:54 +0100 Subject: Revert "Improve perf of sync device lists" (#17207) Reverts element-hq/synapse#17191 --- changelog.d/17191.misc | 1 - synapse/handlers/sync.py | 37 ++++++++++++++++++++++++++----- synapse/storage/databases/main/devices.py | 17 ++++++++++++-- 3 files changed, 46 insertions(+), 9 deletions(-) delete mode 100644 changelog.d/17191.misc (limited to 'synapse') diff --git a/changelog.d/17191.misc b/changelog.d/17191.misc deleted file mode 100644 index bd55eeaa33..0000000000 --- a/changelog.d/17191.misc +++ /dev/null @@ -1 +0,0 @@ -Improve performance of calculating device lists changes in `/sync`. diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 659499af75..2bd1b8de88 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1854,13 +1854,38 @@ class SyncHandler: # Step 1a, check for changes in devices of users we share a room # with - users_that_have_changed = ( - await self._device_handler.get_device_changes_in_shared_rooms( - user_id, - sync_result_builder.joined_room_ids, - from_token=since_token, - ) + # + # We do this in two different ways depending on what we have cached. + # If we already have a list of all the user that have changed since + # the last sync then it's likely more efficient to compare the rooms + # they're in with the rooms the syncing user is in. + # + # If we don't have that info cached then we get all the users that + # share a room with our user and check if those users have changed. + cache_result = self.store.get_cached_device_list_changes( + since_token.device_list_key ) + if cache_result.hit: + changed_users = cache_result.entities + + result = await self.store.get_rooms_for_users(changed_users) + + for changed_user_id, entries in result.items(): + # Check if the changed user shares any rooms with the user, + # or if the changed user is the syncing user (as we always + # want to include device list updates of their own devices). + if user_id == changed_user_id or any( + rid in joined_rooms for rid in entries + ): + users_that_have_changed.add(changed_user_id) + else: + users_that_have_changed = ( + await self._device_handler.get_device_changes_in_shared_rooms( + user_id, + sync_result_builder.joined_room_ids, + from_token=since_token, + ) + ) # Step 1b, check for newly joined rooms for room_id in newly_joined_rooms: diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index d98f0593bc..8dbcb3f5a0 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -70,7 +70,10 @@ from synapse.types import ( from synapse.util import json_decoder, json_encoder from synapse.util.caches.descriptors import cached, cachedList from synapse.util.caches.lrucache import LruCache -from synapse.util.caches.stream_change_cache import StreamChangeCache +from synapse.util.caches.stream_change_cache import ( + AllEntitiesChangedResult, + StreamChangeCache, +) from synapse.util.cancellation import cancellable from synapse.util.iterutils import batch_iter from synapse.util.stringutils import shortstr @@ -829,6 +832,16 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): ) return {device[0]: db_to_json(device[1]) for device in devices} + def get_cached_device_list_changes( + self, + from_key: int, + ) -> AllEntitiesChangedResult: + """Get set of users whose devices have changed since `from_key`, or None + if that information is not in our cache. + """ + + return self._device_list_stream_cache.get_all_entities_changed(from_key) + @cancellable async def get_all_devices_changed( self, @@ -1462,7 +1475,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): sql = """ SELECT DISTINCT user_id FROM device_lists_changes_in_room - WHERE {clause} AND stream_id > ? + WHERE {clause} AND stream_id >= ? """ def _get_device_list_changes_in_rooms_txn( -- cgit 1.5.1