From be4a16ff445c9dfba04aeaed695afb3a56e204f7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 29 Jul 2024 22:45:48 +0100 Subject: Sliding Sync: Track whether we have sent rooms down to clients (#17447) The basic idea is that we introduce a new token for a sliding sync connection, which stores the mapping of room to room "status" (i.e. have we sent the room down?). This token allows us to handle duplicate requests properly. In future it can be used to store more "per-connection" information safely. In future this should be migrated into the DB, so its important that we try to reduce the number of syncs where we need to update the per-connection information. In this PoC this only happens when we: a) send down a set of room for the first time, or b) we have previously sent down a room and there are updates but we are not sending the room down the sync (due to not falling in a list range) Co-authored-by: Eric Eastwood --- changelog.d/17447.feature | 1 + pyproject.toml | 4 +- synapse/handlers/sliding_sync.py | 350 +++++++++++++++++-- synapse/rest/client/sync.py | 6 +- synapse/server.py | 1 + synapse/storage/databases/main/state_deltas.py | 37 ++ synapse/types/handlers/__init__.py | 3 +- synapse/types/rest/client/__init__.py | 5 + tests/rest/client/test_sync.py | 452 ++++++++++++++++++++++++- 9 files changed, 814 insertions(+), 45 deletions(-) create mode 100644 changelog.d/17447.feature diff --git a/changelog.d/17447.feature b/changelog.d/17447.feature new file mode 100644 index 0000000000..6f80e298ae --- /dev/null +++ b/changelog.d/17447.feature @@ -0,0 +1 @@ +Track which rooms have been sent to clients in the experimental [MSC3575](https://github.com/matrix-org/matrix-spec-proposals/pull/3575) Sliding Sync `/sync` endpoint. diff --git a/pyproject.toml b/pyproject.toml index 0b5dc418e4..1adf8e087f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -201,8 +201,8 @@ netaddr = ">=0.7.18" # add a lower bound to the Jinja2 dependency. Jinja2 = ">=3.0" bleach = ">=1.4.3" -# We use `Self`, which were added in `typing-extensions` 4.0. -typing-extensions = ">=4.0" +# We use `assert_never`, which were added in `typing-extensions` 4.1. +typing-extensions = ">=4.1" # We enforce that we have a `cryptography` version that bundles an `openssl` # with the latest security patches. cryptography = ">=3.4.7" diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 3231574402..2b74f1c9c9 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -18,6 +18,7 @@ # # import logging +from enum import Enum from itertools import chain from typing import ( TYPE_CHECKING, @@ -34,6 +35,7 @@ from typing import ( import attr from immutabledict import immutabledict +from typing_extensions import assert_never from synapse.api.constants import AccountDataTypes, Direction, EventTypes, Membership from synapse.events import EventBase @@ -52,6 +54,7 @@ from synapse.types import ( RoomStreamToken, SlidingSyncStreamToken, StateMap, + StrCollection, StreamKeyType, StreamToken, UserID, @@ -361,6 +364,8 @@ class SlidingSyncHandler: self.push_rules_handler = hs.get_push_rules_handler() self.rooms_to_exclude_globally = hs.config.server.rooms_to_exclude_from_sync + self.connection_store = SlidingSyncConnectionStore() + async def wait_for_sync_for_user( self, requester: Requester, @@ -464,6 +469,11 @@ class SlidingSyncHandler: # See https://github.com/matrix-org/matrix-doc/issues/1144 raise NotImplementedError() + await self.connection_store.mark_token_seen( + sync_config=sync_config, + from_token=from_token, + ) + # Get all of the room IDs that the user should be able to see in the sync # response has_lists = sync_config.lists is not None and len(sync_config.lists) > 0 @@ -613,7 +623,7 @@ class SlidingSyncHandler: @tag_args async def handle_room(room_id: str) -> None: room_sync_result = await self.get_room_sync_data( - user=sync_config.user, + sync_config=sync_config, room_id=room_id, room_sync_config=relevant_room_map[room_id], room_membership_for_user_at_to_token=room_membership_for_user_map[ @@ -635,11 +645,22 @@ class SlidingSyncHandler: to_token=to_token, ) - # TODO: Update this when we implement per-connection state - connection_token = 0 + if has_lists or has_room_subscriptions: + connection_position = await self.connection_store.record_rooms( + sync_config=sync_config, + from_token=from_token, + sent_room_ids=relevant_room_map.keys(), + # TODO: We need to calculate which rooms have had updates since the `from_token` but were not included in the `sent_room_ids` + unsent_room_ids=[], + ) + elif from_token: + connection_position = from_token.connection_position + else: + # Initial sync without a `from_token` starts at `0` + connection_position = 0 return SlidingSyncResult( - next_pos=SlidingSyncStreamToken(to_token, connection_token), + next_pos=SlidingSyncStreamToken(to_token, connection_position), lists=lists, rooms=rooms, extensions=extensions, @@ -1370,7 +1391,7 @@ class SlidingSyncHandler: async def get_room_sync_data( self, - user: UserID, + sync_config: SlidingSyncConfig, room_id: str, room_sync_config: RoomSyncConfig, room_membership_for_user_at_to_token: _RoomMembershipForUser, @@ -1392,6 +1413,37 @@ class SlidingSyncHandler: from_token: The point in the stream to sync from. to_token: The point in the stream to sync up to. """ + user = sync_config.user + + # Determine whether we should limit the timeline to the token range. + # + # We should return historical messages (before token range) in the + # following cases because we want clients to be able to show a basic + # screen of information: + # - Initial sync (because no `from_token` to limit us anyway) + # - When users `newly_joined` + # - For an incremental sync where we haven't sent it down this + # connection before + from_bound = None + initial = True + if from_token and not room_membership_for_user_at_to_token.newly_joined: + room_status = await self.connection_store.have_sent_room( + sync_config=sync_config, + connection_token=from_token.connection_position, + room_id=room_id, + ) + if room_status.status == HaveSentRoomFlag.LIVE: + from_bound = from_token.stream_token.room_key + initial = False + elif room_status.status == HaveSentRoomFlag.PREVIOUSLY: + assert room_status.last_token is not None + from_bound = room_status.last_token + initial = False + elif room_status.status == HaveSentRoomFlag.NEVER: + from_bound = None + initial = True + else: + assert_never(room_status.status) # Assemble the list of timeline events # @@ -1418,36 +1470,23 @@ class SlidingSyncHandler: prev_batch_token = to_token # We're going to paginate backwards from the `to_token` - from_bound = to_token.room_key + to_bound = to_token.room_key # People shouldn't see past their leave/ban event if room_membership_for_user_at_to_token.membership in ( Membership.LEAVE, Membership.BAN, ): - from_bound = ( + to_bound = ( room_membership_for_user_at_to_token.event_pos.to_room_stream_token() ) - # Determine whether we should limit the timeline to the token range. - # - # We should return historical messages (before token range) in the - # following cases because we want clients to be able to show a basic - # screen of information: - # - Initial sync (because no `from_token` to limit us anyway) - # - When users `newly_joined` - # - TODO: For an incremental sync where we haven't sent it down this - # connection before - to_bound = ( - from_token.stream_token.room_key - if from_token is not None - and not room_membership_for_user_at_to_token.newly_joined - else None - ) - timeline_events, new_room_key = await self.store.paginate_room_events( room_id=room_id, - from_key=from_bound, - to_key=to_bound, + # The bounds are reversed so we can paginate backwards + # (from newer to older events) starting at to_bound. + # This ensures we fill the `limit` with the newest events first, + from_key=to_bound, + to_key=from_bound, direction=Direction.BACKWARDS, # We add one so we can determine if there are enough events to saturate # the limit or not (see `limited`) @@ -1564,12 +1603,6 @@ class SlidingSyncHandler: # indicate to the client that a state reset happened. Perhaps we should indicate # this by setting `initial: True` and empty `required_state`. - # TODO: Since we can't determine whether we've already sent a room down this - # Sliding Sync connection before (we plan to add this optimization in the - # future), we're always returning the requested room state instead of - # updates. - initial = True - # Check whether the room has a name set name_state_ids = await self.get_current_state_ids_at( room_id=room_id, @@ -1715,9 +1748,22 @@ class SlidingSyncHandler: to_token=to_token, ) else: - # TODO: Once we can figure out if we've sent a room down this connection before, - # we can return updates instead of the full required state. - raise NotImplementedError() + assert from_bound is not None + + # TODO: Limit the number of state events we're about to send down + # the room, if its too many we should change this to an + # `initial=True`? + deltas = await self.store.get_current_state_deltas_for_room( + room_id=room_id, + from_token=from_bound, + to_token=to_token.room_key, + ) + # TODO: Filter room state before fetching events + # TODO: Handle state resets where event_id is None + events = await self.store.get_events( + [d.event_id for d in deltas if d.event_id] + ) + room_state = {(s.type, s.state_key): s for s in events.values()} required_room_state: StateMap[EventBase] = {} if required_state_filter != StateFilter.none(): @@ -1863,7 +1909,7 @@ class SlidingSyncHandler: to_token: The point in the stream to sync up to. """ user_id = sync_config.user.to_string() - device_id = sync_config.device_id + device_id = sync_config.requester.device_id # Skip if the extension is not enabled if not to_device_request.enabled: @@ -1939,7 +1985,7 @@ class SlidingSyncHandler: from_token: The point in the stream to sync from. """ user_id = sync_config.user.to_string() - device_id = sync_config.device_id + device_id = sync_config.requester.device_id # Skip if the extension is not enabled if not e2ee_request.enabled: @@ -2094,3 +2140,235 @@ class SlidingSyncHandler: global_account_data_map=global_account_data_map, account_data_by_room_map=account_data_by_room_map, ) + + +class HaveSentRoomFlag(Enum): + """Flag for whether we have sent the room down a sliding sync connection. + + The valid state changes here are: + NEVER -> LIVE + LIVE -> PREVIOUSLY + PREVIOUSLY -> LIVE + """ + + # The room has never been sent down (or we have forgotten we have sent it + # down). + NEVER = 1 + + # We have previously sent the room down, but there are updates that we + # haven't sent down. + PREVIOUSLY = 2 + + # We have sent the room down and the client has received all updates. + LIVE = 3 + + +@attr.s(auto_attribs=True, slots=True, frozen=True) +class HaveSentRoom: + """Whether we have sent the room down a sliding sync connection. + + Attributes: + status: Flag of if we have or haven't sent down the room + last_token: If the flag is `PREVIOUSLY` then this is non-null and + contains the last stream token of the last updates we sent down + the room, i.e. we still need to send everything since then to the + client. + """ + + status: HaveSentRoomFlag + last_token: Optional[RoomStreamToken] + + @staticmethod + def previously(last_token: RoomStreamToken) -> "HaveSentRoom": + """Constructor for `PREVIOUSLY` flag.""" + return HaveSentRoom(HaveSentRoomFlag.PREVIOUSLY, last_token) + + +HAVE_SENT_ROOM_NEVER = HaveSentRoom(HaveSentRoomFlag.NEVER, None) +HAVE_SENT_ROOM_LIVE = HaveSentRoom(HaveSentRoomFlag.LIVE, None) + + +@attr.s(auto_attribs=True) +class SlidingSyncConnectionStore: + """In-memory store of per-connection state, including what rooms we have + previously sent down a sliding sync connection. + + Note: This is NOT safe to run in a worker setup because connection positions will + point to different sets of rooms on different workers. e.g. for the same connection, + a connection position of 5 might have totally different states on worker A and + worker B. + + One complication that we need to deal with here is needing to handle requests being + resent, i.e. if we sent down a room in a response that the client received, we must + consider the room *not* sent when we get the request again. + + This is handled by using an integer "token", which is returned to the client + as part of the sync token. For each connection we store a mapping from + tokens to the room states, and create a new entry when we send down new + rooms. + + Note that for any given sliding sync connection we will only store a maximum + of two different tokens: the previous token from the request and a new token + sent in the response. When we receive a request with a given token, we then + clear out all other entries with a different token. + + Attributes: + _connections: Mapping from `(user_id, conn_id)` to mapping of `token` + to mapping of room ID to `HaveSentRoom`. + """ + + # `(user_id, conn_id)` -> `token` -> `room_id` -> `HaveSentRoom` + _connections: Dict[Tuple[str, str], Dict[int, Dict[str, HaveSentRoom]]] = ( + attr.Factory(dict) + ) + + async def have_sent_room( + self, sync_config: SlidingSyncConfig, connection_token: int, room_id: str + ) -> HaveSentRoom: + """For the given user_id/conn_id/token, return whether we have + previously sent the room down + """ + + conn_key = self._get_connection_key(sync_config) + sync_statuses = self._connections.setdefault(conn_key, {}) + room_status = sync_statuses.get(connection_token, {}).get( + room_id, HAVE_SENT_ROOM_NEVER + ) + + return room_status + + async def record_rooms( + self, + sync_config: SlidingSyncConfig, + from_token: Optional[SlidingSyncStreamToken], + *, + sent_room_ids: StrCollection, + unsent_room_ids: StrCollection, + ) -> int: + """Record which rooms we have/haven't sent down in a new response + + Attributes: + sync_config + from_token: The since token from the request, if any + sent_room_ids: The set of room IDs that we have sent down as + part of this request (only needs to be ones we didn't + previously sent down). + unsent_room_ids: The set of room IDs that have had updates + since the `from_token`, but which were not included in + this request + """ + prev_connection_token = 0 + if from_token is not None: + prev_connection_token = from_token.connection_position + + # If there are no changes then this is a noop. + if not sent_room_ids and not unsent_room_ids: + return prev_connection_token + + conn_key = self._get_connection_key(sync_config) + sync_statuses = self._connections.setdefault(conn_key, {}) + + # Generate a new token, removing any existing entries in that token + # (which can happen if requests get resent). + new_store_token = prev_connection_token + 1 + sync_statuses.pop(new_store_token, None) + + # Copy over and update the room mappings. + new_room_statuses = dict(sync_statuses.get(prev_connection_token, {})) + + # Whether we have updated the `new_room_statuses`, if we don't by the + # end we can treat this as a noop. + have_updated = False + for room_id in sent_room_ids: + new_room_statuses[room_id] = HAVE_SENT_ROOM_LIVE + have_updated = True + + # Whether we add/update the entries for unsent rooms depends on the + # existing entry: + # - LIVE: We have previously sent down everything up to + # `last_room_token, so we update the entry to be `PREVIOUSLY` with + # `last_room_token`. + # - PREVIOUSLY: We have previously sent down everything up to *a* + # given token, so we don't need to update the entry. + # - NEVER: We have never previously sent down the room, and we haven't + # sent anything down this time either so we leave it as NEVER. + + # Work out the new state for unsent rooms that were `LIVE`. + if from_token: + new_unsent_state = HaveSentRoom.previously(from_token.stream_token.room_key) + else: + new_unsent_state = HAVE_SENT_ROOM_NEVER + + for room_id in unsent_room_ids: + prev_state = new_room_statuses.get(room_id) + if prev_state is not None and prev_state.status == HaveSentRoomFlag.LIVE: + new_room_statuses[room_id] = new_unsent_state + have_updated = True + + if not have_updated: + return prev_connection_token + + sync_statuses[new_store_token] = new_room_statuses + + return new_store_token + + async def mark_token_seen( + self, + sync_config: SlidingSyncConfig, + from_token: Optional[SlidingSyncStreamToken], + ) -> None: + """We have received a request with the given token, so we can clear out + any other tokens associated with the connection. + + If there is no from token then we have started afresh, and so we delete + all tokens associated with the device. + """ + # Clear out any tokens for the connection that doesn't match the one + # from the request. + + conn_key = self._get_connection_key(sync_config) + sync_statuses = self._connections.pop(conn_key, {}) + if from_token is None: + return + + sync_statuses = { + connection_token: room_statuses + for connection_token, room_statuses in sync_statuses.items() + if connection_token == from_token.connection_position + } + if sync_statuses: + self._connections[conn_key] = sync_statuses + + @staticmethod + def _get_connection_key(sync_config: SlidingSyncConfig) -> Tuple[str, str]: + """Return a unique identifier for this connection. + + The first part is simply the user ID. + + The second part is generally a combination of device ID and conn_id. + However, both these two are optional (e.g. puppet access tokens don't + have device IDs), so this handles those edge cases. + + We use this over the raw `conn_id` to avoid clashes between different + clients that use the same `conn_id`. Imagine a user uses a web client + that uses `conn_id: main_sync_loop` and an Android client that also has + a `conn_id: main_sync_loop`. + """ + + user_id = sync_config.user.to_string() + + # Only one sliding sync connection is allowed per given conn_id (empty + # or not). + conn_id = sync_config.conn_id or "" + + if sync_config.requester.device_id: + return (user_id, f"D/{sync_config.requester.device_id}/{conn_id}") + + if sync_config.requester.access_token_id: + # If we don't have a device, then the access token ID should be a + # stable ID. + return (user_id, f"A/{sync_config.requester.access_token_id}/{conn_id}") + + # If we have neither then its likely an AS or some weird token. Either + # way we can just fail here. + raise Exception("Cannot use sliding sync with access token type") diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index 7cf1f56435..bf3ac8d483 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -881,7 +881,6 @@ class SlidingSyncRestServlet(RestServlet): ) user = requester.user - device_id = requester.device_id timeout = parse_integer(request, "timeout", default=0) # Position in the stream @@ -902,11 +901,12 @@ class SlidingSyncRestServlet(RestServlet): sync_config = SlidingSyncConfig( user=user, - device_id=device_id, + requester=requester, # FIXME: Currently, we're just manually copying the fields from the - # `SlidingSyncBody` into the config. How can we gurantee into the future + # `SlidingSyncBody` into the config. How can we guarantee into the future # that we don't forget any? I would like something more structured like # `copy_attributes(from=body, to=config)` + conn_id=body.conn_id, lists=body.lists, room_subscriptions=body.room_subscriptions, extensions=body.extensions, diff --git a/synapse/server.py b/synapse/server.py index 4a3f9ff934..46b9d83a04 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -559,6 +559,7 @@ class HomeServer(metaclass=abc.ABCMeta): def get_sync_handler(self) -> SyncHandler: return SyncHandler(self) + @cache_in_self def get_sliding_sync_handler(self) -> SlidingSyncHandler: return SlidingSyncHandler(self) diff --git a/synapse/storage/databases/main/state_deltas.py b/synapse/storage/databases/main/state_deltas.py index 036972ac25..da3ebe66b8 100644 --- a/synapse/storage/databases/main/state_deltas.py +++ b/synapse/storage/databases/main/state_deltas.py @@ -26,6 +26,8 @@ import attr from synapse.storage._base import SQLBaseStore from synapse.storage.database import LoggingTransaction +from synapse.storage.databases.main.stream import _filter_results_by_stream +from synapse.types import RoomStreamToken from synapse.util.caches.stream_change_cache import StreamChangeCache logger = logging.getLogger(__name__) @@ -156,3 +158,38 @@ class StateDeltasStore(SQLBaseStore): "get_max_stream_id_in_current_state_deltas", self._get_max_stream_id_in_current_state_deltas_txn, ) + + async def get_current_state_deltas_for_room( + self, room_id: str, from_token: RoomStreamToken, to_token: RoomStreamToken + ) -> List[StateDelta]: + """Get the state deltas between two tokens.""" + + def get_current_state_deltas_for_room_txn( + txn: LoggingTransaction, + ) -> List[StateDelta]: + sql = """ + SELECT instance_name, stream_id, type, state_key, event_id, prev_event_id + FROM current_state_delta_stream + WHERE room_id = ? AND ? < stream_id AND stream_id <= ? + ORDER BY stream_id ASC + """ + txn.execute( + sql, (room_id, from_token.stream, to_token.get_max_stream_pos()) + ) + + return [ + StateDelta( + stream_id=row[1], + room_id=room_id, + event_type=row[2], + state_key=row[3], + event_id=row[4], + prev_event_id=row[5], + ) + for row in txn + if _filter_results_by_stream(from_token, to_token, row[0], row[1]) + ] + + return await self.db_pool.runInteraction( + "get_current_state_deltas_for_room", get_current_state_deltas_for_room_txn + ) diff --git a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py index 479222a18d..f3141b05a0 100644 --- a/synapse/types/handlers/__init__.py +++ b/synapse/types/handlers/__init__.py @@ -35,6 +35,7 @@ from synapse.types import ( DeviceListUpdates, JsonDict, JsonMapping, + Requester, SlidingSyncStreamToken, StreamToken, UserID, @@ -109,7 +110,7 @@ class SlidingSyncConfig(SlidingSyncBody): """ user: UserID - device_id: Optional[str] + requester: Requester # Pydantic config class Config: diff --git a/synapse/types/rest/client/__init__.py b/synapse/types/rest/client/__init__.py index 34e07ddac5..dfe3b1e0f7 100644 --- a/synapse/types/rest/client/__init__.py +++ b/synapse/types/rest/client/__init__.py @@ -120,6 +120,9 @@ class SlidingSyncBody(RequestBodyModel): Sliding Sync API request body. Attributes: + conn_id: An optional string to identify this connection to the server. + Only one sliding sync connection is allowed per given conn_id (empty + or not). lists: Sliding window API. A map of list key to list information (:class:`SlidingSyncList`). Max lists: 100. The list keys should be arbitrary strings which the client is using to refer to the list. Keep this @@ -343,6 +346,8 @@ class SlidingSyncBody(RequestBodyModel): e2ee: Optional[E2eeExtension] = None account_data: Optional[AccountDataExtension] = None + conn_id: Optional[str] + # mypy workaround via https://github.com/pydantic/pydantic/issues/156#issuecomment-1130883884 if TYPE_CHECKING: lists: Optional[Dict[str, SlidingSyncList]] = None diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 2bbbd95a76..3e7b8f76a1 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -59,6 +59,7 @@ from synapse.types import ( StreamToken, UserID, ) +from synapse.types.handlers import SlidingSyncConfig from synapse.util import Clock from synapse.util.stringutils import random_string @@ -3676,13 +3677,52 @@ class SlidingSyncTestCase(SlidingSyncBase): # Make the incremental Sliding Sync request response_body, _ = self.do_sync(sync_body, since=from_token, tok=user1_tok) + # We only return updates but only if we've sent the room down the + # connection before. + self.assertIsNone(response_body["rooms"][room_id1].get("required_state")) + self.assertIsNone(response_body["rooms"][room_id1].get("invite_state")) + + def test_rooms_required_state_incremental_sync_restart(self) -> None: + """ + Test `rooms.required_state` returns requested state events in the room during an + incremental sync, after a restart (and so the in memory caches are reset). + """ + + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + + sync_body = { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [ + [EventTypes.Create, ""], + [EventTypes.RoomHistoryVisibility, ""], + # This one doesn't exist in the room + [EventTypes.Tombstone, ""], + ], + "timeline_limit": 1, + } + } + } + _, from_token = self.do_sync(sync_body, tok=user1_tok) + + # Reset the in-memory cache + self.hs.get_sliding_sync_handler().connection_store._connections.clear() + + # Make the Sliding Sync request + response_body, _ = self.do_sync(sync_body, since=from_token, tok=user1_tok) + + # If the cache has been cleared then we do expect the state to come down state_map = self.get_success( self.storage_controllers.state.get_current_state(room_id1) ) - # The returned state doesn't change from initial to incremental sync. In the - # future, we will only return updates but only if we've sent the room down the - # connection before. self._assertRequiredStateIncludes( response_body["rooms"][room_id1]["required_state"], { @@ -4434,6 +4474,412 @@ class SlidingSyncTestCase(SlidingSyncBase): # `world_readable` but currently we don't support this. self.assertIsNone(response_body["rooms"].get(room_id1), response_body["rooms"]) + def test_rooms_required_state_incremental_sync_LIVE(self) -> None: + """Test that we only get state updates in incremental sync for rooms + we've already seen (LIVE). + """ + + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + user2_id = self.register_user("user2", "pass") + user2_tok = self.login(user2_id, "pass") + + room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) + self.helper.join(room_id1, user1_id, tok=user1_tok) + + # Make the Sliding Sync request + sync_body = { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [ + [EventTypes.Create, ""], + [EventTypes.RoomHistoryVisibility, ""], + # This one doesn't exist in the room + [EventTypes.Name, ""], + ], + "timeline_limit": 0, + } + } + } + + response_body, from_token = self.do_sync(sync_body, tok=user1_tok) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id1) + ) + + self._assertRequiredStateIncludes( + response_body["rooms"][room_id1]["required_state"], + { + state_map[(EventTypes.Create, "")], + state_map[(EventTypes.RoomHistoryVisibility, "")], + }, + exact=True, + ) + + # Send a state event + self.helper.send_state( + room_id1, EventTypes.Name, body={"name": "foo"}, tok=user2_tok + ) + + response_body, _ = self.do_sync(sync_body, since=from_token, tok=user1_tok) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id1) + ) + + self.assertNotIn("initial", response_body["rooms"][room_id1]) + self._assertRequiredStateIncludes( + response_body["rooms"][room_id1]["required_state"], + { + state_map[(EventTypes.Name, "")], + }, + exact=True, + ) + + @parameterized.expand([(False,), (True,)]) + def test_rooms_timeline_incremental_sync_PREVIOUSLY(self, limited: bool) -> None: + """ + Test getting room data where we have previously sent down the room, but + we missed sending down some timeline events previously and so its status + is considered PREVIOUSLY. + + There are two versions of this test, one where there are more messages + than the timeline limit, and one where there isn't. + """ + + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok) + room_id2 = self.helper.create_room_as(user1_id, tok=user1_tok) + + self.helper.send(room_id1, "msg", tok=user1_tok) + + timeline_limit = 5 + conn_id = "conn_id" + sync_body = { + "lists": { + "foo-list": { + "ranges": [[0, 0]], + "required_state": [], + "timeline_limit": timeline_limit, + } + }, + "conn_id": "conn_id", + } + + # The first room gets sent down the initial sync + response_body, initial_from_token = self.do_sync(sync_body, tok=user1_tok) + self.assertCountEqual( + response_body["rooms"].keys(), {room_id1}, response_body["rooms"] + ) + + # We now send down some events in room1 (depending on the test param). + expected_events = [] # The set of events in the timeline + if limited: + for _ in range(10): + resp = self.helper.send(room_id1, "msg1", tok=user1_tok) + expected_events.append(resp["event_id"]) + else: + resp = self.helper.send(room_id1, "msg1", tok=user1_tok) + expected_events.append(resp["event_id"]) + + # A second messages happens in the other room, so room1 won't get sent down. + self.helper.send(room_id2, "msg", tok=user1_tok) + + # Only the second room gets sent down sync. + response_body, from_token = self.do_sync( + sync_body, since=initial_from_token, tok=user1_tok + ) + + self.assertCountEqual( + response_body["rooms"].keys(), {room_id2}, response_body["rooms"] + ) + + # FIXME: This is a hack to record that the first room wasn't sent down + # sync, as we don't implement that currently. + sliding_sync_handler = self.hs.get_sliding_sync_handler() + requester = self.get_success( + self.hs.get_auth().get_user_by_access_token(user1_tok) + ) + sync_config = SlidingSyncConfig( + user=requester.user, + requester=requester, + conn_id=conn_id, + ) + + parsed_initial_from_token = self.get_success( + SlidingSyncStreamToken.from_string(self.store, initial_from_token) + ) + connection_position = self.get_success( + sliding_sync_handler.connection_store.record_rooms( + sync_config, + parsed_initial_from_token, + sent_room_ids=[], + unsent_room_ids=[room_id1], + ) + ) + + # FIXME: Now fix up `from_token` with new connect position above. + parsed_from_token = self.get_success( + SlidingSyncStreamToken.from_string(self.store, from_token) + ) + parsed_from_token = SlidingSyncStreamToken( + stream_token=parsed_from_token.stream_token, + connection_position=connection_position, + ) + from_token = self.get_success(parsed_from_token.to_string(self.store)) + + # We now send another event to room1, so we should sync all the missing events. + resp = self.helper.send(room_id1, "msg2", tok=user1_tok) + expected_events.append(resp["event_id"]) + + # This sync should contain the messages from room1 not yet sent down. + response_body, _ = self.do_sync(sync_body, since=from_token, tok=user1_tok) + + self.assertCountEqual( + response_body["rooms"].keys(), {room_id1}, response_body["rooms"] + ) + self.assertNotIn("initial", response_body["rooms"][room_id1]) + + self.assertEqual( + [ev["event_id"] for ev in response_body["rooms"][room_id1]["timeline"]], + expected_events[-timeline_limit:], + ) + self.assertEqual(response_body["rooms"][room_id1]["limited"], limited) + self.assertEqual(response_body["rooms"][room_id1].get("required_state"), None) + + def test_rooms_required_state_incremental_sync_PREVIOUSLY(self) -> None: + """ + Test getting room data where we have previously sent down the room, but + we missed sending down some state previously and so its status is + considered PREVIOUSLY. + """ + + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok) + room_id2 = self.helper.create_room_as(user1_id, tok=user1_tok) + + self.helper.send(room_id1, "msg", tok=user1_tok) + + conn_id = "conn_id" + sync_body = { + "lists": { + "foo-list": { + "ranges": [[0, 0]], + "required_state": [ + [EventTypes.Create, ""], + [EventTypes.RoomHistoryVisibility, ""], + # This one doesn't exist in the room + [EventTypes.Name, ""], + ], + "timeline_limit": 0, + } + }, + "conn_id": "conn_id", + } + + # The first room gets sent down the initial sync + response_body, initial_from_token = self.do_sync(sync_body, tok=user1_tok) + self.assertCountEqual( + response_body["rooms"].keys(), {room_id1}, response_body["rooms"] + ) + + # We now send down some state in room1 + resp = self.helper.send_state( + room_id1, EventTypes.Name, {"name": "foo"}, tok=user1_tok + ) + name_change_id = resp["event_id"] + + # A second messages happens in the other room, so room1 won't get sent down. + self.helper.send(room_id2, "msg", tok=user1_tok) + + # Only the second room gets sent down sync. + response_body, from_token = self.do_sync( + sync_body, since=initial_from_token, tok=user1_tok + ) + + self.assertCountEqual( + response_body["rooms"].keys(), {room_id2}, response_body["rooms"] + ) + + # FIXME: This is a hack to record that the first room wasn't sent down + # sync, as we don't implement that currently. + sliding_sync_handler = self.hs.get_sliding_sync_handler() + requester = self.get_success( + self.hs.get_auth().get_user_by_access_token(user1_tok) + ) + sync_config = SlidingSyncConfig( + user=requester.user, + requester=requester, + conn_id=conn_id, + ) + + parsed_initial_from_token = self.get_success( + SlidingSyncStreamToken.from_string(self.store, initial_from_token) + ) + connection_position = self.get_success( + sliding_sync_handler.connection_store.record_rooms( + sync_config, + parsed_initial_from_token, + sent_room_ids=[], + unsent_room_ids=[room_id1], + ) + ) + + # FIXME: Now fix up `from_token` with new connect position above. + parsed_from_token = self.get_success( + SlidingSyncStreamToken.from_string(self.store, from_token) + ) + parsed_from_token = SlidingSyncStreamToken( + stream_token=parsed_from_token.stream_token, + connection_position=connection_position, + ) + from_token = self.get_success(parsed_from_token.to_string(self.store)) + + # We now send another event to room1, so we should sync all the missing state. + self.helper.send(room_id1, "msg", tok=user1_tok) + + # This sync should contain the state changes from room1. + response_body, _ = self.do_sync(sync_body, since=from_token, tok=user1_tok) + + self.assertCountEqual( + response_body["rooms"].keys(), {room_id1}, response_body["rooms"] + ) + self.assertNotIn("initial", response_body["rooms"][room_id1]) + + # We should only see the name change. + self.assertEqual( + [ + ev["event_id"] + for ev in response_body["rooms"][room_id1]["required_state"] + ], + [name_change_id], + ) + + def test_rooms_required_state_incremental_sync_NEVER(self) -> None: + """ + Test getting `required_state` where we have NEVER sent down the room before + """ + + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok) + room_id2 = self.helper.create_room_as(user1_id, tok=user1_tok) + + self.helper.send(room_id1, "msg", tok=user1_tok) + + sync_body = { + "lists": { + "foo-list": { + "ranges": [[0, 0]], + "required_state": [ + [EventTypes.Create, ""], + [EventTypes.RoomHistoryVisibility, ""], + # This one doesn't exist in the room + [EventTypes.Name, ""], + ], + "timeline_limit": 1, + } + }, + } + + # A message happens in the other room, so room1 won't get sent down. + self.helper.send(room_id2, "msg", tok=user1_tok) + + # Only the second room gets sent down sync. + response_body, from_token = self.do_sync(sync_body, tok=user1_tok) + + self.assertCountEqual( + response_body["rooms"].keys(), {room_id2}, response_body["rooms"] + ) + + # We now send another event to room1, so we should send down the full + # room. + self.helper.send(room_id1, "msg2", tok=user1_tok) + + # This sync should contain the messages from room1 not yet sent down. + response_body, _ = self.do_sync(sync_body, since=from_token, tok=user1_tok) + + self.assertCountEqual( + response_body["rooms"].keys(), {room_id1}, response_body["rooms"] + ) + + self.assertEqual(response_body["rooms"][room_id1]["initial"], True) + + state_map = self.get_success( + self.storage_controllers.state.get_current_state(room_id1) + ) + + self._assertRequiredStateIncludes( + response_body["rooms"][room_id1]["required_state"], + { + state_map[(EventTypes.Create, "")], + state_map[(EventTypes.RoomHistoryVisibility, "")], + }, + exact=True, + ) + + def test_rooms_timeline_incremental_sync_NEVER(self) -> None: + """ + Test getting timeline room data where we have NEVER sent down the room + before + """ + + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok) + room_id2 = self.helper.create_room_as(user1_id, tok=user1_tok) + + sync_body = { + "lists": { + "foo-list": { + "ranges": [[0, 0]], + "required_state": [], + "timeline_limit": 5, + } + }, + } + + expected_events = [] + for _ in range(4): + resp = self.helper.send(room_id1, "msg", tok=user1_tok) + expected_events.append(resp["event_id"]) + + # A message happens in the other room, so room1 won't get sent down. + self.helper.send(room_id2, "msg", tok=user1_tok) + + # Only the second room gets sent down sync. + response_body, from_token = self.do_sync(sync_body, tok=user1_tok) + + self.assertCountEqual( + response_body["rooms"].keys(), {room_id2}, response_body["rooms"] + ) + + # We now send another event to room1 so it comes down sync + resp = self.helper.send(room_id1, "msg2", tok=user1_tok) + expected_events.append(resp["event_id"]) + + # This sync should contain the messages from room1 not yet sent down. + response_body, _ = self.do_sync(sync_body, since=from_token, tok=user1_tok) + + self.assertCountEqual( + response_body["rooms"].keys(), {room_id1}, response_body["rooms"] + ) + + self.assertEqual( + [ev["event_id"] for ev in response_body["rooms"][room_id1]["timeline"]], + expected_events, + ) + self.assertEqual(response_body["rooms"][room_id1]["limited"], True) + self.assertEqual(response_body["rooms"][room_id1]["initial"], True) + class SlidingSyncToDeviceExtensionTestCase(SlidingSyncBase): """Tests for the to-device sliding sync extension""" -- cgit 1.5.1 From 34306be5aa7ebf7913dd28d048887802dc7e079b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 30 Jul 2024 09:30:44 +0100 Subject: Only send rooms with updates down sliding sync (#17479) Rather than always including all rooms in range. Also adds a pre-filter to rooms that checks the stream change cache to see if anything might have happened. Based on #17447 --------- Co-authored-by: Eric Eastwood --- changelog.d/17479.misc | 1 + synapse/handlers/sliding_sync.py | 51 +++++++++++++++++- synapse/storage/databases/main/stream.py | 10 ++++ synapse/types/handlers/__init__.py | 17 +++++- tests/rest/client/test_sync.py | 89 ++++++++++++++++++++++---------- 5 files changed, 138 insertions(+), 30 deletions(-) create mode 100644 changelog.d/17479.misc diff --git a/changelog.d/17479.misc b/changelog.d/17479.misc new file mode 100644 index 0000000000..4502f71662 --- /dev/null +++ b/changelog.d/17479.misc @@ -0,0 +1 @@ +Do not send down empty room entries down experimental sliding sync endpoint. diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 2b74f1c9c9..84f2fa18ff 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -619,6 +619,51 @@ class SlidingSyncHandler: # Fetch room data rooms: Dict[str, SlidingSyncResult.RoomResult] = {} + # Filter out rooms that haven't received updates and we've sent down + # previously. + if from_token: + rooms_should_send = set() + + # First we check if there are rooms that match a list/room + # subscription and have updates we need to send (i.e. either because + # we haven't sent the room down, or we have but there are missing + # updates). + for room_id in relevant_room_map: + status = await self.connection_store.have_sent_room( + sync_config, + from_token.connection_position, + room_id, + ) + if ( + # The room was never sent down before so the client needs to know + # about it regardless of any updates. + status.status == HaveSentRoomFlag.NEVER + # `PREVIOUSLY` literally means the "room was sent down before *AND* + # there are updates we haven't sent down" so we already know this + # room has updates. + or status.status == HaveSentRoomFlag.PREVIOUSLY + ): + rooms_should_send.add(room_id) + elif status.status == HaveSentRoomFlag.LIVE: + # We know that we've sent all updates up until `from_token`, + # so we just need to check if there have been updates since + # then. + pass + else: + assert_never(status.status) + + # We only need to check for new events since any state changes + # will also come down as new events. + rooms_that_have_updates = self.store.get_rooms_that_might_have_updates( + relevant_room_map.keys(), from_token.stream_token.room_key + ) + rooms_should_send.update(rooms_that_have_updates) + relevant_room_map = { + room_id: room_sync_config + for room_id, room_sync_config in relevant_room_map.items() + if room_id in rooms_should_send + } + @trace @tag_args async def handle_room(room_id: str) -> None: @@ -633,7 +678,9 @@ class SlidingSyncHandler: to_token=to_token, ) - rooms[room_id] = room_sync_result + # Filter out empty room results during incremental sync + if room_sync_result or not from_token: + rooms[room_id] = room_sync_result with start_active_span("sliding_sync.generate_room_entries"): await concurrently_execute(handle_room, relevant_room_map, 10) @@ -2198,7 +2245,7 @@ class SlidingSyncConnectionStore: a connection position of 5 might have totally different states on worker A and worker B. - One complication that we need to deal with here is needing to handle requests being + One complication that we need to deal with here is needing to handle requests being resent, i.e. if we sent down a room in a response that the client received, we must consider the room *not* sent when we get the request again. diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index b034361aec..4207e73c7f 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -2104,3 +2104,13 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): return RoomStreamToken(stream=last_position.stream - 1) return None + + def get_rooms_that_might_have_updates( + self, room_ids: StrCollection, from_token: RoomStreamToken + ) -> StrCollection: + """Filters given room IDs down to those that might have updates, i.e. + removes rooms that definitely do not have updates. + """ + return self._events_stream_cache.get_entities_changed( + room_ids, from_token.stream + ) diff --git a/synapse/types/handlers/__init__.py b/synapse/types/handlers/__init__.py index f3141b05a0..f26cc0e903 100644 --- a/synapse/types/handlers/__init__.py +++ b/synapse/types/handlers/__init__.py @@ -238,6 +238,17 @@ class SlidingSyncResult: notification_count: int highlight_count: int + def __bool__(self) -> bool: + return ( + # If this is the first time the client is seeing the room, we should not filter it out + # under any circumstance. + self.initial + # We need to let the client know if there are any new events + or bool(self.required_state) + or bool(self.timeline_events) + or bool(self.stripped_state) + ) + @attr.s(slots=True, frozen=True, auto_attribs=True) class SlidingWindowList: """ @@ -367,7 +378,11 @@ class SlidingSyncResult: to tell if the notifier needs to wait for more events when polling for events. """ - return bool(self.lists or self.rooms or self.extensions) + # We don't include `self.lists` here, as a) `lists` is always non-empty even if + # there are no changes, and b) since we're sorting rooms by `stream_ordering` of + # the latest activity, anything that would cause the order to change would end + # up in `self.rooms` and cause us to send down the change. + return bool(self.rooms or self.extensions) @staticmethod def empty(next_pos: SlidingSyncStreamToken) -> "SlidingSyncResult": diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 3e7b8f76a1..5abf1041be 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -69,7 +69,6 @@ from tests.federation.transport.test_knocking import ( ) from tests.server import TimedOutException from tests.test_utils.event_injection import create_event, mark_event_as_partial_state -from tests.unittest import skip_unless logger = logging.getLogger(__name__) @@ -1656,12 +1655,6 @@ class SlidingSyncTestCase(SlidingSyncBase): channel.json_body["rooms"][room_id]["timeline"], ) - # TODO: Once we remove `ops`, we should be able to add a `RoomResult.__bool__` to - # check if there are any updates since the `from_token`. - @skip_unless( - False, - "Once we remove ops from the Sliding Sync response, this test should pass", - ) def test_wait_for_new_data_timeout(self) -> None: """ Test to make sure that the Sliding Sync request waits for new data to arrive but @@ -1711,12 +1704,8 @@ class SlidingSyncTestCase(SlidingSyncBase): channel.await_result(timeout_ms=1200) self.assertEqual(channel.code, 200, channel.json_body) - # We still see rooms because that's how Sliding Sync lists work but we reached - # the timeout before seeing them - self.assertEqual( - [event["event_id"] for event in channel.json_body["rooms"].keys()], - [room_id], - ) + # There should be no room sent down. + self.assertFalse(channel.json_body["rooms"]) def test_filter_list(self) -> None: """ @@ -3556,19 +3545,7 @@ class SlidingSyncTestCase(SlidingSyncBase): response_body, _ = self.do_sync(sync_body, since=from_token, tok=user1_tok) # Nothing to see for this banned user in the room in the token range - self.assertIsNone(response_body["rooms"][room_id1].get("timeline")) - # No events returned in the timeline so nothing is "live" - self.assertEqual( - response_body["rooms"][room_id1]["num_live"], - 0, - response_body["rooms"][room_id1], - ) - # There aren't anymore events to paginate to in this range - self.assertEqual( - response_body["rooms"][room_id1]["limited"], - False, - response_body["rooms"][room_id1], - ) + self.assertIsNone(response_body["rooms"].get(room_id1)) def test_rooms_no_required_state(self) -> None: """ @@ -3668,12 +3645,15 @@ class SlidingSyncTestCase(SlidingSyncBase): # This one doesn't exist in the room [EventTypes.Tombstone, ""], ], - "timeline_limit": 0, + "timeline_limit": 1, } } } _, from_token = self.do_sync(sync_body, tok=user1_tok) + # Send a message so the room comes down sync. + self.helper.send(room_id1, "msg", tok=user1_tok) + # Make the incremental Sliding Sync request response_body, _ = self.do_sync(sync_body, since=from_token, tok=user1_tok) @@ -4880,6 +4860,61 @@ class SlidingSyncTestCase(SlidingSyncBase): self.assertEqual(response_body["rooms"][room_id1]["limited"], True) self.assertEqual(response_body["rooms"][room_id1]["initial"], True) + def test_rooms_with_no_updates_do_not_come_down_incremental_sync(self) -> None: + """ + Test that rooms with no updates are returned in subsequent incremental + syncs. + """ + + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok) + + sync_body = { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 0, + } + } + } + + _, from_token = self.do_sync(sync_body, tok=user1_tok) + + # Make the incremental Sliding Sync request + response_body, _ = self.do_sync(sync_body, since=from_token, tok=user1_tok) + + # Nothing has happened in the room, so the room should not come down + # /sync. + self.assertIsNone(response_body["rooms"].get(room_id1)) + + def test_empty_initial_room_comes_down_sync(self) -> None: + """ + Test that rooms come down /sync even with empty required state and + timeline limit in initial sync. + """ + + user1_id = self.register_user("user1", "pass") + user1_tok = self.login(user1_id, "pass") + + room_id1 = self.helper.create_room_as(user1_id, tok=user1_tok) + + sync_body = { + "lists": { + "foo-list": { + "ranges": [[0, 1]], + "required_state": [], + "timeline_limit": 0, + } + } + } + + # Make the Sliding Sync request + response_body, _ = self.do_sync(sync_body, tok=user1_tok) + self.assertEqual(response_body["rooms"][room_id1]["initial"], True) + class SlidingSyncToDeviceExtensionTestCase(SlidingSyncBase): """Tests for the to-device sliding sync extension""" -- cgit 1.5.1 From 808dab0699c2e5f52e9153650323ed93b0bc4c25 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 30 Jul 2024 09:51:24 +0100 Subject: Fix `failures` property in `/keys/query` (#17499) Fixes: https://github.com/element-hq/synapse/issues/17498 Fixes: https://github.com/element-hq/element-web/issues/27867 --- changelog.d/17499.bugfix | 1 + synapse/handlers/e2e_keys.py | 26 ++++++++++++------ tests/handlers/test_e2e_keys.py | 59 ++++++++++++++++++++++++++++++++++++++--- 3 files changed, 75 insertions(+), 11 deletions(-) create mode 100644 changelog.d/17499.bugfix diff --git a/changelog.d/17499.bugfix b/changelog.d/17499.bugfix new file mode 100644 index 0000000000..5cb7b3c30e --- /dev/null +++ b/changelog.d/17499.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in v1.110.0 which caused `/keys/query` to return incomplete results, leading to high network activity and CPU usage on Matrix clients. diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 668cec513b..f78e66ad0a 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -291,13 +291,20 @@ class E2eKeysHandler: # Only try and fetch keys for destinations that are not marked as # down. - filtered_destinations = await filter_destinations_by_retry_limiter( - remote_queries_not_in_cache.keys(), - self.clock, - self.store, - # Let's give an arbitrary grace period for those hosts that are - # only recently down - retry_due_within_ms=60 * 1000, + unfiltered_destinations = remote_queries_not_in_cache.keys() + filtered_destinations = set( + await filter_destinations_by_retry_limiter( + unfiltered_destinations, + self.clock, + self.store, + # Let's give an arbitrary grace period for those hosts that are + # only recently down + retry_due_within_ms=60 * 1000, + ) + ) + failures.update( + (dest, _NOT_READY_FOR_RETRY_FAILURE) + for dest in (unfiltered_destinations - filtered_destinations) ) await concurrently_execute( @@ -1641,6 +1648,9 @@ def _check_device_signature( raise SynapseError(400, "Invalid signature", Codes.INVALID_SIGNATURE) +_NOT_READY_FOR_RETRY_FAILURE = {"status": 503, "message": "Not ready for retry"} + + def _exception_to_failure(e: Exception) -> JsonDict: if isinstance(e, SynapseError): return {"status": e.code, "errcode": e.errcode, "message": str(e)} @@ -1649,7 +1659,7 @@ def _exception_to_failure(e: Exception) -> JsonDict: return {"status": e.code, "message": str(e)} if isinstance(e, NotRetryingDestination): - return {"status": 503, "message": "Not ready for retry"} + return _NOT_READY_FOR_RETRY_FAILURE # include ConnectionRefused and other errors # diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py index 0e6352ff4b..8a3dfdcf75 100644 --- a/tests/handlers/test_e2e_keys.py +++ b/tests/handlers/test_e2e_keys.py @@ -43,9 +43,7 @@ from tests.unittest import override_config class E2eKeysHandlerTestCase(unittest.HomeserverTestCase): def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: self.appservice_api = mock.AsyncMock() - return self.setup_test_homeserver( - federation_client=mock.Mock(), application_service_api=self.appservice_api - ) + return self.setup_test_homeserver(application_service_api=self.appservice_api) def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.handler = hs.get_e2e_keys_handler() @@ -1224,6 +1222,61 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase): }, ) + def test_query_devices_remote_down(self) -> None: + """Tests that querying keys for a remote user on an unreachable server returns + results in the "failures" property + """ + + remote_user_id = "@test:other" + local_user_id = "@test:test" + + # The backoff code treats time zero as special + self.reactor.advance(5) + + self.hs.get_federation_http_client().agent.request = mock.AsyncMock( # type: ignore[method-assign] + side_effect=Exception("boop") + ) + + e2e_handler = self.hs.get_e2e_keys_handler() + + query_result = self.get_success( + e2e_handler.query_devices( + { + "device_keys": {remote_user_id: []}, + }, + timeout=10, + from_user_id=local_user_id, + from_device_id="some_device_id", + ) + ) + + self.assertEqual( + query_result["failures"], + { + "other": { + "message": "Failed to send request: Exception: boop", + "status": 503, + } + }, + ) + + # Do it again: we should hit the backoff + query_result = self.get_success( + e2e_handler.query_devices( + { + "device_keys": {remote_user_id: []}, + }, + timeout=10, + from_user_id=local_user_id, + from_device_id="some_device_id", + ) + ) + + self.assertEqual( + query_result["failures"], + {"other": {"message": "Not ready for retry", "status": 503}}, + ) + @parameterized.expand( [ # The remote homeserver's response indicates that this user has 0/1/2 devices. -- cgit 1.5.1 From 62ae56a4ac955a789d965cfb3bb642985c798666 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 30 Jul 2024 10:54:11 +0100 Subject: Add some more opentracing to sliding sync (#17501) This will make it easier to see what it is doing in jaeger. --- changelog.d/17501.misc | 1 + synapse/handlers/sliding_sync.py | 18 +++++++++++++++--- synapse/rest/client/sync.py | 20 ++++++++++++++++++-- 3 files changed, 34 insertions(+), 5 deletions(-) create mode 100644 changelog.d/17501.misc diff --git a/changelog.d/17501.misc b/changelog.d/17501.misc new file mode 100644 index 0000000000..ba96472acb --- /dev/null +++ b/changelog.d/17501.misc @@ -0,0 +1 @@ +Add some opentracing tags and logging to the experimental sliding sync implementation. diff --git a/synapse/handlers/sliding_sync.py b/synapse/handlers/sliding_sync.py index 84f2fa18ff..73414dbf69 100644 --- a/synapse/handlers/sliding_sync.py +++ b/synapse/handlers/sliding_sync.py @@ -41,7 +41,7 @@ from synapse.api.constants import AccountDataTypes, Direction, EventTypes, Membe from synapse.events import EventBase from synapse.events.utils import strip_event from synapse.handlers.relations import BundledAggregations -from synapse.logging.opentracing import start_active_span, tag_args, trace +from synapse.logging.opentracing import log_kv, start_active_span, tag_args, trace from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary from synapse.storage.databases.main.stream import CurrentStateDeltaMembership from synapse.storage.roommember import MemberSummary @@ -444,6 +444,7 @@ class SlidingSyncHandler: return result + @trace async def current_sync_for_user( self, sync_config: SlidingSyncConfig, @@ -682,8 +683,9 @@ class SlidingSyncHandler: if room_sync_result or not from_token: rooms[room_id] = room_sync_result - with start_active_span("sliding_sync.generate_room_entries"): - await concurrently_execute(handle_room, relevant_room_map, 10) + if relevant_room_map: + with start_active_span("sliding_sync.generate_room_entries"): + await concurrently_execute(handle_room, relevant_room_map, 10) extensions = await self.get_extensions_response( sync_config=sync_config, @@ -1161,6 +1163,7 @@ class SlidingSyncHandler: # return None + @trace async def filter_rooms( self, user: UserID, @@ -1284,6 +1287,7 @@ class SlidingSyncHandler: # Assemble a new sync room map but only with the `filtered_room_id_set` return {room_id: sync_room_map[room_id] for room_id in filtered_room_id_set} + @trace async def sort_rooms( self, sync_room_map: Dict[str, _RoomMembershipForUser], @@ -1492,6 +1496,10 @@ class SlidingSyncHandler: else: assert_never(room_status.status) + log_kv({"sliding_sync.room_status": room_status}) + + log_kv({"sliding_sync.from_bound": from_bound, "sliding_sync.initial": initial}) + # Assemble the list of timeline events # # FIXME: It would be nice to make the `rooms` response more uniform regardless of @@ -1890,6 +1898,7 @@ class SlidingSyncHandler: highlight_count=0, ) + @trace async def get_extensions_response( self, sync_config: SlidingSyncConfig, @@ -1942,6 +1951,7 @@ class SlidingSyncHandler: account_data=account_data_response, ) + @trace async def get_to_device_extension_response( self, sync_config: SlidingSyncConfig, @@ -2016,6 +2026,7 @@ class SlidingSyncHandler: events=messages, ) + @trace async def get_e2ee_extension_response( self, sync_config: SlidingSyncConfig, @@ -2066,6 +2077,7 @@ class SlidingSyncHandler: device_unused_fallback_key_types=device_unused_fallback_key_types, ) + @trace async def get_account_data_extension_response( self, sync_config: SlidingSyncConfig, diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index bf3ac8d483..ccfce6bd53 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -52,7 +52,7 @@ from synapse.http.servlet import ( parse_string, ) from synapse.http.site import SynapseRequest -from synapse.logging.opentracing import trace_with_opname +from synapse.logging.opentracing import log_kv, set_tag, trace_with_opname from synapse.rest.admin.experimental_features import ExperimentalFeature from synapse.types import JsonDict, Requester, SlidingSyncStreamToken, StreamToken from synapse.types.rest.client import SlidingSyncBody @@ -897,7 +897,23 @@ class SlidingSyncRestServlet(RestServlet): # by filter ID. For now, we will just prototype with always passing everything # in. body = parse_and_validate_json_object_from_request(request, SlidingSyncBody) - logger.info("Sliding sync request: %r", body) + + # Tag and log useful data to differentiate requests. + set_tag("sliding_sync.conn_id", body.conn_id or "") + log_kv( + { + "sliding_sync.lists": { + list_name: { + "ranges": list_config.ranges, + "timeline_limit": list_config.timeline_limit, + } + for list_name, list_config in (body.lists or {}).items() + }, + "sliding_sync.room_subscriptions": list( + (body.room_subscriptions or {}).keys() + ), + } + ) sync_config = SlidingSyncConfig( user=user, -- cgit 1.5.1 From be726724a828a91d26a55187e3988ccb47dd40c9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Jul 2024 11:44:54 +0100 Subject: Bump ruff from 0.5.4 to 0.5.5 (#17494) Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 40 ++++++++++++++++++++-------------------- pyproject.toml | 2 +- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/poetry.lock b/poetry.lock index 945b91e022..417f6850b8 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2358,29 +2358,29 @@ files = [ [[package]] name = "ruff" -version = "0.5.4" +version = "0.5.5" description = "An extremely fast Python linter and code formatter, written in Rust." optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.5.4-py3-none-linux_armv6l.whl", hash = "sha256:82acef724fc639699b4d3177ed5cc14c2a5aacd92edd578a9e846d5b5ec18ddf"}, - {file = "ruff-0.5.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:da62e87637c8838b325e65beee485f71eb36202ce8e3cdbc24b9fcb8b99a37be"}, - {file = "ruff-0.5.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:e98ad088edfe2f3b85a925ee96da652028f093d6b9b56b76fc242d8abb8e2059"}, - {file = "ruff-0.5.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4c55efbecc3152d614cfe6c2247a3054cfe358cefbf794f8c79c8575456efe19"}, - {file = "ruff-0.5.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f9b85eaa1f653abd0a70603b8b7008d9e00c9fa1bbd0bf40dad3f0c0bdd06793"}, - {file = "ruff-0.5.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0cf497a47751be8c883059c4613ba2f50dd06ec672692de2811f039432875278"}, - {file = "ruff-0.5.4-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:09c14ed6a72af9ccc8d2e313d7acf7037f0faff43cde4b507e66f14e812e37f7"}, - {file = "ruff-0.5.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:628f6b8f97b8bad2490240aa84f3e68f390e13fabc9af5c0d3b96b485921cd60"}, - {file = "ruff-0.5.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3520a00c0563d7a7a7c324ad7e2cde2355733dafa9592c671fb2e9e3cd8194c1"}, - {file = "ruff-0.5.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:93789f14ca2244fb91ed481456f6d0bb8af1f75a330e133b67d08f06ad85b516"}, - {file = "ruff-0.5.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:029454e2824eafa25b9df46882f7f7844d36fd8ce51c1b7f6d97e2615a57bbcc"}, - {file = "ruff-0.5.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:9492320eed573a13a0bc09a2957f17aa733fff9ce5bf00e66e6d4a88ec33813f"}, - {file = "ruff-0.5.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:a6e1f62a92c645e2919b65c02e79d1f61e78a58eddaebca6c23659e7c7cb4ac7"}, - {file = "ruff-0.5.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:768fa9208df2bec4b2ce61dbc7c2ddd6b1be9fb48f1f8d3b78b3332c7d71c1ff"}, - {file = "ruff-0.5.4-py3-none-win32.whl", hash = "sha256:e1e7393e9c56128e870b233c82ceb42164966f25b30f68acbb24ed69ce9c3a4e"}, - {file = "ruff-0.5.4-py3-none-win_amd64.whl", hash = "sha256:58b54459221fd3f661a7329f177f091eb35cf7a603f01d9eb3eb11cc348d38c4"}, - {file = "ruff-0.5.4-py3-none-win_arm64.whl", hash = "sha256:bd53da65f1085fb5b307c38fd3c0829e76acf7b2a912d8d79cadcdb4875c1eb7"}, - {file = "ruff-0.5.4.tar.gz", hash = "sha256:2795726d5f71c4f4e70653273d1c23a8182f07dd8e48c12de5d867bfb7557eed"}, + {file = "ruff-0.5.5-py3-none-linux_armv6l.whl", hash = "sha256:605d589ec35d1da9213a9d4d7e7a9c761d90bba78fc8790d1c5e65026c1b9eaf"}, + {file = "ruff-0.5.5-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:00817603822a3e42b80f7c3298c8269e09f889ee94640cd1fc7f9329788d7bf8"}, + {file = "ruff-0.5.5-py3-none-macosx_11_0_arm64.whl", hash = "sha256:187a60f555e9f865a2ff2c6984b9afeffa7158ba6e1eab56cb830404c942b0f3"}, + {file = "ruff-0.5.5-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe26fc46fa8c6e0ae3f47ddccfbb136253c831c3289bba044befe68f467bfb16"}, + {file = "ruff-0.5.5-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4ad25dd9c5faac95c8e9efb13e15803cd8bbf7f4600645a60ffe17c73f60779b"}, + {file = "ruff-0.5.5-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f70737c157d7edf749bcb952d13854e8f745cec695a01bdc6e29c29c288fc36e"}, + {file = "ruff-0.5.5-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:cfd7de17cef6ab559e9f5ab859f0d3296393bc78f69030967ca4d87a541b97a0"}, + {file = "ruff-0.5.5-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a09b43e02f76ac0145f86a08e045e2ea452066f7ba064fd6b0cdccb486f7c3e7"}, + {file = "ruff-0.5.5-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d0b856cb19c60cd40198be5d8d4b556228e3dcd545b4f423d1ad812bfdca5884"}, + {file = "ruff-0.5.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3687d002f911e8a5faf977e619a034d159a8373514a587249cc00f211c67a091"}, + {file = "ruff-0.5.5-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:ac9dc814e510436e30d0ba535f435a7f3dc97f895f844f5b3f347ec8c228a523"}, + {file = "ruff-0.5.5-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:af9bdf6c389b5add40d89b201425b531e0a5cceb3cfdcc69f04d3d531c6be74f"}, + {file = "ruff-0.5.5-py3-none-musllinux_1_2_i686.whl", hash = "sha256:d40a8533ed545390ef8315b8e25c4bb85739b90bd0f3fe1280a29ae364cc55d8"}, + {file = "ruff-0.5.5-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:cab904683bf9e2ecbbe9ff235bfe056f0eba754d0168ad5407832928d579e7ab"}, + {file = "ruff-0.5.5-py3-none-win32.whl", hash = "sha256:696f18463b47a94575db635ebb4c178188645636f05e934fdf361b74edf1bb2d"}, + {file = "ruff-0.5.5-py3-none-win_amd64.whl", hash = "sha256:50f36d77f52d4c9c2f1361ccbfbd09099a1b2ea5d2b2222c586ab08885cf3445"}, + {file = "ruff-0.5.5-py3-none-win_arm64.whl", hash = "sha256:3191317d967af701f1b73a31ed5788795936e423b7acce82a2b63e26eb3e89d6"}, + {file = "ruff-0.5.5.tar.gz", hash = "sha256:cc5516bdb4858d972fbc31d246bdb390eab8df1a26e2353be2dbc0c2d7f5421a"}, ] [[package]] @@ -3215,4 +3215,4 @@ user-search = ["pyicu"] [metadata] lock-version = "2.0" python-versions = "^3.8.0" -content-hash = "e65fbd044230964cae8810c84289bcf0bc43b27532ea5a5ef8843eab4f6514af" +content-hash = "5f458ce53b7469844af2e0c5a9c5ef720736de5f080c4eb8d3a0e60286424f44" diff --git a/pyproject.toml b/pyproject.toml index 1adf8e087f..fe6289839c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -322,7 +322,7 @@ all = [ # This helps prevents merge conflicts when running a batch of dependabot updates. isort = ">=5.10.1" black = ">=22.7.0" -ruff = "0.5.4" +ruff = "0.5.5" # Type checking only works with the pydantic.v1 compat module from pydantic v2 pydantic = "^2" -- cgit 1.5.1