From 845732be450b3f9c991df35b2f07d600a0eca6dd Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 4 Aug 2022 11:02:29 +0200 Subject: Fix rooms not being properly excluded from incremental sync (#13408) --- synapse/handlers/sync.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'synapse/handlers/sync.py') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index d42a414c90..d827c03ad1 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1536,15 +1536,13 @@ class SyncHandler: ignored_users = await self.store.ignored_users(user_id) if since_token: room_changes = await self._get_rooms_changed( - sync_result_builder, ignored_users, self.rooms_to_exclude + sync_result_builder, ignored_users ) tags_by_room = await self.store.get_updated_tags( user_id, since_token.account_data_key ) else: - room_changes = await self._get_all_rooms( - sync_result_builder, ignored_users, self.rooms_to_exclude - ) + room_changes = await self._get_all_rooms(sync_result_builder, ignored_users) tags_by_room = await self.store.get_tags_for_user(user_id) log_kv({"rooms_changed": len(room_changes.room_entries)}) @@ -1623,13 +1621,14 @@ class SyncHandler: self, sync_result_builder: "SyncResultBuilder", ignored_users: FrozenSet[str], - excluded_rooms: List[str], ) -> _RoomChanges: """Determine the changes in rooms to report to the user. This function is a first pass at generating the rooms part of the sync response. It determines which rooms have changed during the sync period, and categorises - them into four buckets: "knock", "invite", "join" and "leave". + them into four buckets: "knock", "invite", "join" and "leave". It also excludes + from that list any room that appears in the list of rooms to exclude from sync + results in the server configuration. 1. Finds all membership changes for the user in the sync period (from `since_token` up to `now_token`). @@ -1655,7 +1654,7 @@ class SyncHandler: # _have_rooms_changed. We could keep the results in memory to avoid a # second query, at the cost of more complicated source code. membership_change_events = await self.store.get_membership_changes_for_user( - user_id, since_token.room_key, now_token.room_key, excluded_rooms + user_id, since_token.room_key, now_token.room_key, self.rooms_to_exclude ) mem_change_events_by_room_id: Dict[str, List[EventBase]] = {} @@ -1862,7 +1861,6 @@ class SyncHandler: self, sync_result_builder: "SyncResultBuilder", ignored_users: FrozenSet[str], - ignored_rooms: List[str], ) -> _RoomChanges: """Returns entries for all rooms for the user. @@ -1884,7 +1882,7 @@ class SyncHandler: room_list = await self.store.get_rooms_for_local_user_where_membership_is( user_id=user_id, membership_list=Membership.LIST, - excluded_rooms=ignored_rooms, + excluded_rooms=self.rooms_to_exclude, ) room_entries = [] @@ -2150,7 +2148,9 @@ class SyncHandler: raise Exception("Unrecognized rtype: %r", room_builder.rtype) async def get_rooms_for_user_at( - self, user_id: str, room_key: RoomStreamToken + self, + user_id: str, + room_key: RoomStreamToken, ) -> FrozenSet[str]: """Get set of joined rooms for a user at the given stream ordering. @@ -2176,7 +2176,12 @@ class SyncHandler: # If the membership's stream ordering is after the given stream # ordering, we need to go and work out if the user was in the room # before. + # We also need to check whether the room should be excluded from sync + # responses as per the homeserver config. for joined_room in joined_rooms: + if joined_room.room_id in self.rooms_to_exclude: + continue + if not joined_room.event_pos.persisted_after(room_key): joined_room_ids.add(joined_room.room_id) continue -- cgit 1.5.1 From 51c01d450a5165060c8e17b506388a9b1808dda9 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Wed, 10 Aug 2022 12:58:20 +0100 Subject: Add some miscellaneous comments around sync (#13474) Add some miscellaneous comments to document sync, especially around `compute_state_delta`. Signed-off-by: Sean Quah Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- changelog.d/13474.misc | 1 + synapse/handlers/sync.py | 116 +++++++++++++++++++++++++++++++---------------- synapse/visibility.py | 4 +- 3 files changed, 81 insertions(+), 40 deletions(-) create mode 100644 changelog.d/13474.misc (limited to 'synapse/handlers/sync.py') diff --git a/changelog.d/13474.misc b/changelog.d/13474.misc new file mode 100644 index 0000000000..d34c661fed --- /dev/null +++ b/changelog.d/13474.misc @@ -0,0 +1 @@ +Add some miscellaneous comments to document sync, especially around `compute_state_delta`. diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index d827c03ad1..3ca01391c9 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -13,7 +13,17 @@ # limitations under the License. import itertools import logging -from typing import TYPE_CHECKING, Any, Dict, FrozenSet, List, Optional, Set, Tuple +from typing import ( + TYPE_CHECKING, + Any, + Dict, + FrozenSet, + List, + Optional, + Sequence, + Set, + Tuple, +) import attr from prometheus_client import Counter @@ -89,7 +99,7 @@ class SyncConfig: @attr.s(slots=True, frozen=True, auto_attribs=True) class TimelineBatch: prev_batch: StreamToken - events: List[EventBase] + events: Sequence[EventBase] limited: bool # A mapping of event ID to the bundled aggregations for the above events. # This is only calculated if limited is true. @@ -852,16 +862,26 @@ class SyncHandler: now_token: StreamToken, full_state: bool, ) -> MutableStateMap[EventBase]: - """Works out the difference in state between the start of the timeline - and the previous sync. + """Works out the difference in state between the end of the previous sync and + the start of the timeline. Args: room_id: batch: The timeline batch for the room that will be sent to the user. sync_config: - since_token: Token of the end of the previous batch. May be None. + since_token: Token of the end of the previous batch. May be `None`. now_token: Token of the end of the current batch. full_state: Whether to force returning the full state. + `lazy_load_members` still applies when `full_state` is `True`. + + Returns: + The state to return in the sync response for the room. + + Clients will overlay this onto the state at the end of the previous sync to + arrive at the state at the start of the timeline. + + Clients will then overlay state events in the timeline to arrive at the + state at the end of the timeline, in preparation for the next sync. """ # TODO(mjark) Check if the state events were received by the server # after the previous sync, since we need to include those state @@ -869,7 +889,8 @@ class SyncHandler: # TODO(mjark) Check for new redactions in the state events. with Measure(self.clock, "compute_state_delta"): - + # The memberships needed for events in the timeline. + # Only calculated when `lazy_load_members` is on. members_to_fetch = None lazy_load_members = sync_config.filter_collection.lazy_load_members() @@ -897,38 +918,46 @@ class SyncHandler: else: state_filter = StateFilter.all() + # The contribution to the room state from state events in the timeline. + # Only contains the last event for any given state key. timeline_state = { (event.type, event.state_key): event.event_id for event in batch.events if event.is_state() } + # Now calculate the state to return in the sync response for the room. + # This is more or less the change in state between the end of the previous + # sync's timeline and the start of the current sync's timeline. + # See the docstring above for details. + state_ids: StateMap[str] + if full_state: if batch: - current_state_ids = ( + state_at_timeline_end = ( await self._state_storage_controller.get_state_ids_for_event( batch.events[-1].event_id, state_filter=state_filter ) ) - state_ids = ( + state_at_timeline_start = ( await self._state_storage_controller.get_state_ids_for_event( batch.events[0].event_id, state_filter=state_filter ) ) else: - current_state_ids = await self.get_state_at( + state_at_timeline_end = await self.get_state_at( room_id, stream_position=now_token, state_filter=state_filter ) - state_ids = current_state_ids + state_at_timeline_start = state_at_timeline_end state_ids = _calculate_state( timeline_contains=timeline_state, - timeline_start=state_ids, - previous={}, - current=current_state_ids, + timeline_start=state_at_timeline_start, + timeline_end=state_at_timeline_end, + previous_timeline_end={}, lazy_load_members=lazy_load_members, ) elif batch.limited: @@ -968,24 +997,23 @@ class SyncHandler: ) if batch: - current_state_ids = ( + state_at_timeline_end = ( await self._state_storage_controller.get_state_ids_for_event( batch.events[-1].event_id, state_filter=state_filter ) ) else: - # Its not clear how we get here, but empirically we do - # (#5407). Logging has been added elsewhere to try and - # figure out where this state comes from. - current_state_ids = await self.get_state_at( + # We can get here if the user has ignored the senders of all + # the recent events. + state_at_timeline_end = await self.get_state_at( room_id, stream_position=now_token, state_filter=state_filter ) state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_at_timeline_start, - previous=state_at_previous_sync, - current=current_state_ids, + timeline_end=state_at_timeline_end, + previous_timeline_end=state_at_previous_sync, # we have to include LL members in case LL initial sync missed them lazy_load_members=lazy_load_members, ) @@ -1010,6 +1038,13 @@ class SyncHandler: ), ) + # At this point, if `lazy_load_members` is enabled, `state_ids` includes + # the memberships of all event senders in the timeline. This is because we + # may not have sent the memberships in a previous sync. + + # When `include_redundant_members` is on, we send all the lazy-loaded + # memberships of event senders. Otherwise we make an effort to limit the set + # of memberships we send to those that we have not already sent to this client. if lazy_load_members and not include_redundant_members: cache_key = (sync_config.user.to_string(), sync_config.device_id) cache = self.get_lazy_loaded_members_cache(cache_key) @@ -2216,8 +2251,8 @@ def _action_has_highlight(actions: List[JsonDict]) -> bool: def _calculate_state( timeline_contains: StateMap[str], timeline_start: StateMap[str], - previous: StateMap[str], - current: StateMap[str], + timeline_end: StateMap[str], + previous_timeline_end: StateMap[str], lazy_load_members: bool, ) -> StateMap[str]: """Works out what state to include in a sync response. @@ -2225,45 +2260,50 @@ def _calculate_state( Args: timeline_contains: state in the timeline timeline_start: state at the start of the timeline - previous: state at the end of the previous sync (or empty dict + timeline_end: state at the end of the timeline + previous_timeline_end: state at the end of the previous sync (or empty dict if this is an initial sync) - current: state at the end of the timeline lazy_load_members: whether to return members from timeline_start or not. assumes that timeline_start has already been filtered to include only the members the client needs to know about. """ - event_id_to_key = { - e: key - for key, e in itertools.chain( + event_id_to_state_key = { + event_id: state_key + for state_key, event_id in itertools.chain( timeline_contains.items(), - previous.items(), timeline_start.items(), - current.items(), + timeline_end.items(), + previous_timeline_end.items(), ) } - c_ids = set(current.values()) - ts_ids = set(timeline_start.values()) - p_ids = set(previous.values()) - tc_ids = set(timeline_contains.values()) + timeline_end_ids = set(timeline_end.values()) + timeline_start_ids = set(timeline_start.values()) + previous_timeline_end_ids = set(previous_timeline_end.values()) + timeline_contains_ids = set(timeline_contains.values()) # If we are lazyloading room members, we explicitly add the membership events # for the senders in the timeline into the state block returned by /sync, # as we may not have sent them to the client before. We find these membership # events by filtering them out of timeline_start, which has already been filtered # to only include membership events for the senders in the timeline. - # In practice, we can do this by removing them from the p_ids list, - # which is the list of relevant state we know we have already sent to the client. + # In practice, we can do this by removing them from the previous_timeline_end_ids + # list, which is the list of relevant state we know we have already sent to the + # client. # see https://github.com/matrix-org/synapse/pull/2970/files/efcdacad7d1b7f52f879179701c7e0d9b763511f#r204732809 if lazy_load_members: - p_ids.difference_update( + previous_timeline_end_ids.difference_update( e for t, e in timeline_start.items() if t[0] == EventTypes.Member ) - state_ids = ((c_ids | ts_ids) - p_ids) - tc_ids + state_ids = ( + (timeline_end_ids | timeline_start_ids) + - previous_timeline_end_ids + - timeline_contains_ids + ) - return {event_id_to_key[e]: e for e in state_ids} + return {event_id_to_state_key[e]: e for e in state_ids} @attr.s(slots=True, auto_attribs=True) diff --git a/synapse/visibility.py b/synapse/visibility.py index d947edde66..c810a05907 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -73,8 +73,8 @@ async def filter_events_for_client( * the user is not currently a member of the room, and: * the user has not been a member of the room since the given events - always_include_ids: set of event ids to specifically - include (unless sender is ignored) + always_include_ids: set of event ids to specifically include, if present + in events (unless sender is ignored) filter_send_to_client: Whether we're checking an event that's going to be sent to a client. This might not always be the case since this function can also be called to check whether a user can see the state at a given point. -- cgit 1.5.1 From 84169a82dcf7dfb6eb7d307ea7f5e33cb57f6e3f Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Thu, 18 Aug 2022 11:53:02 +0100 Subject: Avoid blocking lazy-loading `/sync`s during partial joins (#13477) Use a state filter or accept partial state in a few places where we request state, to avoid blocking. To make lazy-loading `/sync`s work, we need to provide the memberships of event senders, which are not guaranteed to be in the room state. Instead we dig through auth events for memberships to present to clients. The auth events of an event are guaranteed to contain a passable membership event, otherwise the event would have been rejected. Note that this only covers the common code paths encountered during testing. There has been no exhaustive checking of all sync code paths. Fixes #13146. Signed-off-by: Sean Quah --- changelog.d/13477.misc | 1 + synapse/handlers/sync.py | 253 ++++++++++++++++++++++++++++++----- synapse/storage/controllers/state.py | 24 +++- 3 files changed, 244 insertions(+), 34 deletions(-) create mode 100644 changelog.d/13477.misc (limited to 'synapse/handlers/sync.py') diff --git a/changelog.d/13477.misc b/changelog.d/13477.misc new file mode 100644 index 0000000000..5d21ae9d7a --- /dev/null +++ b/changelog.d/13477.misc @@ -0,0 +1 @@ +Faster room joins: Avoid blocking lazy-loading `/sync`s during partial joins due to remote memberships. Pull remote memberships from auth events instead of the room state. diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 3ca01391c9..b4d3f3958c 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -16,9 +16,11 @@ import logging from typing import ( TYPE_CHECKING, Any, + Collection, Dict, FrozenSet, List, + Mapping, Optional, Sequence, Set, @@ -517,10 +519,17 @@ class SyncHandler: # ensure that we always include current state in the timeline current_state_ids: FrozenSet[str] = frozenset() if any(e.is_state() for e in recents): + # FIXME(faster_joins): We use the partial state here as + # we don't want to block `/sync` on finishing a lazy join. + # Which should be fine once + # https://github.com/matrix-org/synapse/issues/12989 is resolved, + # since we shouldn't reach here anymore? + # Note that we use the current state as a whitelist for filtering + # `recents`, so partial state is only a problem when a membership + # event turns up in `recents` but has not made it into the current + # state. current_state_ids_map = ( - await self._state_storage_controller.get_current_state_ids( - room_id - ) + await self.store.get_partial_current_state_ids(room_id) ) current_state_ids = frozenset(current_state_ids_map.values()) @@ -589,7 +598,13 @@ class SyncHandler: if any(e.is_state() for e in loaded_recents): # FIXME(faster_joins): We use the partial state here as # we don't want to block `/sync` on finishing a lazy join. - # Is this the correct way of doing it? + # Which should be fine once + # https://github.com/matrix-org/synapse/issues/12989 is resolved, + # since we shouldn't reach here anymore? + # Note that we use the current state as a whitelist for filtering + # `loaded_recents`, so partial state is only a problem when a + # membership event turns up in `loaded_recents` but has not made it + # into the current state. current_state_ids_map = ( await self.store.get_partial_current_state_ids(room_id) ) @@ -637,7 +652,10 @@ class SyncHandler: ) async def get_state_after_event( - self, event_id: str, state_filter: Optional[StateFilter] = None + self, + event_id: str, + state_filter: Optional[StateFilter] = None, + await_full_state: bool = True, ) -> StateMap[str]: """ Get the room state after the given event @@ -645,9 +663,14 @@ class SyncHandler: Args: event_id: event of interest state_filter: The state filter used to fetch state from the database. + await_full_state: if `True`, will block if we do not yet have complete state + at the event and `state_filter` is not satisfied by partial state. + Defaults to `True`. """ state_ids = await self._state_storage_controller.get_state_ids_for_event( - event_id, state_filter=state_filter or StateFilter.all() + event_id, + state_filter=state_filter or StateFilter.all(), + await_full_state=await_full_state, ) # using get_metadata_for_events here (instead of get_event) sidesteps an issue @@ -670,6 +693,7 @@ class SyncHandler: room_id: str, stream_position: StreamToken, state_filter: Optional[StateFilter] = None, + await_full_state: bool = True, ) -> StateMap[str]: """Get the room state at a particular stream position @@ -677,6 +701,9 @@ class SyncHandler: room_id: room for which to get state stream_position: point at which to get state state_filter: The state filter used to fetch state from the database. + await_full_state: if `True`, will block if we do not yet have complete state + at the last event in the room before `stream_position` and + `state_filter` is not satisfied by partial state. Defaults to `True`. """ # FIXME: This gets the state at the latest event before the stream ordering, # which might not be the same as the "current state" of the room at the time @@ -688,7 +715,9 @@ class SyncHandler: if last_event_id: state = await self.get_state_after_event( - last_event_id, state_filter=state_filter or StateFilter.all() + last_event_id, + state_filter=state_filter or StateFilter.all(), + await_full_state=await_full_state, ) else: @@ -891,7 +920,15 @@ class SyncHandler: with Measure(self.clock, "compute_state_delta"): # The memberships needed for events in the timeline. # Only calculated when `lazy_load_members` is on. - members_to_fetch = None + members_to_fetch: Optional[Set[str]] = None + + # A dictionary mapping user IDs to the first event in the timeline sent by + # them. Only calculated when `lazy_load_members` is on. + first_event_by_sender_map: Optional[Dict[str, EventBase]] = None + + # The contribution to the room state from state events in the timeline. + # Only contains the last event for any given state key. + timeline_state: StateMap[str] lazy_load_members = sync_config.filter_collection.lazy_load_members() include_redundant_members = ( @@ -902,10 +939,23 @@ class SyncHandler: # We only request state for the members needed to display the # timeline: - members_to_fetch = { - event.sender # FIXME: we also care about invite targets etc. - for event in batch.events - } + timeline_state = {} + + members_to_fetch = set() + first_event_by_sender_map = {} + for event in batch.events: + # Build the map from user IDs to the first timeline event they sent. + if event.sender not in first_event_by_sender_map: + first_event_by_sender_map[event.sender] = event + + # We need the event's sender, unless their membership was in a + # previous timeline event. + if (EventTypes.Member, event.sender) not in timeline_state: + members_to_fetch.add(event.sender) + # FIXME: we also care about invite targets etc. + + if event.is_state(): + timeline_state[(event.type, event.state_key)] = event.event_id if full_state: # always make sure we LL ourselves so we know we're in the room @@ -915,16 +965,21 @@ class SyncHandler: members_to_fetch.add(sync_config.user.to_string()) state_filter = StateFilter.from_lazy_load_member_list(members_to_fetch) + + # We are happy to use partial state to compute the `/sync` response. + # Since partial state may not include the lazy-loaded memberships we + # require, we fix up the state response afterwards with memberships from + # auth events. + await_full_state = False else: - state_filter = StateFilter.all() + timeline_state = { + (event.type, event.state_key): event.event_id + for event in batch.events + if event.is_state() + } - # The contribution to the room state from state events in the timeline. - # Only contains the last event for any given state key. - timeline_state = { - (event.type, event.state_key): event.event_id - for event in batch.events - if event.is_state() - } + state_filter = StateFilter.all() + await_full_state = True # Now calculate the state to return in the sync response for the room. # This is more or less the change in state between the end of the previous @@ -936,19 +991,26 @@ class SyncHandler: if batch: state_at_timeline_end = ( await self._state_storage_controller.get_state_ids_for_event( - batch.events[-1].event_id, state_filter=state_filter + batch.events[-1].event_id, + state_filter=state_filter, + await_full_state=await_full_state, ) ) state_at_timeline_start = ( await self._state_storage_controller.get_state_ids_for_event( - batch.events[0].event_id, state_filter=state_filter + batch.events[0].event_id, + state_filter=state_filter, + await_full_state=await_full_state, ) ) else: state_at_timeline_end = await self.get_state_at( - room_id, stream_position=now_token, state_filter=state_filter + room_id, + stream_position=now_token, + state_filter=state_filter, + await_full_state=await_full_state, ) state_at_timeline_start = state_at_timeline_end @@ -964,14 +1026,19 @@ class SyncHandler: if batch: state_at_timeline_start = ( await self._state_storage_controller.get_state_ids_for_event( - batch.events[0].event_id, state_filter=state_filter + batch.events[0].event_id, + state_filter=state_filter, + await_full_state=await_full_state, ) ) else: # We can get here if the user has ignored the senders of all # the recent events. state_at_timeline_start = await self.get_state_at( - room_id, stream_position=now_token, state_filter=state_filter + room_id, + stream_position=now_token, + state_filter=state_filter, + await_full_state=await_full_state, ) # for now, we disable LL for gappy syncs - see @@ -993,20 +1060,28 @@ class SyncHandler: # is indeed the case. assert since_token is not None state_at_previous_sync = await self.get_state_at( - room_id, stream_position=since_token, state_filter=state_filter + room_id, + stream_position=since_token, + state_filter=state_filter, + await_full_state=await_full_state, ) if batch: state_at_timeline_end = ( await self._state_storage_controller.get_state_ids_for_event( - batch.events[-1].event_id, state_filter=state_filter + batch.events[-1].event_id, + state_filter=state_filter, + await_full_state=await_full_state, ) ) else: # We can get here if the user has ignored the senders of all # the recent events. state_at_timeline_end = await self.get_state_at( - room_id, stream_position=now_token, state_filter=state_filter + room_id, + stream_position=now_token, + state_filter=state_filter, + await_full_state=await_full_state, ) state_ids = _calculate_state( @@ -1036,8 +1111,23 @@ class SyncHandler: (EventTypes.Member, member) for member in members_to_fetch ), + await_full_state=False, ) + # If we only have partial state for the room, `state_ids` may be missing the + # memberships we wanted. We attempt to find some by digging through the auth + # events of timeline events. + if lazy_load_members and await self.store.is_partial_state_room(room_id): + assert members_to_fetch is not None + assert first_event_by_sender_map is not None + + additional_state_ids = ( + await self._find_missing_partial_state_memberships( + room_id, members_to_fetch, first_event_by_sender_map, state_ids + ) + ) + state_ids = {**state_ids, **additional_state_ids} + # At this point, if `lazy_load_members` is enabled, `state_ids` includes # the memberships of all event senders in the timeline. This is because we # may not have sent the memberships in a previous sync. @@ -1086,6 +1176,99 @@ class SyncHandler: if e.type != EventTypes.Aliases # until MSC2261 or alternative solution } + async def _find_missing_partial_state_memberships( + self, + room_id: str, + members_to_fetch: Collection[str], + events_with_membership_auth: Mapping[str, EventBase], + found_state_ids: StateMap[str], + ) -> StateMap[str]: + """Finds missing memberships from a set of auth events and returns them as a + state map. + + Args: + room_id: The partial state room to find the remaining memberships for. + members_to_fetch: The memberships to find. + events_with_membership_auth: A mapping from user IDs to events whose auth + events are known to contain their membership. + found_state_ids: A dict from (type, state_key) -> state_event_id, containing + memberships that have been previously found. Entries in + `members_to_fetch` that have a membership in `found_state_ids` are + ignored. + + Returns: + A dict from ("m.room.member", state_key) -> state_event_id, containing the + memberships missing from `found_state_ids`. + + Raises: + KeyError: if `events_with_membership_auth` does not have an entry for a + missing membership. Memberships in `found_state_ids` do not need an + entry in `events_with_membership_auth`. + """ + additional_state_ids: MutableStateMap[str] = {} + + # Tracks the missing members for logging purposes. + missing_members = set() + + # Identify memberships missing from `found_state_ids` and pick out the auth + # events in which to look for them. + auth_event_ids: Set[str] = set() + for member in members_to_fetch: + if (EventTypes.Member, member) in found_state_ids: + continue + + missing_members.add(member) + event_with_membership_auth = events_with_membership_auth[member] + auth_event_ids.update(event_with_membership_auth.auth_event_ids()) + + auth_events = await self.store.get_events(auth_event_ids) + + # Run through the missing memberships once more, picking out the memberships + # from the pile of auth events we have just fetched. + for member in members_to_fetch: + if (EventTypes.Member, member) in found_state_ids: + continue + + event_with_membership_auth = events_with_membership_auth[member] + + # Dig through the auth events to find the desired membership. + for auth_event_id in event_with_membership_auth.auth_event_ids(): + # We only store events once we have all their auth events, + # so the auth event must be in the pile we have just + # fetched. + auth_event = auth_events[auth_event_id] + + if ( + auth_event.type == EventTypes.Member + and auth_event.state_key == member + ): + missing_members.remove(member) + additional_state_ids[ + (EventTypes.Member, member) + ] = auth_event.event_id + break + + if missing_members: + # There really shouldn't be any missing memberships now. Either: + # * we couldn't find an auth event, which shouldn't happen because we do + # not persist events with persisting their auth events first, or + # * the set of auth events did not contain a membership we wanted, which + # means our caller didn't compute the events in `members_to_fetch` + # correctly, or we somehow accepted an event whose auth events were + # dodgy. + logger.error( + "Failed to find memberships for %s in partial state room " + "%s in the auth events of %s.", + missing_members, + room_id, + [ + events_with_membership_auth[member].event_id + for member in missing_members + ], + ) + + return additional_state_ids + async def unread_notifs_for_room_id( self, room_id: str, sync_config: SyncConfig ) -> NotifCounts: @@ -1730,7 +1913,11 @@ class SyncHandler: continue if room_id in sync_result_builder.joined_room_ids or has_join: - old_state_ids = await self.get_state_at(room_id, since_token) + old_state_ids = await self.get_state_at( + room_id, + since_token, + state_filter=StateFilter.from_types([(EventTypes.Member, user_id)]), + ) old_mem_ev_id = old_state_ids.get((EventTypes.Member, user_id), None) old_mem_ev = None if old_mem_ev_id: @@ -1756,7 +1943,13 @@ class SyncHandler: newly_left_rooms.append(room_id) else: if not old_state_ids: - old_state_ids = await self.get_state_at(room_id, since_token) + old_state_ids = await self.get_state_at( + room_id, + since_token, + state_filter=StateFilter.from_types( + [(EventTypes.Member, user_id)] + ), + ) old_mem_ev_id = old_state_ids.get( (EventTypes.Member, user_id), None ) diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py index 1ad002f57b..f9ffd0e29e 100644 --- a/synapse/storage/controllers/state.py +++ b/synapse/storage/controllers/state.py @@ -234,6 +234,7 @@ class StateStorageController: self, event_ids: Collection[str], state_filter: Optional[StateFilter] = None, + await_full_state: bool = True, ) -> Dict[str, StateMap[str]]: """ Get the state dicts corresponding to a list of events, containing the event_ids @@ -242,6 +243,9 @@ class StateStorageController: Args: event_ids: events whose state should be returned state_filter: The state filter used to fetch state from the database. + await_full_state: if `True`, will block if we do not yet have complete state + at these events and `state_filter` is not satisfied by partial state. + Defaults to `True`. Returns: A dict from event_id -> (type, state_key) -> event_id @@ -250,8 +254,12 @@ class StateStorageController: RuntimeError if we don't have a state group for one or more of the events (ie they are outliers or unknown) """ - await_full_state = True - if state_filter and not state_filter.must_await_full_state(self._is_mine_id): + if ( + await_full_state + and state_filter + and not state_filter.must_await_full_state(self._is_mine_id) + ): + # Full state is not required if the state filter is restrictive enough. await_full_state = False event_to_groups = await self.get_state_group_for_events( @@ -294,7 +302,10 @@ class StateStorageController: @trace async def get_state_ids_for_event( - self, event_id: str, state_filter: Optional[StateFilter] = None + self, + event_id: str, + state_filter: Optional[StateFilter] = None, + await_full_state: bool = True, ) -> StateMap[str]: """ Get the state dict corresponding to a particular event @@ -302,6 +313,9 @@ class StateStorageController: Args: event_id: event whose state should be returned state_filter: The state filter used to fetch state from the database. + await_full_state: if `True`, will block if we do not yet have complete state + at the event and `state_filter` is not satisfied by partial state. + Defaults to `True`. Returns: A dict from (type, state_key) -> state_event_id @@ -311,7 +325,9 @@ class StateStorageController: outlier or is unknown) """ state_map = await self.get_state_ids_for_events( - [event_id], state_filter or StateFilter.all() + [event_id], + state_filter or StateFilter.all(), + await_full_state=await_full_state, ) return state_map[event_id] -- cgit 1.5.1 From 5e7847dc923142bc68834f9b9538ada3fdd887d5 Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Tue, 23 Aug 2022 10:49:59 +0100 Subject: Cache user IDs instead of profile objects (#13573) The profile objects are never used and increase cache size significantly. --- changelog.d/13573.misc | 1 + synapse/handlers/sync.py | 4 +- synapse/state/__init__.py | 13 +++--- synapse/storage/databases/main/roommember.py | 67 ++++++++++++---------------- synapse/util/caches/descriptors.py | 26 ++++++++--- 5 files changed, 57 insertions(+), 54 deletions(-) create mode 100644 changelog.d/13573.misc (limited to 'synapse/handlers/sync.py') diff --git a/changelog.d/13573.misc b/changelog.d/13573.misc new file mode 100644 index 0000000000..1ce9c0c081 --- /dev/null +++ b/changelog.d/13573.misc @@ -0,0 +1 @@ +Cache user IDs instead of profiles to reduce cache memory usage. Contributed by Nick @ Beeper (@fizzadar). diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index b4d3f3958c..2d95b1fa24 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -2421,10 +2421,10 @@ class SyncHandler: joined_room.room_id, joined_room.event_pos.stream ) ) - users_in_room = await self.state.get_current_users_in_room( + user_ids_in_room = await self.state.get_current_user_ids_in_room( joined_room.room_id, extrems ) - if user_id in users_in_room: + if user_id in user_ids_in_room: joined_room_ids.add(joined_room.room_id) return frozenset(joined_room_ids) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index c355e4f98a..3047e1b1ad 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -44,7 +44,6 @@ from synapse.logging.context import ContextResourceUsage from synapse.replication.http.state import ReplicationUpdateCurrentStateRestServlet from synapse.state import v1, v2 from synapse.storage.databases.main.events_worker import EventRedactBehaviour -from synapse.storage.roommember import ProfileInfo from synapse.storage.state import StateFilter from synapse.types import StateMap from synapse.util.async_helpers import Linearizer @@ -210,11 +209,11 @@ class StateHandler: ret = await self.resolve_state_groups_for_events(room_id, event_ids) return await ret.get_state(self._state_storage_controller, state_filter) - async def get_current_users_in_room( + async def get_current_user_ids_in_room( self, room_id: str, latest_event_ids: List[str] - ) -> Dict[str, ProfileInfo]: + ) -> Set[str]: """ - Get the users who are currently in a room. + Get the users IDs who are currently in a room. Note: This is much slower than using the equivalent method `DataStore.get_users_in_room` or `DataStore.get_users_in_room_with_profiles`, @@ -225,15 +224,15 @@ class StateHandler: room_id: The ID of the room. latest_event_ids: Precomputed list of latest event IDs. Will be computed if None. Returns: - Dictionary of user IDs to their profileinfo. + Set of user IDs in the room. """ assert latest_event_ids is not None - logger.debug("calling resolve_state_groups from get_current_users_in_room") + logger.debug("calling resolve_state_groups from get_current_user_ids_in_room") entry = await self.resolve_state_groups_for_events(room_id, latest_event_ids) state = await entry.get_state(self._state_storage_controller, StateFilter.all()) - return await self.store.get_joined_users_from_state(room_id, state, entry) + return await self.store.get_joined_user_ids_from_state(room_id, state, entry) async def get_hosts_in_room_at_events( self, room_id: str, event_ids: Collection[str] diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 827c1f1efd..0eb024a809 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -835,9 +835,9 @@ class RoomMemberWorkerStore(EventsWorkerStore): return shared_room_ids or frozenset() - async def get_joined_users_from_state( + async def get_joined_user_ids_from_state( self, room_id: str, state: StateMap[str], state_entry: "_StateCacheEntry" - ) -> Dict[str, ProfileInfo]: + ) -> Set[str]: state_group: Union[object, int] = state_entry.state_group if not state_group: # If state_group is None it means it has yet to be assigned a @@ -848,25 +848,25 @@ class RoomMemberWorkerStore(EventsWorkerStore): assert state_group is not None with Measure(self._clock, "get_joined_users_from_state"): - return await self._get_joined_users_from_context( + return await self._get_joined_user_ids_from_context( room_id, state_group, state, context=state_entry ) @cached(num_args=2, iterable=True, max_entries=100000) - async def _get_joined_users_from_context( + async def _get_joined_user_ids_from_context( self, room_id: str, state_group: Union[object, int], current_state_ids: StateMap[str], event: Optional[EventBase] = None, context: Optional["_StateCacheEntry"] = None, - ) -> Dict[str, ProfileInfo]: + ) -> Set[str]: # We don't use `state_group`, it's there so that we can cache based # on it. However, it's important that it's never None, since two current_states # with a state_group of None are likely to be different. assert state_group is not None - users_in_room = {} + users_in_room = set() member_event_ids = [ e_id for key, e_id in current_state_ids.items() @@ -879,11 +879,11 @@ class RoomMemberWorkerStore(EventsWorkerStore): # If we do then we can reuse that result and simply update it with # any membership changes in `delta_ids` if context.prev_group and context.delta_ids: - prev_res = self._get_joined_users_from_context.cache.get_immediate( + prev_res = self._get_joined_user_ids_from_context.cache.get_immediate( (room_id, context.prev_group), None ) - if prev_res and isinstance(prev_res, dict): - users_in_room = dict(prev_res) + if prev_res and isinstance(prev_res, set): + users_in_room = prev_res member_event_ids = [ e_id for key, e_id in context.delta_ids.items() @@ -891,7 +891,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): ] for etype, state_key in context.delta_ids: if etype == EventTypes.Member: - users_in_room.pop(state_key, None) + users_in_room.discard(state_key) # We check if we have any of the member event ids in the event cache # before we ask the DB @@ -908,42 +908,41 @@ class RoomMemberWorkerStore(EventsWorkerStore): ev_entry = event_map.get(event_id) if ev_entry and not ev_entry.event.rejected_reason: if ev_entry.event.membership == Membership.JOIN: - users_in_room[ev_entry.event.state_key] = ProfileInfo( - display_name=ev_entry.event.content.get("displayname", None), - avatar_url=ev_entry.event.content.get("avatar_url", None), - ) + users_in_room.add(ev_entry.event.state_key) else: missing_member_event_ids.append(event_id) if missing_member_event_ids: - event_to_memberships = await self._get_joined_profiles_from_event_ids( + event_to_memberships = await self._get_user_ids_from_membership_event_ids( missing_member_event_ids ) - users_in_room.update(row for row in event_to_memberships.values() if row) + users_in_room.update(event_to_memberships.values()) if event is not None and event.type == EventTypes.Member: if event.membership == Membership.JOIN: if event.event_id in member_event_ids: - users_in_room[event.state_key] = ProfileInfo( - display_name=event.content.get("displayname", None), - avatar_url=event.content.get("avatar_url", None), - ) + users_in_room.add(event.state_key) return users_in_room - @cached(max_entries=10000) - def _get_joined_profile_from_event_id( + @cached( + max_entries=10000, + # This name matches the old function that has been replaced - the cache name + # is kept here to maintain backwards compatibility. + name="_get_joined_profile_from_event_id", + ) + def _get_user_id_from_membership_event_id( self, event_id: str ) -> Optional[Tuple[str, ProfileInfo]]: raise NotImplementedError() @cachedList( - cached_method_name="_get_joined_profile_from_event_id", + cached_method_name="_get_user_id_from_membership_event_id", list_name="event_ids", ) - async def _get_joined_profiles_from_event_ids( + async def _get_user_ids_from_membership_event_ids( self, event_ids: Iterable[str] - ) -> Dict[str, Optional[Tuple[str, ProfileInfo]]]: + ) -> Dict[str, str]: """For given set of member event_ids check if they point to a join event and if so return the associated user and profile info. @@ -958,21 +957,13 @@ class RoomMemberWorkerStore(EventsWorkerStore): table="room_memberships", column="event_id", iterable=event_ids, - retcols=("user_id", "display_name", "avatar_url", "event_id"), + retcols=("user_id", "event_id"), keyvalues={"membership": Membership.JOIN}, batch_size=1000, - desc="_get_joined_profiles_from_event_ids", + desc="_get_user_ids_from_membership_event_ids", ) - return { - row["event_id"]: ( - row["user_id"], - ProfileInfo( - avatar_url=row["avatar_url"], display_name=row["display_name"] - ), - ) - for row in rows - } + return {row["event_id"]: row["user_id"] for row in rows} @cached(max_entries=10000) async def is_host_joined(self, room_id: str, host: str) -> bool: @@ -1131,12 +1122,12 @@ class RoomMemberWorkerStore(EventsWorkerStore): else: # The cache doesn't match the state group or prev state group, # so we calculate the result from first principles. - joined_users = await self.get_joined_users_from_state( + joined_user_ids = await self.get_joined_user_ids_from_state( room_id, state, state_entry ) cache.hosts_to_joined_users = {} - for user_id in joined_users: + for user_id in joined_user_ids: host = intern_string(get_domain_from_id(user_id)) cache.hosts_to_joined_users.setdefault(host, set()).add(user_id) diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index 867f315b2a..9d4bc89edb 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -73,8 +73,10 @@ class _CacheDescriptorBase: num_args: Optional[int], uncached_args: Optional[Collection[str]] = None, cache_context: bool = False, + name: Optional[str] = None, ): self.orig = orig + self.name = name or orig.__name__ arg_spec = inspect.getfullargspec(orig) all_args = arg_spec.args @@ -211,7 +213,7 @@ class LruCacheDescriptor(_CacheDescriptorBase): def __get__(self, obj: Optional[Any], owner: Optional[Type]) -> Callable[..., Any]: cache: LruCache[CacheKey, Any] = LruCache( - cache_name=self.orig.__name__, + cache_name=self.name, max_size=self.max_entries, ) @@ -241,7 +243,7 @@ class LruCacheDescriptor(_CacheDescriptorBase): wrapped = cast(_CachedFunction, _wrapped) wrapped.cache = cache - obj.__dict__[self.orig.__name__] = wrapped + obj.__dict__[self.name] = wrapped return wrapped @@ -301,12 +303,14 @@ class DeferredCacheDescriptor(_CacheDescriptorBase): cache_context: bool = False, iterable: bool = False, prune_unread_entries: bool = True, + name: Optional[str] = None, ): super().__init__( orig, num_args=num_args, uncached_args=uncached_args, cache_context=cache_context, + name=name, ) if tree and self.num_args < 2: @@ -321,7 +325,7 @@ class DeferredCacheDescriptor(_CacheDescriptorBase): def __get__(self, obj: Optional[Any], owner: Optional[Type]) -> Callable[..., Any]: cache: DeferredCache[CacheKey, Any] = DeferredCache( - name=self.orig.__name__, + name=self.name, max_entries=self.max_entries, tree=self.tree, iterable=self.iterable, @@ -372,7 +376,7 @@ class DeferredCacheDescriptor(_CacheDescriptorBase): wrapped.cache = cache wrapped.num_args = self.num_args - obj.__dict__[self.orig.__name__] = wrapped + obj.__dict__[self.name] = wrapped return wrapped @@ -393,6 +397,7 @@ class DeferredCacheListDescriptor(_CacheDescriptorBase): cached_method_name: str, list_name: str, num_args: Optional[int] = None, + name: Optional[str] = None, ): """ Args: @@ -403,7 +408,7 @@ class DeferredCacheListDescriptor(_CacheDescriptorBase): but including list_name) to use as cache keys. Defaults to all named args of the function. """ - super().__init__(orig, num_args=num_args, uncached_args=None) + super().__init__(orig, num_args=num_args, uncached_args=None, name=name) self.list_name = list_name @@ -525,7 +530,7 @@ class DeferredCacheListDescriptor(_CacheDescriptorBase): else: return defer.succeed(results) - obj.__dict__[self.orig.__name__] = wrapped + obj.__dict__[self.name] = wrapped return wrapped @@ -577,6 +582,7 @@ def cached( cache_context: bool = False, iterable: bool = False, prune_unread_entries: bool = True, + name: Optional[str] = None, ) -> Callable[[F], _CachedFunction[F]]: func = lambda orig: DeferredCacheDescriptor( orig, @@ -587,13 +593,18 @@ def cached( cache_context=cache_context, iterable=iterable, prune_unread_entries=prune_unread_entries, + name=name, ) return cast(Callable[[F], _CachedFunction[F]], func) def cachedList( - *, cached_method_name: str, list_name: str, num_args: Optional[int] = None + *, + cached_method_name: str, + list_name: str, + num_args: Optional[int] = None, + name: Optional[str] = None, ) -> Callable[[F], _CachedFunction[F]]: """Creates a descriptor that wraps a function in a `DeferredCacheListDescriptor`. @@ -628,6 +639,7 @@ def cachedList( cached_method_name=cached_method_name, list_name=list_name, num_args=num_args, + name=name, ) return cast(Callable[[F], _CachedFunction[F]], func) -- cgit 1.5.1 From 69fa29700e260f55238bfa480d3dd2b91a4353c0 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Thu, 8 Sep 2022 20:13:39 +0100 Subject: Re-type hint some collections in `/sync` code as read-only (#13754) Signed-off-by: Sean Quah --- changelog.d/13754.misc | 1 + synapse/handlers/sync.py | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 10 deletions(-) create mode 100644 changelog.d/13754.misc (limited to 'synapse/handlers/sync.py') diff --git a/changelog.d/13754.misc b/changelog.d/13754.misc new file mode 100644 index 0000000000..662ee00e99 --- /dev/null +++ b/changelog.d/13754.misc @@ -0,0 +1 @@ +Re-type hint some collections as read-only. diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 2d95b1fa24..5293fa4d0e 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -15,6 +15,7 @@ import itertools import logging from typing import ( TYPE_CHECKING, + AbstractSet, Any, Collection, Dict, @@ -1413,10 +1414,10 @@ class SyncHandler: async def _generate_sync_entry_for_device_list( self, sync_result_builder: "SyncResultBuilder", - newly_joined_rooms: Set[str], - newly_joined_or_invited_or_knocked_users: Set[str], - newly_left_rooms: Set[str], - newly_left_users: Set[str], + newly_joined_rooms: AbstractSet[str], + newly_joined_or_invited_or_knocked_users: AbstractSet[str], + newly_left_rooms: AbstractSet[str], + newly_left_users: AbstractSet[str], ) -> DeviceListUpdates: """Generate the DeviceListUpdates section of sync @@ -1434,8 +1435,7 @@ class SyncHandler: user_id = sync_result_builder.sync_config.user.to_string() since_token = sync_result_builder.since_token - # We're going to mutate these fields, so lets copy them rather than - # assume they won't get used later. + # Take a copy since these fields will be mutated later. newly_joined_or_invited_or_knocked_users = set( newly_joined_or_invited_or_knocked_users ) @@ -1635,8 +1635,8 @@ class SyncHandler: async def _generate_sync_entry_for_presence( self, sync_result_builder: "SyncResultBuilder", - newly_joined_rooms: Set[str], - newly_joined_or_invited_users: Set[str], + newly_joined_rooms: AbstractSet[str], + newly_joined_or_invited_users: AbstractSet[str], ) -> None: """Generates the presence portion of the sync response. Populates the `sync_result_builder` with the result. @@ -1694,7 +1694,7 @@ class SyncHandler: self, sync_result_builder: "SyncResultBuilder", account_data_by_room: Dict[str, Dict[str, JsonDict]], - ) -> Tuple[Set[str], Set[str], Set[str], Set[str]]: + ) -> Tuple[AbstractSet[str], AbstractSet[str], AbstractSet[str], AbstractSet[str]]: """Generates the rooms portion of the sync response. Populates the `sync_result_builder` with the result. @@ -2534,7 +2534,7 @@ class SyncResultBuilder: archived: List[ArchivedSyncResult] = attr.Factory(list) to_device: List[JsonDict] = attr.Factory(list) - def calculate_user_changes(self) -> Tuple[Set[str], Set[str]]: + def calculate_user_changes(self) -> Tuple[AbstractSet[str], AbstractSet[str]]: """Work out which other users have joined or left rooms we are joined to. This data only is only useful for an incremental sync. -- cgit 1.5.1 From 85e161631a2ca7d495b619456221311ec1c93096 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Tue, 27 Sep 2022 11:17:23 +0100 Subject: Faster room joins: Fix spurious error when joining a room (#13872) During a `lazy_load_members` `/sync`, we look through auth events in rooms with partial state to find prior membership events. When such a membership is not found, an error is logged. Since the first join event for a user never has a prior membership event to cite, the error would always be logged when one appeared in the room timeline. Avoid logging errors for such events. Introduced in #13477. Signed-off-by: Sean Quah --- changelog.d/13872.bugfix | 1 + synapse/handlers/sync.py | 22 +++++++++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) create mode 100644 changelog.d/13872.bugfix (limited to 'synapse/handlers/sync.py') diff --git a/changelog.d/13872.bugfix b/changelog.d/13872.bugfix new file mode 100644 index 0000000000..67d3d9e643 --- /dev/null +++ b/changelog.d/13872.bugfix @@ -0,0 +1 @@ +Faster room joins: Fix a bug introduced in 1.66.0 where an error would be logged when syncing after joining a room. diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 5293fa4d0e..e75fc6b947 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1191,7 +1191,9 @@ class SyncHandler: room_id: The partial state room to find the remaining memberships for. members_to_fetch: The memberships to find. events_with_membership_auth: A mapping from user IDs to events whose auth - events are known to contain their membership. + events would contain their prior membership, if one exists. + Note that join events will not cite a prior membership if a user has + never been in a room before. found_state_ids: A dict from (type, state_key) -> state_event_id, containing memberships that have been previously found. Entries in `members_to_fetch` that have a membership in `found_state_ids` are @@ -1201,6 +1203,10 @@ class SyncHandler: A dict from ("m.room.member", state_key) -> state_event_id, containing the memberships missing from `found_state_ids`. + When `events_with_membership_auth` contains a join event for a given user + which does not cite a prior membership, no membership is returned for that + user. + Raises: KeyError: if `events_with_membership_auth` does not have an entry for a missing membership. Memberships in `found_state_ids` do not need an @@ -1218,8 +1224,18 @@ class SyncHandler: if (EventTypes.Member, member) in found_state_ids: continue - missing_members.add(member) event_with_membership_auth = events_with_membership_auth[member] + is_join = ( + event_with_membership_auth.is_state() + and event_with_membership_auth.type == EventTypes.Member + and event_with_membership_auth.state_key == member + and event_with_membership_auth.content.get("membership") + == Membership.JOIN + ) + if not is_join: + # The event must include the desired membership as an auth event, unless + # it's the first join event for a given user. + missing_members.add(member) auth_event_ids.update(event_with_membership_auth.auth_event_ids()) auth_events = await self.store.get_events(auth_event_ids) @@ -1243,7 +1259,7 @@ class SyncHandler: auth_event.type == EventTypes.Member and auth_event.state_key == member ): - missing_members.remove(member) + missing_members.discard(member) additional_state_ids[ (EventTypes.Member, member) ] = auth_event.event_id -- cgit 1.5.1 From a466164647b969efd2e85168144cd75693443c05 Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Thu, 29 Sep 2022 14:55:12 +0100 Subject: Optimise get_rooms_for_user (drop with_stream_ordering) (#13787) --- changelog.d/13787.misc | 1 + synapse/handlers/device.py | 6 +- synapse/handlers/sync.py | 14 +--- synapse/storage/_base.py | 1 + synapse/storage/databases/main/cache.py | 1 + synapse/storage/databases/main/roommember.py | 117 +++++++++++++-------------- tests/handlers/test_sync.py | 1 + 7 files changed, 66 insertions(+), 75 deletions(-) create mode 100644 changelog.d/13787.misc (limited to 'synapse/handlers/sync.py') diff --git a/changelog.d/13787.misc b/changelog.d/13787.misc new file mode 100644 index 0000000000..a9b93717f0 --- /dev/null +++ b/changelog.d/13787.misc @@ -0,0 +1 @@ +Optimise get rooms for user calls. Contributed by Nick @ Beeper (@fizzadar). diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 03082fce42..f9cc5bddbc 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -273,11 +273,9 @@ class DeviceWorkerHandler: possibly_left = possibly_changed | possibly_left # Double check if we still share rooms with the given user. - users_rooms = await self.store.get_rooms_for_users_with_stream_ordering( - possibly_left - ) + users_rooms = await self.store.get_rooms_for_users(possibly_left) for changed_user_id, entries in users_rooms.items(): - if any(e.room_id in room_ids for e in entries): + if any(rid in room_ids for rid in entries): possibly_left.discard(changed_user_id) else: possibly_joined.discard(changed_user_id) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index e75fc6b947..4abb9b6127 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1490,16 +1490,14 @@ class SyncHandler: since_token.device_list_key ) if changed_users is not None: - result = await self.store.get_rooms_for_users_with_stream_ordering( - changed_users - ) + result = await self.store.get_rooms_for_users(changed_users) for changed_user_id, entries in result.items(): # Check if the changed user shares any rooms with the user, # or if the changed user is the syncing user (as we always # want to include device list updates of their own devices). if user_id == changed_user_id or any( - e.room_id in joined_rooms for e in entries + rid in joined_rooms for rid in entries ): users_that_have_changed.add(changed_user_id) else: @@ -1533,13 +1531,9 @@ class SyncHandler: newly_left_users.update(left_users) # Remove any users that we still share a room with. - left_users_rooms = ( - await self.store.get_rooms_for_users_with_stream_ordering( - newly_left_users - ) - ) + left_users_rooms = await self.store.get_rooms_for_users(newly_left_users) for user_id, entries in left_users_rooms.items(): - if any(e.room_id in joined_rooms for e in entries): + if any(rid in joined_rooms for rid in entries): newly_left_users.discard(user_id) return DeviceListUpdates( diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 313e8aca7d..bf42aeb8d1 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -94,6 +94,7 @@ class SQLBaseStore(metaclass=ABCMeta): self._attempt_to_invalidate_cache( "get_rooms_for_user_with_stream_ordering", (user_id,) ) + self._attempt_to_invalidate_cache("get_rooms_for_user", (user_id,)) # Purge other caches based on room state. self._attempt_to_invalidate_cache("get_room_summary", (room_id,)) diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py index db6ce83a2b..3b8ed1f7ee 100644 --- a/synapse/storage/databases/main/cache.py +++ b/synapse/storage/databases/main/cache.py @@ -205,6 +205,7 @@ class CacheInvalidationWorkerStore(SQLBaseStore): self.get_rooms_for_user_with_stream_ordering.invalidate( (data.state_key,) ) + self.get_rooms_for_user.invalidate((data.state_key,)) else: raise Exception("Unknown events stream row type %s" % (row.type,)) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 8ada3cdac3..982e1f08e3 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -15,7 +15,6 @@ import logging from typing import ( TYPE_CHECKING, - Callable, Collection, Dict, FrozenSet, @@ -52,7 +51,6 @@ from synapse.types import JsonDict, PersistedEventPosition, StateMap, get_domain from synapse.util.async_helpers import Linearizer from synapse.util.caches import intern_string from synapse.util.caches.descriptors import _CacheContext, cached, cachedList -from synapse.util.cancellation import cancellable from synapse.util.iterutils import batch_iter from synapse.util.metrics import Measure @@ -600,58 +598,6 @@ class RoomMemberWorkerStore(EventsWorkerStore): for room_id, instance, stream_id in txn ) - @cachedList( - cached_method_name="get_rooms_for_user_with_stream_ordering", - list_name="user_ids", - ) - async def get_rooms_for_users_with_stream_ordering( - self, user_ids: Collection[str] - ) -> Dict[str, FrozenSet[GetRoomsForUserWithStreamOrdering]]: - """A batched version of `get_rooms_for_user_with_stream_ordering`. - - Returns: - Map from user_id to set of rooms that is currently in. - """ - return await self.db_pool.runInteraction( - "get_rooms_for_users_with_stream_ordering", - self._get_rooms_for_users_with_stream_ordering_txn, - user_ids, - ) - - def _get_rooms_for_users_with_stream_ordering_txn( - self, txn: LoggingTransaction, user_ids: Collection[str] - ) -> Dict[str, FrozenSet[GetRoomsForUserWithStreamOrdering]]: - - clause, args = make_in_list_sql_clause( - self.database_engine, - "c.state_key", - user_ids, - ) - - sql = f""" - SELECT c.state_key, room_id, e.instance_name, e.stream_ordering - FROM current_state_events AS c - INNER JOIN events AS e USING (room_id, event_id) - WHERE - c.type = 'm.room.member' - AND c.membership = ? - AND {clause} - """ - - txn.execute(sql, [Membership.JOIN] + args) - - result: Dict[str, Set[GetRoomsForUserWithStreamOrdering]] = { - user_id: set() for user_id in user_ids - } - for user_id, room_id, instance, stream_id in txn: - result[user_id].add( - GetRoomsForUserWithStreamOrdering( - room_id, PersistedEventPosition(instance, stream_id) - ) - ) - - return {user_id: frozenset(v) for user_id, v in result.items()} - async def get_users_server_still_shares_room_with( self, user_ids: Collection[str] ) -> Set[str]: @@ -693,19 +639,68 @@ class RoomMemberWorkerStore(EventsWorkerStore): return {row[0] for row in txn} - @cancellable - async def get_rooms_for_user( - self, user_id: str, on_invalidate: Optional[Callable[[], None]] = None - ) -> FrozenSet[str]: + @cached(max_entries=500000, iterable=True) + async def get_rooms_for_user(self, user_id: str) -> FrozenSet[str]: """Returns a set of room_ids the user is currently joined to. If a remote user only returns rooms this server is currently participating in. """ - rooms = await self.get_rooms_for_user_with_stream_ordering( - user_id, on_invalidate=on_invalidate + rooms = self.get_rooms_for_user_with_stream_ordering.cache.get_immediate( + (user_id,), + None, + update_metrics=False, + ) + if rooms: + return frozenset(r.room_id for r in rooms) + + room_ids = await self.db_pool.simple_select_onecol( + table="current_state_events", + keyvalues={ + "type": EventTypes.Member, + "membership": Membership.JOIN, + "state_key": user_id, + }, + retcol="room_id", + desc="get_rooms_for_user", ) - return frozenset(r.room_id for r in rooms) + + return frozenset(room_ids) + + @cachedList( + cached_method_name="get_rooms_for_user", + list_name="user_ids", + ) + async def get_rooms_for_users( + self, user_ids: Collection[str] + ) -> Dict[str, FrozenSet[str]]: + """A batched version of `get_rooms_for_user`. + + Returns: + Map from user_id to set of rooms that is currently in. + """ + + rows = await self.db_pool.simple_select_many_batch( + table="current_state_events", + column="state_key", + iterable=user_ids, + retcols=( + "state_key", + "room_id", + ), + keyvalues={ + "type": EventTypes.Member, + "membership": Membership.JOIN, + }, + desc="get_rooms_for_users", + ) + + user_rooms: Dict[str, Set[str]] = {user_id: set() for user_id in user_ids} + + for row in rows: + user_rooms[row["state_key"]].add(row["room_id"]) + + return {key: frozenset(rooms) for key, rooms in user_rooms.items()} @cached(max_entries=10000) async def does_pair_of_users_share_a_room( diff --git a/tests/handlers/test_sync.py b/tests/handlers/test_sync.py index e3f38fbcc5..ab5c101eb7 100644 --- a/tests/handlers/test_sync.py +++ b/tests/handlers/test_sync.py @@ -159,6 +159,7 @@ class SyncTestCase(tests.unittest.HomeserverTestCase): # Blow away caches (supported room versions can only change due to a restart). self.store.get_rooms_for_user_with_stream_ordering.invalidate_all() + self.store.get_rooms_for_user.invalidate_all() self.get_success(self.store._get_event_cache.clear()) self.store._event_ref.clear() -- cgit 1.5.1 From b4ec4f5e71a87d5bdc840a4220dfd9a34c54c847 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 4 Oct 2022 09:47:04 -0400 Subject: Track notification counts per thread (implement MSC3773). (#13776) When retrieving counts of notifications segment the results based on the thread ID, but choose whether to return them as individual threads or as a single summed field by letting the client opt-in via a sync flag. The summarization code is also updated to be per thread, instead of per room. --- changelog.d/13776.feature | 1 + synapse/api/constants.py | 3 + synapse/api/filtering.py | 10 ++ synapse/config/experimental.py | 2 + synapse/handlers/sync.py | 40 ++++- synapse/push/bulk_push_rule_evaluator.py | 4 +- synapse/push/push_tools.py | 9 +- synapse/rest/client/sync.py | 4 + synapse/rest/client/versions.py | 3 +- synapse/storage/database.py | 2 +- .../storage/databases/main/event_push_actions.py | 188 +++++++++++++-------- synapse/storage/schema/__init__.py | 6 +- .../delta/73/06thread_notifications_backfill.sql | 29 ++++ .../07thread_notifications_not_null.sql.postgres | 19 +++ .../73/07thread_notifications_not_null.sql.sqlite | 101 +++++++++++ tests/replication/slave/storage/test_events.py | 17 +- tests/storage/test_event_push_actions.py | 169 +++++++++++++++++- 17 files changed, 514 insertions(+), 93 deletions(-) create mode 100644 changelog.d/13776.feature create mode 100644 synapse/storage/schema/main/delta/73/06thread_notifications_backfill.sql create mode 100644 synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.postgres create mode 100644 synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.sqlite (limited to 'synapse/handlers/sync.py') diff --git a/changelog.d/13776.feature b/changelog.d/13776.feature new file mode 100644 index 0000000000..22bce125ce --- /dev/null +++ b/changelog.d/13776.feature @@ -0,0 +1 @@ +Experimental support for thread-specific notifications ([MSC3773](https://github.com/matrix-org/matrix-spec-proposals/pull/3773)). diff --git a/synapse/api/constants.py b/synapse/api/constants.py index c031903b1a..44c5ffc6a5 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -31,6 +31,9 @@ MAX_ALIAS_LENGTH = 255 # the maximum length for a user id is 255 characters MAX_USERID_LENGTH = 255 +# Constant value used for the pseudo-thread which is the main timeline. +MAIN_TIMELINE: Final = "main" + class Membership: diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index f7f46f8d80..c6e44dcf82 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -84,6 +84,7 @@ ROOM_EVENT_FILTER_SCHEMA = { "contains_url": {"type": "boolean"}, "lazy_load_members": {"type": "boolean"}, "include_redundant_members": {"type": "boolean"}, + "org.matrix.msc3773.unread_thread_notifications": {"type": "boolean"}, # Include or exclude events with the provided labels. # cf https://github.com/matrix-org/matrix-doc/pull/2326 "org.matrix.labels": {"type": "array", "items": {"type": "string"}}, @@ -240,6 +241,9 @@ class FilterCollection: def include_redundant_members(self) -> bool: return self._room_state_filter.include_redundant_members + def unread_thread_notifications(self) -> bool: + return self._room_timeline_filter.unread_thread_notifications + async def filter_presence( self, events: Iterable[UserPresenceState] ) -> List[UserPresenceState]: @@ -304,6 +308,12 @@ class Filter: self.include_redundant_members = filter_json.get( "include_redundant_members", False ) + if hs.config.experimental.msc3773_enabled: + self.unread_thread_notifications: bool = filter_json.get( + "org.matrix.msc3773.unread_thread_notifications", False + ) + else: + self.unread_thread_notifications = False self.types = filter_json.get("types", None) self.not_types = filter_json.get("not_types", []) diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 83695f24d9..6503ce6e34 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -99,6 +99,8 @@ class ExperimentalConfig(Config): self.msc3771_enabled: bool = experimental.get("msc3771_enabled", False) # MSC3772: A push rule for mutual relations. self.msc3772_enabled: bool = experimental.get("msc3772_enabled", False) + # MSC3773: Thread notifications + self.msc3773_enabled: bool = experimental.get("msc3773_enabled", False) # MSC3715: dir param on /relations. self.msc3715_enabled: bool = experimental.get("msc3715_enabled", False) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 4abb9b6127..329e89c604 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -40,7 +40,7 @@ from synapse.handlers.relations import BundledAggregations from synapse.logging.context import current_context from synapse.logging.opentracing import SynapseTags, log_kv, set_tag, start_active_span from synapse.push.clientformat import format_push_rules_for_user -from synapse.storage.databases.main.event_push_actions import NotifCounts +from synapse.storage.databases.main.event_push_actions import RoomNotifCounts from synapse.storage.roommember import MemberSummary from synapse.storage.state import StateFilter from synapse.types import ( @@ -128,6 +128,7 @@ class JoinedSyncResult: ephemeral: List[JsonDict] account_data: List[JsonDict] unread_notifications: JsonDict + unread_thread_notifications: JsonDict summary: Optional[JsonDict] unread_count: int @@ -278,6 +279,8 @@ class SyncHandler: self.rooms_to_exclude = hs.config.server.rooms_to_exclude_from_sync + self._msc3773_enabled = hs.config.experimental.msc3773_enabled + async def wait_for_sync_for_user( self, requester: Requester, @@ -1288,7 +1291,7 @@ class SyncHandler: async def unread_notifs_for_room_id( self, room_id: str, sync_config: SyncConfig - ) -> NotifCounts: + ) -> RoomNotifCounts: with Measure(self.clock, "unread_notifs_for_room_id"): return await self.store.get_unread_event_push_actions_by_room_for_user( @@ -2353,6 +2356,7 @@ class SyncHandler: ephemeral=ephemeral, account_data=account_data_events, unread_notifications=unread_notifications, + unread_thread_notifications={}, summary=summary, unread_count=0, ) @@ -2360,10 +2364,36 @@ class SyncHandler: if room_sync or always_include: notifs = await self.unread_notifs_for_room_id(room_id, sync_config) - unread_notifications["notification_count"] = notifs.notify_count - unread_notifications["highlight_count"] = notifs.highlight_count + # Notifications for the main timeline. + notify_count = notifs.main_timeline.notify_count + highlight_count = notifs.main_timeline.highlight_count + unread_count = notifs.main_timeline.unread_count - room_sync.unread_count = notifs.unread_count + # Check the sync configuration. + if ( + self._msc3773_enabled + and sync_config.filter_collection.unread_thread_notifications() + ): + # And add info for each thread. + room_sync.unread_thread_notifications = { + thread_id: { + "notification_count": thread_notifs.notify_count, + "highlight_count": thread_notifs.highlight_count, + } + for thread_id, thread_notifs in notifs.threads.items() + if thread_id is not None + } + + else: + # Combine the unread counts for all threads and main timeline. + for thread_notifs in notifs.threads.values(): + notify_count += thread_notifs.notify_count + highlight_count += thread_notifs.highlight_count + unread_count += thread_notifs.unread_count + + unread_notifications["notification_count"] = notify_count + unread_notifications["highlight_count"] = highlight_count + room_sync.unread_count = unread_count sync_result_builder.joined.append(room_sync) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 4270438918..61d952742d 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -31,7 +31,7 @@ from typing import ( from prometheus_client import Counter -from synapse.api.constants import EventTypes, Membership, RelationTypes +from synapse.api.constants import MAIN_TIMELINE, EventTypes, Membership, RelationTypes from synapse.event_auth import auth_types_for_event, get_user_power_level from synapse.events import EventBase, relation_from_event from synapse.events.snapshot import EventContext @@ -280,7 +280,7 @@ class BulkPushRuleEvaluator: # If the event does not have a relation, then cannot have any mutual # relations or thread ID. relations = {} - thread_id = "main" + thread_id = MAIN_TIMELINE if relation: relations = await self._get_mutual_relations( relation.parent_id, diff --git a/synapse/push/push_tools.py b/synapse/push/push_tools.py index 658bf373b7..edeba27a45 100644 --- a/synapse/push/push_tools.py +++ b/synapse/push/push_tools.py @@ -39,7 +39,12 @@ async def get_badge_count(store: DataStore, user_id: str, group_by_room: bool) - await concurrently_execute(get_room_unread_count, joins, 10) for notifs in room_notifs: - if notifs.notify_count == 0: + # Combine the counts from all the threads. + notify_count = notifs.main_timeline.notify_count + sum( + n.notify_count for n in notifs.threads.values() + ) + + if notify_count == 0: continue if group_by_room: @@ -47,7 +52,7 @@ async def get_badge_count(store: DataStore, user_id: str, group_by_room: bool) - badge += 1 else: # increment the badge count by the number of unread messages in the room - badge += notifs.notify_count + badge += notify_count return badge diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index c2989765ce..f1c23d68e5 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -509,6 +509,10 @@ class SyncRestServlet(RestServlet): ephemeral_events = room.ephemeral result["ephemeral"] = {"events": ephemeral_events} result["unread_notifications"] = room.unread_notifications + if room.unread_thread_notifications: + result[ + "org.matrix.msc3773.unread_thread_notifications" + ] = room.unread_thread_notifications result["summary"] = room.summary if self._msc2654_enabled: result["org.matrix.msc2654.unread_count"] = room.unread_count diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py index c95b0d6f19..280d306483 100644 --- a/synapse/rest/client/versions.py +++ b/synapse/rest/client/versions.py @@ -103,8 +103,9 @@ class VersionsRestServlet(RestServlet): "org.matrix.msc3030": self.config.experimental.msc3030_enabled, # Adds support for thread relations, per MSC3440. "org.matrix.msc3440.stable": True, # TODO: remove when "v1.3" is added above - # Support for thread read receipts. + # Support for thread read receipts & notification counts. "org.matrix.msc3771": self.config.experimental.msc3771_enabled, + "org.matrix.msc3773": self.config.experimental.msc3773_enabled, # Allows moderators to fetch redacted event content as described in MSC2815 "fi.mau.msc2815": self.config.experimental.msc2815_enabled, # Adds support for login token requests as per MSC3882 diff --git a/synapse/storage/database.py b/synapse/storage/database.py index b4469eb964..7bb21f8f81 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -94,7 +94,7 @@ UNIQUE_INDEX_BACKGROUND_UPDATES = { "event_search": "event_search_event_id_idx", "local_media_repository_thumbnails": "local_media_repository_thumbnails_method_idx", "remote_media_cache_thumbnails": "remote_media_repository_thumbnails_method_idx", - "event_push_summary": "event_push_summary_unique_index", + "event_push_summary": "event_push_summary_unique_index2", "receipts_linearized": "receipts_linearized_unique_index", "receipts_graph": "receipts_graph_unique_index", } diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index cdc9ee5a37..3210e9cca1 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -88,7 +88,7 @@ from typing import ( import attr -from synapse.api.constants import ReceiptTypes +from synapse.api.constants import MAIN_TIMELINE, ReceiptTypes from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause from synapse.storage.database import ( @@ -157,7 +157,7 @@ class UserPushAction(EmailPushAction): @attr.s(slots=True, auto_attribs=True) class NotifCounts: """ - The per-user, per-room count of notifications. Used by sync and push. + The per-user, per-room, per-thread count of notifications. Used by sync and push. """ notify_count: int = 0 @@ -165,6 +165,21 @@ class NotifCounts: highlight_count: int = 0 +@attr.s(slots=True, auto_attribs=True) +class RoomNotifCounts: + """ + The per-user, per-room count of notifications. Used by sync and push. + """ + + main_timeline: NotifCounts + # Map of thread ID to the notification counts. + threads: Dict[str, NotifCounts] + + def __len__(self) -> int: + # To properly account for the amount of space in any caches. + return len(self.threads) + 1 + + def _serialize_action( actions: Collection[Union[Mapping, str]], is_highlight: bool ) -> str: @@ -338,12 +353,12 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas return result - @cached(tree=True, max_entries=5000) + @cached(tree=True, max_entries=5000, iterable=True) async def get_unread_event_push_actions_by_room_for_user( self, room_id: str, user_id: str, - ) -> NotifCounts: + ) -> RoomNotifCounts: """Get the notification count, the highlight count and the unread message count for a given user in a given room after their latest read receipt. @@ -356,8 +371,9 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas user_id: The user to retrieve the counts for. Returns - A NotifCounts object containing the notification count, the highlight count - and the unread message count. + A RoomNotifCounts object containing the notification count, the + highlight count and the unread message count for both the main timeline + and threads. """ return await self.db_pool.runInteraction( "get_unread_event_push_actions_by_room", @@ -371,7 +387,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas txn: LoggingTransaction, room_id: str, user_id: str, - ) -> NotifCounts: + ) -> RoomNotifCounts: # Get the stream ordering of the user's latest receipt in the room. result = self.get_last_unthreaded_receipt_for_user_txn( txn, @@ -406,7 +422,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas room_id: str, user_id: str, receipt_stream_ordering: int, - ) -> NotifCounts: + ) -> RoomNotifCounts: """Get the number of unread messages for a user/room that have happened since the given stream ordering. @@ -418,12 +434,19 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas receipt in the room. If there are no receipts, the stream ordering of the user's join event. - Returns - A NotifCounts object containing the notification count, the highlight count - and the unread message count. + Returns: + A RoomNotifCounts object containing the notification count, the + highlight count and the unread message count for both the main timeline + and threads. """ - counts = NotifCounts() + main_counts = NotifCounts() + thread_counts: Dict[str, NotifCounts] = {} + + def _get_thread(thread_id: str) -> NotifCounts: + if thread_id == MAIN_TIMELINE: + return main_counts + return thread_counts.setdefault(thread_id, NotifCounts()) # First we pull the counts from the summary table. # @@ -440,52 +463,61 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas # receipt). txn.execute( """ - SELECT stream_ordering, notif_count, COALESCE(unread_count, 0) + SELECT stream_ordering, notif_count, COALESCE(unread_count, 0), thread_id FROM event_push_summary WHERE room_id = ? AND user_id = ? AND ( (last_receipt_stream_ordering IS NULL AND stream_ordering > ?) OR last_receipt_stream_ordering = ? - ) + ) AND (notif_count != 0 OR COALESCE(unread_count, 0) != 0) """, (room_id, user_id, receipt_stream_ordering, receipt_stream_ordering), ) - row = txn.fetchone() - - summary_stream_ordering = 0 - if row: - summary_stream_ordering = row[0] - counts.notify_count += row[1] - counts.unread_count += row[2] + max_summary_stream_ordering = 0 + for summary_stream_ordering, notif_count, unread_count, thread_id in txn: + counts = _get_thread(thread_id) + counts.notify_count += notif_count + counts.unread_count += unread_count + + # Summaries will only be used if they have not been invalidated by + # a recent receipt; track the latest stream ordering or a valid summary. + # + # Note that since there's only one read receipt in the room per user, + # valid summaries are contiguous. + max_summary_stream_ordering = max( + summary_stream_ordering, max_summary_stream_ordering + ) # Next we need to count highlights, which aren't summarised sql = """ - SELECT COUNT(*) FROM event_push_actions + SELECT COUNT(*), thread_id FROM event_push_actions WHERE user_id = ? AND room_id = ? AND stream_ordering > ? AND highlight = 1 + GROUP BY thread_id """ txn.execute(sql, (user_id, room_id, receipt_stream_ordering)) - row = txn.fetchone() - if row: - counts.highlight_count += row[0] + for highlight_count, thread_id in txn: + _get_thread(thread_id).highlight_count += highlight_count # Finally we need to count push actions that aren't included in the # summary returned above. This might be due to recent events that haven't # been summarised yet or the summary is out of date due to a recent read # receipt. start_unread_stream_ordering = max( - receipt_stream_ordering, summary_stream_ordering + receipt_stream_ordering, max_summary_stream_ordering ) - notify_count, unread_count = self._get_notif_unread_count_for_user_room( + unread_counts = self._get_notif_unread_count_for_user_room( txn, room_id, user_id, start_unread_stream_ordering ) - counts.notify_count += notify_count - counts.unread_count += unread_count + for notif_count, unread_count, thread_id in unread_counts: + counts = _get_thread(thread_id) + counts.notify_count += notif_count + counts.unread_count += unread_count - return counts + return RoomNotifCounts(main_counts, thread_counts) def _get_notif_unread_count_for_user_room( self, @@ -494,7 +526,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas user_id: str, stream_ordering: int, max_stream_ordering: Optional[int] = None, - ) -> Tuple[int, int]: + ) -> List[Tuple[int, int, str]]: """Returns the notify and unread counts from `event_push_actions` for the given user/room in the given range. @@ -510,13 +542,14 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas If this is not given, then no maximum is applied. Return: - A tuple of the notif count and unread count in the given range. + A tuple of the notif count and unread count in the given range for + each thread. """ # If there have been no events in the room since the stream ordering, # there can't be any push actions either. if not self._events_stream_cache.has_entity_changed(room_id, stream_ordering): - return 0, 0 + return [] clause = "" args = [user_id, room_id, stream_ordering] @@ -527,26 +560,23 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas # If the max stream ordering is less than the min stream ordering, # then obviously there are zero push actions in that range. if max_stream_ordering <= stream_ordering: - return 0, 0 + return [] sql = f""" SELECT COUNT(CASE WHEN notif = 1 THEN 1 END), - COUNT(CASE WHEN unread = 1 THEN 1 END) - FROM event_push_actions ea - WHERE user_id = ? + COUNT(CASE WHEN unread = 1 THEN 1 END), + thread_id + FROM event_push_actions ea + WHERE user_id = ? AND room_id = ? AND ea.stream_ordering > ? {clause} + GROUP BY thread_id """ txn.execute(sql, args) - row = txn.fetchone() - - if row: - return cast(Tuple[int, int], row) - - return 0, 0 + return cast(List[Tuple[int, int, str]], txn.fetchall()) async def get_push_action_users_in_range( self, min_stream_ordering: int, max_stream_ordering: int @@ -1099,26 +1129,34 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas # Fetch the notification counts between the stream ordering of the # latest receipt and what was previously summarised. - notif_count, unread_count = self._get_notif_unread_count_for_user_room( + unread_counts = self._get_notif_unread_count_for_user_room( txn, room_id, user_id, stream_ordering, old_rotate_stream_ordering ) - # Replace the previous summary with the new counts. - # - # TODO(threads): Upsert per-thread instead of setting them all to main. - self.db_pool.simple_upsert_txn( + # First mark the summary for all threads in the room as cleared. + self.db_pool.simple_update_txn( txn, table="event_push_summary", - keyvalues={"room_id": room_id, "user_id": user_id}, - values={ - "notif_count": notif_count, - "unread_count": unread_count, + keyvalues={"user_id": user_id, "room_id": room_id}, + updatevalues={ + "notif_count": 0, + "unread_count": 0, "stream_ordering": old_rotate_stream_ordering, "last_receipt_stream_ordering": stream_ordering, - "thread_id": "main", }, ) + # Then any updated threads get their notification count and unread + # count updated. + self.db_pool.simple_update_many_txn( + txn, + table="event_push_summary", + key_names=("room_id", "user_id", "thread_id"), + key_values=[(room_id, user_id, row[2]) for row in unread_counts], + value_names=("notif_count", "unread_count"), + value_values=[(row[0], row[1]) for row in unread_counts], + ) + # We always update `event_push_summary_last_receipt_stream_id` to # ensure that we don't rescan the same receipts for remote users. @@ -1204,23 +1242,23 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas # Calculate the new counts that should be upserted into event_push_summary sql = """ - SELECT user_id, room_id, + SELECT user_id, room_id, thread_id, coalesce(old.%s, 0) + upd.cnt, upd.stream_ordering FROM ( - SELECT user_id, room_id, count(*) as cnt, + SELECT user_id, room_id, thread_id, count(*) as cnt, max(ea.stream_ordering) as stream_ordering FROM event_push_actions AS ea - LEFT JOIN event_push_summary AS old USING (user_id, room_id) + LEFT JOIN event_push_summary AS old USING (user_id, room_id, thread_id) WHERE ? < ea.stream_ordering AND ea.stream_ordering <= ? AND ( old.last_receipt_stream_ordering IS NULL OR old.last_receipt_stream_ordering < ea.stream_ordering ) AND %s = 1 - GROUP BY user_id, room_id + GROUP BY user_id, room_id, thread_id ) AS upd - LEFT JOIN event_push_summary AS old USING (user_id, room_id) + LEFT JOIN event_push_summary AS old USING (user_id, room_id, thread_id) """ # First get the count of unread messages. @@ -1234,11 +1272,11 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas # object because we might not have the same amount of rows in each of them. To do # this, we use a dict indexed on the user ID and room ID to make it easier to # populate. - summaries: Dict[Tuple[str, str], _EventPushSummary] = {} + summaries: Dict[Tuple[str, str, str], _EventPushSummary] = {} for row in txn: - summaries[(row[0], row[1])] = _EventPushSummary( - unread_count=row[2], - stream_ordering=row[3], + summaries[(row[0], row[1], row[2])] = _EventPushSummary( + unread_count=row[3], + stream_ordering=row[4], notif_count=0, ) @@ -1249,34 +1287,35 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas ) for row in txn: - if (row[0], row[1]) in summaries: - summaries[(row[0], row[1])].notif_count = row[2] + if (row[0], row[1], row[2]) in summaries: + summaries[(row[0], row[1], row[2])].notif_count = row[3] else: # Because the rules on notifying are different than the rules on marking # a message unread, we might end up with messages that notify but aren't # marked unread, so we might not have a summary for this (user, room) # tuple to complete. - summaries[(row[0], row[1])] = _EventPushSummary( + summaries[(row[0], row[1], row[2])] = _EventPushSummary( unread_count=0, - stream_ordering=row[3], - notif_count=row[2], + stream_ordering=row[4], + notif_count=row[3], ) logger.info("Rotating notifications, handling %d rows", len(summaries)) - # TODO(threads): Update on a per-thread basis. self.db_pool.simple_upsert_many_txn( txn, table="event_push_summary", - key_names=("user_id", "room_id"), - key_values=[(user_id, room_id) for user_id, room_id in summaries], - value_names=("notif_count", "unread_count", "stream_ordering", "thread_id"), + key_names=("user_id", "room_id", "thread_id"), + key_values=[ + (user_id, room_id, thread_id) + for user_id, room_id, thread_id in summaries + ], + value_names=("notif_count", "unread_count", "stream_ordering"), value_values=[ ( summary.notif_count, summary.unread_count, summary.stream_ordering, - "main", ) for summary in summaries.values() ], @@ -1288,7 +1327,10 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas ) async def _remove_old_push_actions_that_have_rotated(self) -> None: - """Clear out old push actions that have been summarised.""" + """ + Clear out old push actions that have been summarised (and are older than + 1 day ago). + """ # We want to clear out anything that is older than a day that *has* already # been rotated. diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index 4a5c947699..19dbf2da7f 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -90,9 +90,9 @@ Changes in SCHEMA_VERSION = 73; SCHEMA_COMPAT_VERSION = ( - # The groups tables are no longer accessible, so synapses with SCHEMA_VERSION < 72 - # could break. - 72 + # The threads_id column must exist for event_push_actions, event_push_summary, + # receipts_linearized, and receipts_graph. + 73 ) """Limit on how far the synapse codebase can be rolled back without breaking db compat diff --git a/synapse/storage/schema/main/delta/73/06thread_notifications_backfill.sql b/synapse/storage/schema/main/delta/73/06thread_notifications_backfill.sql new file mode 100644 index 0000000000..0ffde9bbeb --- /dev/null +++ b/synapse/storage/schema/main/delta/73/06thread_notifications_backfill.sql @@ -0,0 +1,29 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Forces the background updates from 06thread_notifications.sql to run in the +-- foreground as code will now require those to be "done". + +DELETE FROM background_updates WHERE update_name = 'event_push_backfill_thread_id'; + +-- Overwrite any null thread_id columns. +UPDATE event_push_actions_staging SET thread_id = 'main' WHERE thread_id IS NULL; +UPDATE event_push_actions SET thread_id = 'main' WHERE thread_id IS NULL; +UPDATE event_push_summary SET thread_id = 'main' WHERE thread_id IS NULL; + +-- Do not run the event_push_summary_unique_index job if it is pending; the +-- thread_id field will be made required. +DELETE FROM background_updates WHERE update_name = 'event_push_summary_unique_index'; +DROP INDEX IF EXISTS event_push_summary_unique_index; diff --git a/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.postgres b/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.postgres new file mode 100644 index 0000000000..33674f8c62 --- /dev/null +++ b/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.postgres @@ -0,0 +1,19 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- The columns can now be made non-nullable. +ALTER TABLE event_push_actions_staging ALTER COLUMN thread_id SET NOT NULL; +ALTER TABLE event_push_actions ALTER COLUMN thread_id SET NOT NULL; +ALTER TABLE event_push_summary ALTER COLUMN thread_id SET NOT NULL; diff --git a/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.sqlite b/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.sqlite new file mode 100644 index 0000000000..5322ad77a4 --- /dev/null +++ b/synapse/storage/schema/main/delta/73/07thread_notifications_not_null.sql.sqlite @@ -0,0 +1,101 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- SQLite doesn't support modifying columns to an existing table, so it must +-- be recreated. + +-- Create the new tables. +CREATE TABLE event_push_actions_staging_new ( + event_id TEXT NOT NULL, + user_id TEXT NOT NULL, + actions TEXT NOT NULL, + notif SMALLINT NOT NULL, + highlight SMALLINT NOT NULL, + unread SMALLINT, + thread_id TEXT NOT NULL, + inserted_ts BIGINT +); + +CREATE TABLE event_push_actions_new ( + room_id TEXT NOT NULL, + event_id TEXT NOT NULL, + user_id TEXT NOT NULL, + profile_tag VARCHAR(32), + actions TEXT NOT NULL, + topological_ordering BIGINT, + stream_ordering BIGINT, + notif SMALLINT, + highlight SMALLINT, + unread SMALLINT, + thread_id TEXT NOT NULL, + CONSTRAINT event_id_user_id_profile_tag_uniqueness UNIQUE (room_id, event_id, user_id, profile_tag) +); + +CREATE TABLE event_push_summary_new ( + user_id TEXT NOT NULL, + room_id TEXT NOT NULL, + notif_count BIGINT NOT NULL, + stream_ordering BIGINT NOT NULL, + unread_count BIGINT, + last_receipt_stream_ordering BIGINT, + thread_id TEXT NOT NULL +); + +-- Swap the indexes. +DROP INDEX IF EXISTS event_push_actions_staging_id; +CREATE INDEX event_push_actions_staging_id ON event_push_actions_staging_new(event_id); + +DROP INDEX IF EXISTS event_push_actions_room_id_user_id; +DROP INDEX IF EXISTS event_push_actions_rm_tokens; +DROP INDEX IF EXISTS event_push_actions_stream_ordering; +DROP INDEX IF EXISTS event_push_actions_u_highlight; +DROP INDEX IF EXISTS event_push_actions_highlights_index; +CREATE INDEX event_push_actions_room_id_user_id on event_push_actions_new(room_id, user_id); +CREATE INDEX event_push_actions_rm_tokens on event_push_actions_new( user_id, room_id, topological_ordering, stream_ordering ); +CREATE INDEX event_push_actions_stream_ordering on event_push_actions_new( stream_ordering, user_id ); +CREATE INDEX event_push_actions_u_highlight ON event_push_actions_new (user_id, stream_ordering); +CREATE INDEX event_push_actions_highlights_index ON event_push_actions_new (user_id, room_id, topological_ordering, stream_ordering); + +-- Copy the data. +INSERT INTO event_push_actions_staging_new (event_id, user_id, actions, notif, highlight, unread, thread_id, inserted_ts) + SELECT event_id, user_id, actions, notif, highlight, unread, thread_id, inserted_ts + FROM event_push_actions_staging; + +INSERT INTO event_push_actions_new (room_id, event_id, user_id, profile_tag, actions, topological_ordering, stream_ordering, notif, highlight, unread, thread_id) + SELECT room_id, event_id, user_id, profile_tag, actions, topological_ordering, stream_ordering, notif, highlight, unread, thread_id + FROM event_push_actions; + +INSERT INTO event_push_summary_new (user_id, room_id, notif_count, stream_ordering, unread_count, last_receipt_stream_ordering, thread_id) + SELECT user_id, room_id, notif_count, stream_ordering, unread_count, last_receipt_stream_ordering, thread_id + FROM event_push_summary; + +-- Drop the old tables. +DROP TABLE event_push_actions_staging; +DROP TABLE event_push_actions; +DROP TABLE event_push_summary; + +-- Rename the tables. +ALTER TABLE event_push_actions_staging_new RENAME TO event_push_actions_staging; +ALTER TABLE event_push_actions_new RENAME TO event_push_actions; +ALTER TABLE event_push_summary_new RENAME TO event_push_summary; + +-- Re-run background updates from 72/02event_push_actions_index.sql and +-- 72/06thread_notifications.sql. +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (7307, 'event_push_summary_unique_index2', '{}') + ON CONFLICT (update_name) DO NOTHING; +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (7307, 'event_push_actions_stream_highlight_index', '{}') + ON CONFLICT (update_name) DO NOTHING; diff --git a/tests/replication/slave/storage/test_events.py b/tests/replication/slave/storage/test_events.py index efd92793c0..d42e36cdf1 100644 --- a/tests/replication/slave/storage/test_events.py +++ b/tests/replication/slave/storage/test_events.py @@ -22,7 +22,10 @@ from synapse.api.room_versions import RoomVersions from synapse.events import FrozenEvent, _EventInternalMetadata, make_event_from_dict from synapse.handlers.room import RoomEventSource from synapse.replication.slave.storage.events import SlavedEventStore -from synapse.storage.databases.main.event_push_actions import NotifCounts +from synapse.storage.databases.main.event_push_actions import ( + NotifCounts, + RoomNotifCounts, +) from synapse.storage.roommember import GetRoomsForUserWithStreamOrdering, RoomsForUser from synapse.types import PersistedEventPosition @@ -178,7 +181,9 @@ class SlavedEventStoreTestCase(BaseSlavedStoreTestCase): self.check( "get_unread_event_push_actions_by_room_for_user", [ROOM_ID, USER_ID_2], - NotifCounts(highlight_count=0, unread_count=0, notify_count=0), + RoomNotifCounts( + NotifCounts(highlight_count=0, unread_count=0, notify_count=0), {} + ), ) self.persist( @@ -191,7 +196,9 @@ class SlavedEventStoreTestCase(BaseSlavedStoreTestCase): self.check( "get_unread_event_push_actions_by_room_for_user", [ROOM_ID, USER_ID_2], - NotifCounts(highlight_count=0, unread_count=0, notify_count=1), + RoomNotifCounts( + NotifCounts(highlight_count=0, unread_count=0, notify_count=1), {} + ), ) self.persist( @@ -206,7 +213,9 @@ class SlavedEventStoreTestCase(BaseSlavedStoreTestCase): self.check( "get_unread_event_push_actions_by_room_for_user", [ROOM_ID, USER_ID_2], - NotifCounts(highlight_count=1, unread_count=0, notify_count=2), + RoomNotifCounts( + NotifCounts(highlight_count=1, unread_count=0, notify_count=2), {} + ), ) def test_get_rooms_for_user_with_stream_ordering(self): diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index 473c965e19..89f986ac34 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Tuple +from typing import Optional, Tuple from twisted.test.proto_helpers import MemoryReactor @@ -20,6 +20,7 @@ from synapse.rest import admin from synapse.rest.client import login, room from synapse.server import HomeServer from synapse.storage.databases.main.event_push_actions import NotifCounts +from synapse.types import JsonDict from synapse.util import Clock from tests.unittest import HomeserverTestCase @@ -133,13 +134,14 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): ) ) self.assertEqual( - counts, + counts.main_timeline, NotifCounts( notify_count=noitf_count, unread_count=0, highlight_count=highlight_count, ), ) + self.assertEqual(counts.threads, {}) def _create_event(highlight: bool = False) -> str: result = self.helper.send_event( @@ -186,6 +188,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): _assert_counts(0, 0) _create_event() + _assert_counts(1, 0) _rotate() _assert_counts(1, 0) @@ -236,6 +239,168 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): _rotate() _assert_counts(0, 0) + def test_count_aggregation_threads(self) -> None: + """ + This is essentially the same test as test_count_aggregation, but adds + events to the main timeline and to a thread. + """ + + user_id, token, _, other_token, room_id = self._create_users_and_room() + thread_id: str + + last_event_id: str + + def _assert_counts( + noitf_count: int, + highlight_count: int, + thread_notif_count: int, + thread_highlight_count: int, + ) -> None: + counts = self.get_success( + self.store.db_pool.runInteraction( + "get-unread-counts", + self.store._get_unread_counts_by_receipt_txn, + room_id, + user_id, + ) + ) + self.assertEqual( + counts.main_timeline, + NotifCounts( + notify_count=noitf_count, + unread_count=0, + highlight_count=highlight_count, + ), + ) + if thread_notif_count or thread_highlight_count: + self.assertEqual( + counts.threads, + { + thread_id: NotifCounts( + notify_count=thread_notif_count, + unread_count=0, + highlight_count=thread_highlight_count, + ), + }, + ) + else: + self.assertEqual(counts.threads, {}) + + def _create_event( + highlight: bool = False, thread_id: Optional[str] = None + ) -> str: + content: JsonDict = { + "msgtype": "m.text", + "body": user_id if highlight else "msg", + } + if thread_id: + content["m.relates_to"] = { + "rel_type": "m.thread", + "event_id": thread_id, + } + + result = self.helper.send_event( + room_id, + type="m.room.message", + content=content, + tok=other_token, + ) + nonlocal last_event_id + last_event_id = result["event_id"] + return last_event_id + + def _rotate() -> None: + self.get_success(self.store._rotate_notifs()) + + def _mark_read(event_id: str, thread_id: Optional[str] = None) -> None: + self.get_success( + self.store.insert_receipt( + room_id, + "m.read", + user_id=user_id, + event_ids=[event_id], + thread_id=thread_id, + data={}, + ) + ) + + _assert_counts(0, 0, 0, 0) + thread_id = _create_event() + _assert_counts(1, 0, 0, 0) + _rotate() + _assert_counts(1, 0, 0, 0) + + _create_event(thread_id=thread_id) + _assert_counts(1, 0, 1, 0) + _rotate() + _assert_counts(1, 0, 1, 0) + + _create_event() + _assert_counts(2, 0, 1, 0) + _rotate() + _assert_counts(2, 0, 1, 0) + + event_id = _create_event(thread_id=thread_id) + _assert_counts(2, 0, 2, 0) + _rotate() + _assert_counts(2, 0, 2, 0) + + _create_event() + _create_event(thread_id=thread_id) + _mark_read(event_id) + _assert_counts(1, 0, 1, 0) + + _mark_read(last_event_id) + _assert_counts(0, 0, 0, 0) + + _create_event() + _create_event(thread_id=thread_id) + _assert_counts(1, 0, 1, 0) + _rotate() + _assert_counts(1, 0, 1, 0) + + # Delete old event push actions, this should not affect the (summarised) count. + self.get_success(self.store._remove_old_push_actions_that_have_rotated()) + _assert_counts(1, 0, 1, 0) + + _mark_read(last_event_id) + _assert_counts(0, 0, 0, 0) + + _create_event(True) + _assert_counts(1, 1, 0, 0) + _rotate() + _assert_counts(1, 1, 0, 0) + + event_id = _create_event(True, thread_id) + _assert_counts(1, 1, 1, 1) + _rotate() + _assert_counts(1, 1, 1, 1) + + # Check that adding another notification and rotating after highlight + # works. + _create_event() + _rotate() + _assert_counts(2, 1, 1, 1) + + _create_event(thread_id=thread_id) + _rotate() + _assert_counts(2, 1, 2, 1) + + # Check that sending read receipts at different points results in the + # right counts. + _mark_read(event_id) + _assert_counts(1, 0, 1, 0) + _mark_read(last_event_id) + _assert_counts(0, 0, 0, 0) + + _create_event(True) + _create_event(True, thread_id) + _assert_counts(1, 1, 1, 1) + _mark_read(last_event_id) + _assert_counts(0, 0, 0, 0) + _rotate() + _assert_counts(0, 0, 0, 0) + def test_find_first_stream_ordering_after_ts(self) -> None: def add_event(so: int, ts: int) -> None: self.get_success( -- cgit 1.5.1 From 0506bb100e0348ab6e6e213c6624677a83ef9303 Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Tue, 4 Oct 2022 16:42:59 +0100 Subject: Remove get rooms for user with stream ordering (#13991) By getting the joined rooms before the current token we avoid any reading history to confirm a user *was* in a room. We can then use any membership change events, which we already fetch during sync, to determine the final list of joined room IDs. --- changelog.d/13991.misc | 1 + synapse/handlers/sync.py | 149 ++++++++++++++++++++++------------------------- 2 files changed, 70 insertions(+), 80 deletions(-) create mode 100644 changelog.d/13991.misc (limited to 'synapse/handlers/sync.py') diff --git a/changelog.d/13991.misc b/changelog.d/13991.misc new file mode 100644 index 0000000000..f425fb17b2 --- /dev/null +++ b/changelog.d/13991.misc @@ -0,0 +1 @@ +Optimise queries used to get a users rooms during sync. Contributed by Nick @ Beeper (@fizzadar). diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 329e89c604..0f684857ca 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1317,6 +1317,19 @@ class SyncHandler: At the end, we transfer data from the `sync_result_builder` to a new `SyncResult` instance to signify that the sync calculation is complete. """ + + user_id = sync_config.user.to_string() + app_service = self.store.get_app_service_by_user_id(user_id) + if app_service: + # We no longer support AS users using /sync directly. + # See https://github.com/matrix-org/matrix-doc/issues/1144 + raise NotImplementedError() + + # Note: we get the users room list *before* we get the current token, this + # avoids checking back in history if rooms are joined after the token is fetched. + token_before_rooms = self.event_sources.get_current_token() + mutable_joined_room_ids = set(await self.store.get_rooms_for_user(user_id)) + # NB: The now_token gets changed by some of the generate_sync_* methods, # this is due to some of the underlying streams not supporting the ability # to query up to a given point. @@ -1324,6 +1337,57 @@ class SyncHandler: now_token = self.event_sources.get_current_token() log_kv({"now_token": now_token}) + # Since we fetched the users room list before the token, there's a small window + # during which membership events may have been persisted, so we fetch these now + # and modify the joined room list for any changes between the get_rooms_for_user + # call and the get_current_token call. + membership_change_events = [] + if since_token: + membership_change_events = await self.store.get_membership_changes_for_user( + user_id, since_token.room_key, now_token.room_key, self.rooms_to_exclude + ) + + mem_last_change_by_room_id: Dict[str, EventBase] = {} + for event in membership_change_events: + mem_last_change_by_room_id[event.room_id] = event + + # For the latest membership event in each room found, add/remove the room ID + # from the joined room list accordingly. In this case we only care if the + # latest change is JOIN. + + for room_id, event in mem_last_change_by_room_id.items(): + assert event.internal_metadata.stream_ordering + if ( + event.internal_metadata.stream_ordering + < token_before_rooms.room_key.stream + ): + continue + + logger.info( + "User membership change between getting rooms and current token: %s %s %s", + user_id, + event.membership, + room_id, + ) + # User joined a room - we have to then check the room state to ensure we + # respect any bans if there's a race between the join and ban events. + if event.membership == Membership.JOIN: + user_ids_in_room = await self.store.get_users_in_room(room_id) + if user_id in user_ids_in_room: + mutable_joined_room_ids.add(room_id) + # The user left the room, or left and was re-invited but not joined yet + else: + mutable_joined_room_ids.discard(room_id) + + # Now we have our list of joined room IDs, exclude as configured and freeze + joined_room_ids = frozenset( + ( + room_id + for room_id in mutable_joined_room_ids + if room_id not in self.rooms_to_exclude + ) + ) + logger.debug( "Calculating sync response for %r between %s and %s", sync_config.user, @@ -1331,22 +1395,13 @@ class SyncHandler: now_token, ) - user_id = sync_config.user.to_string() - app_service = self.store.get_app_service_by_user_id(user_id) - if app_service: - # We no longer support AS users using /sync directly. - # See https://github.com/matrix-org/matrix-doc/issues/1144 - raise NotImplementedError() - else: - joined_room_ids = await self.get_rooms_for_user_at( - user_id, now_token.room_key - ) sync_result_builder = SyncResultBuilder( sync_config, full_state, since_token=since_token, now_token=now_token, joined_room_ids=joined_room_ids, + membership_change_events=membership_change_events, ) logger.debug("Fetching account data") @@ -1827,19 +1882,12 @@ class SyncHandler: Does not modify the `sync_result_builder`. """ - user_id = sync_result_builder.sync_config.user.to_string() since_token = sync_result_builder.since_token - now_token = sync_result_builder.now_token + membership_change_events = sync_result_builder.membership_change_events assert since_token - # Get a list of membership change events that have happened to the user - # requesting the sync. - membership_changes = await self.store.get_membership_changes_for_user( - user_id, since_token.room_key, now_token.room_key - ) - - if membership_changes: + if membership_change_events: return True stream_id = since_token.room_key.stream @@ -1878,16 +1926,10 @@ class SyncHandler: since_token = sync_result_builder.since_token now_token = sync_result_builder.now_token sync_config = sync_result_builder.sync_config + membership_change_events = sync_result_builder.membership_change_events assert since_token - # TODO: we've already called this function and ran this query in - # _have_rooms_changed. We could keep the results in memory to avoid a - # second query, at the cost of more complicated source code. - membership_change_events = await self.store.get_membership_changes_for_user( - user_id, since_token.room_key, now_token.room_key, self.rooms_to_exclude - ) - mem_change_events_by_room_id: Dict[str, List[EventBase]] = {} for event in membership_change_events: mem_change_events_by_room_id.setdefault(event.room_id, []).append(event) @@ -2415,60 +2457,6 @@ class SyncHandler: else: raise Exception("Unrecognized rtype: %r", room_builder.rtype) - async def get_rooms_for_user_at( - self, - user_id: str, - room_key: RoomStreamToken, - ) -> FrozenSet[str]: - """Get set of joined rooms for a user at the given stream ordering. - - The stream ordering *must* be recent, otherwise this may throw an - exception if older than a month. (This function is called with the - current token, which should be perfectly fine). - - Args: - user_id - stream_ordering - - ReturnValue: - Set of room_ids the user is in at given stream_ordering. - """ - joined_rooms = await self.store.get_rooms_for_user_with_stream_ordering(user_id) - - joined_room_ids = set() - - # We need to check that the stream ordering of the join for each room - # is before the stream_ordering asked for. This might not be the case - # if the user joins a room between us getting the current token and - # calling `get_rooms_for_user_with_stream_ordering`. - # If the membership's stream ordering is after the given stream - # ordering, we need to go and work out if the user was in the room - # before. - # We also need to check whether the room should be excluded from sync - # responses as per the homeserver config. - for joined_room in joined_rooms: - if joined_room.room_id in self.rooms_to_exclude: - continue - - if not joined_room.event_pos.persisted_after(room_key): - joined_room_ids.add(joined_room.room_id) - continue - - logger.info("User joined room after current token: %s", joined_room.room_id) - - extrems = ( - await self.store.get_forward_extremities_for_room_at_stream_ordering( - joined_room.room_id, joined_room.event_pos.stream - ) - ) - user_ids_in_room = await self.state.get_current_user_ids_in_room( - joined_room.room_id, extrems - ) - if user_id in user_ids_in_room: - joined_room_ids.add(joined_room.room_id) - - return frozenset(joined_room_ids) - def _action_has_highlight(actions: List[JsonDict]) -> bool: for action in actions: @@ -2565,6 +2553,7 @@ class SyncResultBuilder: since_token: Optional[StreamToken] now_token: StreamToken joined_room_ids: FrozenSet[str] + membership_change_events: List[EventBase] presence: List[UserPresenceState] = attr.Factory(list) account_data: List[JsonDict] = attr.Factory(list) -- cgit 1.5.1 From 66a785733458d0b5801097caff53624e202a91b4 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 7 Oct 2022 09:26:40 -0400 Subject: Use stable identifiers for MSC3771 & MSC3773. (#14050) These are both part of Matrix 1.4 which has now been released. For now, support both the unstable and stable identifiers. --- changelog.d/13776.feature | 2 +- changelog.d/13824.feature | 2 +- changelog.d/13877.feature | 2 +- changelog.d/13878.feature | 2 +- changelog.d/14050.feature | 1 + synapse/api/filtering.py | 13 +++++++---- synapse/config/experimental.py | 2 -- synapse/handlers/receipts.py | 11 ++++------ synapse/handlers/sync.py | 7 +----- synapse/rest/client/receipts.py | 48 ++++++++++++++++++++--------------------- synapse/rest/client/sync.py | 9 +++++--- synapse/rest/client/versions.py | 2 +- 12 files changed, 49 insertions(+), 52 deletions(-) create mode 100644 changelog.d/14050.feature (limited to 'synapse/handlers/sync.py') diff --git a/changelog.d/13776.feature b/changelog.d/13776.feature index 22bce125ce..5d0ae16e13 100644 --- a/changelog.d/13776.feature +++ b/changelog.d/13776.feature @@ -1 +1 @@ -Experimental support for thread-specific notifications ([MSC3773](https://github.com/matrix-org/matrix-spec-proposals/pull/3773)). +Support for thread-specific notifications & receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771) and [MSC3773](https://github.com/matrix-org/matrix-spec-proposals/pull/3773)). diff --git a/changelog.d/13824.feature b/changelog.d/13824.feature index d0cb902dff..5d0ae16e13 100644 --- a/changelog.d/13824.feature +++ b/changelog.d/13824.feature @@ -1 +1 @@ -Experimental support for thread-specific receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771)). +Support for thread-specific notifications & receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771) and [MSC3773](https://github.com/matrix-org/matrix-spec-proposals/pull/3773)). diff --git a/changelog.d/13877.feature b/changelog.d/13877.feature index d0cb902dff..5d0ae16e13 100644 --- a/changelog.d/13877.feature +++ b/changelog.d/13877.feature @@ -1 +1 @@ -Experimental support for thread-specific receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771)). +Support for thread-specific notifications & receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771) and [MSC3773](https://github.com/matrix-org/matrix-spec-proposals/pull/3773)). diff --git a/changelog.d/13878.feature b/changelog.d/13878.feature index d0cb902dff..5d0ae16e13 100644 --- a/changelog.d/13878.feature +++ b/changelog.d/13878.feature @@ -1 +1 @@ -Experimental support for thread-specific receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771)). +Support for thread-specific notifications & receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771) and [MSC3773](https://github.com/matrix-org/matrix-spec-proposals/pull/3773)). diff --git a/changelog.d/14050.feature b/changelog.d/14050.feature new file mode 100644 index 0000000000..5d0ae16e13 --- /dev/null +++ b/changelog.d/14050.feature @@ -0,0 +1 @@ +Support for thread-specific notifications & receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771) and [MSC3773](https://github.com/matrix-org/matrix-spec-proposals/pull/3773)). diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index c6e44dcf82..cc31cf8cc7 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -84,6 +84,7 @@ ROOM_EVENT_FILTER_SCHEMA = { "contains_url": {"type": "boolean"}, "lazy_load_members": {"type": "boolean"}, "include_redundant_members": {"type": "boolean"}, + "unread_thread_notifications": {"type": "boolean"}, "org.matrix.msc3773.unread_thread_notifications": {"type": "boolean"}, # Include or exclude events with the provided labels. # cf https://github.com/matrix-org/matrix-doc/pull/2326 @@ -308,12 +309,16 @@ class Filter: self.include_redundant_members = filter_json.get( "include_redundant_members", False ) - if hs.config.experimental.msc3773_enabled: - self.unread_thread_notifications: bool = filter_json.get( + self.unread_thread_notifications: bool = filter_json.get( + "unread_thread_notifications", False + ) + if ( + not self.unread_thread_notifications + and hs.config.experimental.msc3773_enabled + ): + self.unread_thread_notifications = filter_json.get( "org.matrix.msc3773.unread_thread_notifications", False ) - else: - self.unread_thread_notifications = False self.types = filter_json.get("types", None) self.not_types = filter_json.get("not_types", []) diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 6503ce6e34..c35301207a 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -95,8 +95,6 @@ class ExperimentalConfig(Config): # MSC2815 (allow room moderators to view redacted event content) self.msc2815_enabled: bool = experimental.get("msc2815_enabled", False) - # MSC3771: Thread read receipts - self.msc3771_enabled: bool = experimental.get("msc3771_enabled", False) # MSC3772: A push rule for mutual relations. self.msc3772_enabled: bool = experimental.get("msc3772_enabled", False) # MSC3773: Thread notifications diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 4768a34c07..4a7ec9e426 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -63,8 +63,6 @@ class ReceiptsHandler: self.clock = self.hs.get_clock() self.state = hs.get_state_handler() - self._msc3771_enabled = hs.config.experimental.msc3771_enabled - async def _received_remote_receipt(self, origin: str, content: JsonDict) -> None: """Called when we receive an EDU of type m.receipt from a remote HS.""" receipts = [] @@ -96,11 +94,10 @@ class ReceiptsHandler: # Check if these receipts apply to a thread. thread_id = None data = user_values.get("data", {}) - if self._msc3771_enabled and isinstance(data, dict): - thread_id = data.get("thread_id") - # If the thread ID is invalid, consider it missing. - if not isinstance(thread_id, str): - thread_id = None + thread_id = data.get("thread_id") + # If the thread ID is invalid, consider it missing. + if not isinstance(thread_id, str): + thread_id = None receipts.append( ReadReceipt( diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 0f684857ca..1db5d68021 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -279,8 +279,6 @@ class SyncHandler: self.rooms_to_exclude = hs.config.server.rooms_to_exclude_from_sync - self._msc3773_enabled = hs.config.experimental.msc3773_enabled - async def wait_for_sync_for_user( self, requester: Requester, @@ -2412,10 +2410,7 @@ class SyncHandler: unread_count = notifs.main_timeline.unread_count # Check the sync configuration. - if ( - self._msc3773_enabled - and sync_config.filter_collection.unread_thread_notifications() - ): + if sync_config.filter_collection.unread_thread_notifications(): # And add info for each thread. room_sync.unread_thread_notifications = { thread_id: { diff --git a/synapse/rest/client/receipts.py b/synapse/rest/client/receipts.py index 287dfdd69e..14dec7ac4e 100644 --- a/synapse/rest/client/receipts.py +++ b/synapse/rest/client/receipts.py @@ -50,7 +50,6 @@ class ReceiptRestServlet(RestServlet): ReceiptTypes.READ_PRIVATE, ReceiptTypes.FULLY_READ, } - self._msc3771_enabled = hs.config.experimental.msc3771_enabled async def on_POST( self, request: SynapseRequest, room_id: str, receipt_type: str, event_id: str @@ -67,30 +66,29 @@ class ReceiptRestServlet(RestServlet): # Pull the thread ID, if one exists. thread_id = None - if self._msc3771_enabled: - if "thread_id" in body: - thread_id = body.get("thread_id") - if not thread_id or not isinstance(thread_id, str): - raise SynapseError( - 400, - "thread_id field must be a non-empty string", - Codes.INVALID_PARAM, - ) - - if receipt_type == ReceiptTypes.FULLY_READ: - raise SynapseError( - 400, - f"thread_id is not compatible with {ReceiptTypes.FULLY_READ} receipts.", - Codes.INVALID_PARAM, - ) - - # Ensure the event ID roughly correlates to the thread ID. - if thread_id != await self._main_store.get_thread_id(event_id): - raise SynapseError( - 400, - f"event_id {event_id} is not related to thread {thread_id}", - Codes.INVALID_PARAM, - ) + if "thread_id" in body: + thread_id = body.get("thread_id") + if not thread_id or not isinstance(thread_id, str): + raise SynapseError( + 400, + "thread_id field must be a non-empty string", + Codes.INVALID_PARAM, + ) + + if receipt_type == ReceiptTypes.FULLY_READ: + raise SynapseError( + 400, + f"thread_id is not compatible with {ReceiptTypes.FULLY_READ} receipts.", + Codes.INVALID_PARAM, + ) + + # Ensure the event ID roughly correlates to the thread ID. + if thread_id != await self._main_store.get_thread_id(event_id): + raise SynapseError( + 400, + f"event_id {event_id} is not related to thread {thread_id}", + Codes.INVALID_PARAM, + ) await self.presence_handler.bump_presence_active_time(requester.user) diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index f1c23d68e5..8a16459105 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -100,6 +100,7 @@ class SyncRestServlet(RestServlet): self._server_notices_sender = hs.get_server_notices_sender() self._event_serializer = hs.get_event_client_serializer() self._msc2654_enabled = hs.config.experimental.msc2654_enabled + self._msc3773_enabled = hs.config.experimental.msc3773_enabled async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: # This will always be set by the time Twisted calls us. @@ -510,9 +511,11 @@ class SyncRestServlet(RestServlet): result["ephemeral"] = {"events": ephemeral_events} result["unread_notifications"] = room.unread_notifications if room.unread_thread_notifications: - result[ - "org.matrix.msc3773.unread_thread_notifications" - ] = room.unread_thread_notifications + result["unread_thread_notifications"] = room.unread_thread_notifications + if self._msc3773_enabled: + result[ + "org.matrix.msc3773.unread_thread_notifications" + ] = room.unread_thread_notifications result["summary"] = room.summary if self._msc2654_enabled: result["org.matrix.msc2654.unread_count"] = room.unread_count diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py index 18ed313b5c..d1d2e5f7e3 100644 --- a/synapse/rest/client/versions.py +++ b/synapse/rest/client/versions.py @@ -105,7 +105,7 @@ class VersionsRestServlet(RestServlet): # Adds support for thread relations, per MSC3440. "org.matrix.msc3440.stable": True, # TODO: remove when "v1.3" is added above # Support for thread read receipts & notification counts. - "org.matrix.msc3771": self.config.experimental.msc3771_enabled, + "org.matrix.msc3771": True, "org.matrix.msc3773": self.config.experimental.msc3773_enabled, # Allows moderators to fetch redacted event content as described in MSC2815 "fi.mau.msc2815": self.config.experimental.msc2815_enabled, -- cgit 1.5.1 From 1eed795fc56d95df3968e37f3a4db92f24513e15 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 15 Nov 2022 17:35:19 +0000 Subject: Include heroes in partial join responses' state (#14442) * Pull out hero selection logic * Include heroes in partial join response's state * Changelog * Fixup trial test * Remove TODO --- changelog.d/14442.feature | 1 + synapse/federation/federation_server.py | 23 +++++++++++++++++---- synapse/handlers/sync.py | 20 +++---------------- synapse/storage/databases/main/roommember.py | 30 ++++++++++++++++++++++++++++ tests/federation/test_federation_server.py | 11 ++++++---- 5 files changed, 60 insertions(+), 25 deletions(-) create mode 100644 changelog.d/14442.feature (limited to 'synapse/handlers/sync.py') diff --git a/changelog.d/14442.feature b/changelog.d/14442.feature new file mode 100644 index 0000000000..917e7edfb3 --- /dev/null +++ b/changelog.d/14442.feature @@ -0,0 +1 @@ +Faster joins: include heroes' membership events in the partial join response, for rooms without a name or canonical alias. diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 59e351595b..bb20af6e91 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -74,6 +74,8 @@ from synapse.replication.http.federation import ( ) from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.lock import Lock +from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary +from synapse.storage.roommember import MemberSummary from synapse.types import JsonDict, StateMap, get_domain_from_id from synapse.util import json_decoder, unwrapFirstError from synapse.util.async_helpers import Linearizer, concurrently_execute, gather_results @@ -691,8 +693,9 @@ class FederationServer(FederationBase): state_event_ids: Collection[str] servers_in_room: Optional[Collection[str]] if caller_supports_partial_state: + summary = await self.store.get_room_summary(room_id) state_event_ids = _get_event_ids_for_partial_state_join( - event, prev_state_ids + event, prev_state_ids, summary ) servers_in_room = await self.state.get_hosts_in_room_at_events( room_id, event_ids=event.prev_event_ids() @@ -1495,6 +1498,7 @@ class FederationHandlerRegistry: def _get_event_ids_for_partial_state_join( join_event: EventBase, prev_state_ids: StateMap[str], + summary: Dict[str, MemberSummary], ) -> Collection[str]: """Calculate state to be retuned in a partial_state send_join @@ -1521,8 +1525,19 @@ def _get_event_ids_for_partial_state_join( if current_membership_event_id is not None: state_event_ids.add(current_membership_event_id) - # TODO: return a few more members: - # - those with invites - # - those that are kicked? / banned + name_id = prev_state_ids.get((EventTypes.Name, "")) + canonical_alias_id = prev_state_ids.get((EventTypes.CanonicalAlias, "")) + if not name_id and not canonical_alias_id: + # Also include the hero members of the room (for DM rooms without a title). + # To do this properly, we should select the correct subset of membership events + # from `prev_state_ids`. Instead, we are lazier and use the (cached) + # `get_room_summary` function, which is based on the current state of the room. + # This introduces races; we choose to ignore them because a) they should be rare + # and b) even if it's wrong, joining servers will get the full state eventually. + heroes = extract_heroes_from_room_summary(summary, join_event.state_key) + for hero in heroes: + membership_event_id = prev_state_ids.get((EventTypes.Member, hero)) + if membership_event_id: + state_event_ids.add(membership_event_id) return state_event_ids diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 1db5d68021..259456b55d 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -41,6 +41,7 @@ from synapse.logging.context import current_context from synapse.logging.opentracing import SynapseTags, log_kv, set_tag, start_active_span from synapse.push.clientformat import format_push_rules_for_user from synapse.storage.databases.main.event_push_actions import RoomNotifCounts +from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary from synapse.storage.roommember import MemberSummary from synapse.storage.state import StateFilter from synapse.types import ( @@ -805,18 +806,6 @@ class SyncHandler: if canonical_alias and canonical_alias.content.get("alias"): return summary - me = sync_config.user.to_string() - - joined_user_ids = [ - r[0] for r in details.get(Membership.JOIN, empty_ms).members if r[0] != me - ] - invited_user_ids = [ - r[0] for r in details.get(Membership.INVITE, empty_ms).members if r[0] != me - ] - gone_user_ids = [ - r[0] for r in details.get(Membership.LEAVE, empty_ms).members if r[0] != me - ] + [r[0] for r in details.get(Membership.BAN, empty_ms).members if r[0] != me] - # FIXME: only build up a member_ids list for our heroes member_ids = {} for membership in ( @@ -828,11 +817,8 @@ class SyncHandler: for user_id, event_id in details.get(membership, empty_ms).members: member_ids[user_id] = event_id - # FIXME: order by stream ordering rather than as returned by SQL - if joined_user_ids or invited_user_ids: - summary["m.heroes"] = sorted(joined_user_ids + invited_user_ids)[0:5] - else: - summary["m.heroes"] = sorted(gone_user_ids)[0:5] + me = sync_config.user.to_string() + summary["m.heroes"] = extract_heroes_from_room_summary(details, me) if not sync_config.filter_collection.lazy_load_members(): return summary diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index e56a13f21e..f02c1d7ea7 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -1517,6 +1517,36 @@ class RoomMemberStore( await self.db_pool.runInteraction("forget_membership", f) +def extract_heroes_from_room_summary( + details: Mapping[str, MemberSummary], me: str +) -> List[str]: + """Determine the users that represent a room, from the perspective of the `me` user. + + The rules which say which users we select are specified in the "Room Summary" + section of + https://spec.matrix.org/v1.4/client-server-api/#get_matrixclientv3sync + + Returns a list (possibly empty) of heroes' mxids. + """ + empty_ms = MemberSummary([], 0) + + joined_user_ids = [ + r[0] for r in details.get(Membership.JOIN, empty_ms).members if r[0] != me + ] + invited_user_ids = [ + r[0] for r in details.get(Membership.INVITE, empty_ms).members if r[0] != me + ] + gone_user_ids = [ + r[0] for r in details.get(Membership.LEAVE, empty_ms).members if r[0] != me + ] + [r[0] for r in details.get(Membership.BAN, empty_ms).members if r[0] != me] + + # FIXME: order by stream ordering rather than as returned by SQL + if joined_user_ids or invited_user_ids: + return sorted(joined_user_ids + invited_user_ids)[0:5] + else: + return sorted(gone_user_ids)[0:5] + + @attr.s(slots=True, auto_attribs=True) class _JoinedHostsCache: """The cached data used by the `_get_joined_hosts_cache`.""" diff --git a/tests/federation/test_federation_server.py b/tests/federation/test_federation_server.py index 3a6ef221ae..177e5b5afc 100644 --- a/tests/federation/test_federation_server.py +++ b/tests/federation/test_federation_server.py @@ -212,7 +212,7 @@ class SendJoinFederationTests(unittest.FederatingHomeserverTestCase): self.assertEqual(r[("m.room.member", joining_user)].membership, "join") @override_config({"experimental_features": {"msc3706_enabled": True}}) - def test_send_join_partial_state(self): + def test_send_join_partial_state(self) -> None: """When MSC3706 support is enabled, /send_join should return partial state""" joining_user = "@misspiggy:" + self.OTHER_SERVER_NAME join_result = self._make_join(joining_user) @@ -240,6 +240,9 @@ class SendJoinFederationTests(unittest.FederatingHomeserverTestCase): ("m.room.power_levels", ""), ("m.room.join_rules", ""), ("m.room.history_visibility", ""), + # Users included here because they're heroes. + ("m.room.member", "@kermit:test"), + ("m.room.member", "@fozzie:test"), ], ) @@ -249,9 +252,9 @@ class SendJoinFederationTests(unittest.FederatingHomeserverTestCase): ] self.assertCountEqual( returned_auth_chain_events, - [ - ("m.room.member", "@kermit:test"), - ], + # TODO: change the test so that we get at least one event in the auth chain + # here. + [], ) # the room should show that the new user is a member -- cgit 1.5.1