From dd2d32dcdb3238735aeeeaff18e5c754b1d50be9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 28 Apr 2021 11:07:47 +0100 Subject: Add type hints to presence handler (#9885) --- synapse/handlers/presence.py | 159 ++++++++++++++++++++++++------------------- 1 file changed, 89 insertions(+), 70 deletions(-) (limited to 'synapse/handlers/presence.py') diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 969c73c1e7..e9f618bb5a 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -28,6 +28,7 @@ from bisect import bisect from contextlib import contextmanager from typing import ( TYPE_CHECKING, + Callable, Collection, Dict, FrozenSet, @@ -232,23 +233,23 @@ class BasePresenceHandler(abc.ABC): """ async def update_external_syncs_row( - self, process_id, user_id, is_syncing, sync_time_msec - ): + self, process_id: str, user_id: str, is_syncing: bool, sync_time_msec: int + ) -> None: """Update the syncing users for an external process as a delta. This is a no-op when presence is handled by a different worker. Args: - process_id (str): An identifier for the process the users are + process_id: An identifier for the process the users are syncing against. This allows synapse to process updates as user start and stop syncing against a given process. - user_id (str): The user who has started or stopped syncing - is_syncing (bool): Whether or not the user is now syncing - sync_time_msec(int): Time in ms when the user was last syncing + user_id: The user who has started or stopped syncing + is_syncing: Whether or not the user is now syncing + sync_time_msec: Time in ms when the user was last syncing """ pass - async def update_external_syncs_clear(self, process_id): + async def update_external_syncs_clear(self, process_id: str) -> None: """Marks all users that had been marked as syncing by a given process as offline. @@ -304,7 +305,7 @@ class _NullContextManager(ContextManager[None]): class WorkerPresenceHandler(BasePresenceHandler): - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): super().__init__(hs) self.hs = hs @@ -327,7 +328,7 @@ class WorkerPresenceHandler(BasePresenceHandler): # user_id -> last_sync_ms. Lists the users that have stopped syncing but # we haven't notified the presence writer of that yet - self.users_going_offline = {} + self.users_going_offline = {} # type: Dict[str, int] self._bump_active_client = ReplicationBumpPresenceActiveTime.make_client(hs) self._set_state_client = ReplicationPresenceSetState.make_client(hs) @@ -346,24 +347,21 @@ class WorkerPresenceHandler(BasePresenceHandler): self._on_shutdown, ) - def _on_shutdown(self): + def _on_shutdown(self) -> None: if self._presence_enabled: self.hs.get_tcp_replication().send_command( ClearUserSyncsCommand(self.instance_id) ) - def send_user_sync(self, user_id, is_syncing, last_sync_ms): + def send_user_sync(self, user_id: str, is_syncing: bool, last_sync_ms: int) -> None: if self._presence_enabled: self.hs.get_tcp_replication().send_user_sync( self.instance_id, user_id, is_syncing, last_sync_ms ) - def mark_as_coming_online(self, user_id): + def mark_as_coming_online(self, user_id: str) -> None: """A user has started syncing. Send a UserSync to the presence writer, unless they had recently stopped syncing. - - Args: - user_id (str) """ going_offline = self.users_going_offline.pop(user_id, None) if not going_offline: @@ -371,18 +369,15 @@ class WorkerPresenceHandler(BasePresenceHandler): # were offline self.send_user_sync(user_id, True, self.clock.time_msec()) - def mark_as_going_offline(self, user_id): + def mark_as_going_offline(self, user_id: str) -> None: """A user has stopped syncing. We wait before notifying the presence writer as its likely they'll come back soon. This allows us to avoid sending a stopped syncing immediately followed by a started syncing notification to the presence writer - - Args: - user_id (str) """ self.users_going_offline[user_id] = self.clock.time_msec() - def send_stop_syncing(self): + def send_stop_syncing(self) -> None: """Check if there are any users who have stopped syncing a while ago and haven't come back yet. If there are poke the presence writer about them. """ @@ -430,7 +425,9 @@ class WorkerPresenceHandler(BasePresenceHandler): return _user_syncing() - async def notify_from_replication(self, states, stream_id): + async def notify_from_replication( + self, states: List[UserPresenceState], stream_id: int + ) -> None: parties = await get_interested_parties(self.store, self.presence_router, states) room_ids_to_states, users_to_states = parties @@ -478,7 +475,12 @@ class WorkerPresenceHandler(BasePresenceHandler): if count > 0 ] - async def set_state(self, target_user, state, ignore_status_msg=False): + async def set_state( + self, + target_user: UserID, + state: JsonDict, + ignore_status_msg: bool = False, + ) -> None: """Set the presence state of the user.""" presence = state["presence"] @@ -508,7 +510,7 @@ class WorkerPresenceHandler(BasePresenceHandler): ignore_status_msg=ignore_status_msg, ) - async def bump_presence_active_time(self, user): + async def bump_presence_active_time(self, user: UserID) -> None: """We've seen the user do something that indicates they're interacting with the app. """ @@ -592,8 +594,8 @@ class PresenceHandler(BasePresenceHandler): # we assume that all the sync requests on that process have stopped. # Stored as a dict from process_id to set of user_id, and a dict of # process_id to millisecond timestamp last updated. - self.external_process_to_current_syncs = {} # type: Dict[int, Set[str]] - self.external_process_last_updated_ms = {} # type: Dict[int, int] + self.external_process_to_current_syncs = {} # type: Dict[str, Set[str]] + self.external_process_last_updated_ms = {} # type: Dict[str, int] self.external_sync_linearizer = Linearizer(name="external_sync_linearizer") @@ -633,7 +635,7 @@ class PresenceHandler(BasePresenceHandler): self._event_pos = self.store.get_current_events_token() self._event_processing = False - async def _on_shutdown(self): + async def _on_shutdown(self) -> None: """Gets called when shutting down. This lets us persist any updates that we haven't yet persisted, e.g. updates that only changes some internal timers. This allows changes to persist across startup without having to @@ -662,7 +664,7 @@ class PresenceHandler(BasePresenceHandler): ) logger.info("Finished _on_shutdown") - async def _persist_unpersisted_changes(self): + async def _persist_unpersisted_changes(self) -> None: """We periodically persist the unpersisted changes, as otherwise they may stack up and slow down shutdown times. """ @@ -762,7 +764,7 @@ class PresenceHandler(BasePresenceHandler): states, destinations ) - async def _handle_timeouts(self): + async def _handle_timeouts(self) -> None: """Checks the presence of users that have timed out and updates as appropriate. """ @@ -814,7 +816,7 @@ class PresenceHandler(BasePresenceHandler): return await self._update_states(changes) - async def bump_presence_active_time(self, user): + async def bump_presence_active_time(self, user: UserID) -> None: """We've seen the user do something that indicates they're interacting with the app. """ @@ -911,17 +913,17 @@ class PresenceHandler(BasePresenceHandler): return [] async def update_external_syncs_row( - self, process_id, user_id, is_syncing, sync_time_msec - ): + self, process_id: str, user_id: str, is_syncing: bool, sync_time_msec: int + ) -> None: """Update the syncing users for an external process as a delta. Args: - process_id (str): An identifier for the process the users are + process_id: An identifier for the process the users are syncing against. This allows synapse to process updates as user start and stop syncing against a given process. - user_id (str): The user who has started or stopped syncing - is_syncing (bool): Whether or not the user is now syncing - sync_time_msec(int): Time in ms when the user was last syncing + user_id: The user who has started or stopped syncing + is_syncing: Whether or not the user is now syncing + sync_time_msec: Time in ms when the user was last syncing """ with (await self.external_sync_linearizer.queue(process_id)): prev_state = await self.current_state_for_user(user_id) @@ -958,7 +960,7 @@ class PresenceHandler(BasePresenceHandler): self.external_process_last_updated_ms[process_id] = self.clock.time_msec() - async def update_external_syncs_clear(self, process_id): + async def update_external_syncs_clear(self, process_id: str) -> None: """Marks all users that had been marked as syncing by a given process as offline. @@ -979,12 +981,12 @@ class PresenceHandler(BasePresenceHandler): ) self.external_process_last_updated_ms.pop(process_id, None) - async def current_state_for_user(self, user_id): + async def current_state_for_user(self, user_id: str) -> UserPresenceState: """Get the current presence state for a user.""" res = await self.current_state_for_users([user_id]) return res[user_id] - async def _persist_and_notify(self, states): + async def _persist_and_notify(self, states: List[UserPresenceState]) -> None: """Persist states in the database, poke the notifier and send to interested remote servers """ @@ -1005,7 +1007,7 @@ class PresenceHandler(BasePresenceHandler): # stream (which is updated by `store.update_presence`). await self.maybe_send_presence_to_interested_destinations(states) - async def incoming_presence(self, origin, content): + async def incoming_presence(self, origin: str, content: JsonDict) -> None: """Called when we receive a `m.presence` EDU from a remote server.""" if not self._presence_enabled: return @@ -1055,7 +1057,9 @@ class PresenceHandler(BasePresenceHandler): federation_presence_counter.inc(len(updates)) await self._update_states(updates) - async def set_state(self, target_user, state, ignore_status_msg=False): + async def set_state( + self, target_user: UserID, state: JsonDict, ignore_status_msg: bool = False + ) -> None: """Set the presence state of the user.""" status_msg = state.get("status_msg", None) presence = state["presence"] @@ -1089,7 +1093,7 @@ class PresenceHandler(BasePresenceHandler): await self._update_states([prev_state.copy_and_replace(**new_fields)]) - async def is_visible(self, observed_user, observer_user): + async def is_visible(self, observed_user: UserID, observer_user: UserID) -> bool: """Returns whether a user can see another user's presence.""" observer_room_ids = await self.store.get_rooms_for_user( observer_user.to_string() @@ -1144,7 +1148,7 @@ class PresenceHandler(BasePresenceHandler): ) return rows - def notify_new_event(self): + def notify_new_event(self) -> None: """Called when new events have happened. Handles users and servers joining rooms and require being sent presence. """ @@ -1163,7 +1167,7 @@ class PresenceHandler(BasePresenceHandler): run_as_background_process("presence.notify_new_event", _process_presence) - async def _unsafe_process(self): + async def _unsafe_process(self) -> None: # Loop round handling deltas until we're up to date while True: with Measure(self.clock, "presence_delta"): @@ -1188,7 +1192,7 @@ class PresenceHandler(BasePresenceHandler): max_pos ) - async def _handle_state_delta(self, deltas): + async def _handle_state_delta(self, deltas: List[JsonDict]) -> None: """Process current state deltas to find new joins that need to be handled. """ @@ -1311,7 +1315,7 @@ class PresenceHandler(BasePresenceHandler): return [remote_host], states -def should_notify(old_state, new_state): +def should_notify(old_state: UserPresenceState, new_state: UserPresenceState) -> bool: """Decides if a presence state change should be sent to interested parties.""" if old_state == new_state: return False @@ -1347,7 +1351,9 @@ def should_notify(old_state, new_state): return False -def format_user_presence_state(state, now, include_user_id=True): +def format_user_presence_state( + state: UserPresenceState, now: int, include_user_id: bool = True +) -> JsonDict: """Convert UserPresenceState to a format that can be sent down to clients and to other servers. @@ -1385,11 +1391,11 @@ class PresenceEventSource: @log_function async def get_new_events( self, - user, - from_key, - room_ids=None, - include_offline=True, - explicit_room_id=None, + user: UserID, + from_key: Optional[int], + room_ids: Optional[List[str]] = None, + include_offline: bool = True, + explicit_room_id: Optional[str] = None, **kwargs, ) -> Tuple[List[UserPresenceState], int]: # The process for getting presence events are: @@ -1594,7 +1600,7 @@ class PresenceEventSource: if update.state != PresenceState.OFFLINE ] - def get_current_key(self): + def get_current_key(self) -> int: return self.store.get_current_presence_token() @cached(num_args=2, cache_context=True) @@ -1654,15 +1660,20 @@ class PresenceEventSource: return users_interested_in -def handle_timeouts(user_states, is_mine_fn, syncing_user_ids, now): +def handle_timeouts( + user_states: List[UserPresenceState], + is_mine_fn: Callable[[str], bool], + syncing_user_ids: Set[str], + now: int, +) -> List[UserPresenceState]: """Checks the presence of users that have timed out and updates as appropriate. Args: - user_states(list): List of UserPresenceState's to check. - is_mine_fn (fn): Function that returns if a user_id is ours - syncing_user_ids (set): Set of user_ids with active syncs. - now (int): Current time in ms. + user_states: List of UserPresenceState's to check. + is_mine_fn: Function that returns if a user_id is ours + syncing_user_ids: Set of user_ids with active syncs. + now: Current time in ms. Returns: List of UserPresenceState updates @@ -1679,14 +1690,16 @@ def handle_timeouts(user_states, is_mine_fn, syncing_user_ids, now): return list(changes.values()) -def handle_timeout(state, is_mine, syncing_user_ids, now): +def handle_timeout( + state: UserPresenceState, is_mine: bool, syncing_user_ids: Set[str], now: int +) -> Optional[UserPresenceState]: """Checks the presence of the user to see if any of the timers have elapsed Args: - state (UserPresenceState) - is_mine (bool): Whether the user is ours - syncing_user_ids (set): Set of user_ids with active syncs. - now (int): Current time in ms. + state + is_mine: Whether the user is ours + syncing_user_ids: Set of user_ids with active syncs. + now: Current time in ms. Returns: A UserPresenceState update or None if no update. @@ -1738,23 +1751,29 @@ def handle_timeout(state, is_mine, syncing_user_ids, now): return state if changed else None -def handle_update(prev_state, new_state, is_mine, wheel_timer, now): +def handle_update( + prev_state: UserPresenceState, + new_state: UserPresenceState, + is_mine: bool, + wheel_timer: WheelTimer, + now: int, +) -> Tuple[UserPresenceState, bool, bool]: """Given a presence update: 1. Add any appropriate timers. 2. Check if we should notify anyone. Args: - prev_state (UserPresenceState) - new_state (UserPresenceState) - is_mine (bool): Whether the user is ours - wheel_timer (WheelTimer) - now (int): Time now in ms + prev_state + new_state + is_mine: Whether the user is ours + wheel_timer + now: Time now in ms Returns: 3-tuple: `(new_state, persist_and_notify, federation_ping)` where: - new_state: is the state to actually persist - - persist_and_notify (bool): whether to persist and notify people - - federation_ping (bool): whether we should send a ping over federation + - persist_and_notify: whether to persist and notify people + - federation_ping: whether we should send a ping over federation """ user_id = new_state.user_id -- cgit 1.5.1 From d0aee697ac0587c005bc1048f5036979331f1101 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 5 May 2021 16:49:34 +0100 Subject: Use get_current_users_in_room from store and not StateHandler (#9910) --- changelog.d/9910.bugfix | 1 + changelog.d/9910.feature | 1 + synapse/handlers/directory.py | 4 ++-- synapse/handlers/events.py | 2 +- synapse/handlers/message.py | 2 +- synapse/handlers/presence.py | 2 +- synapse/handlers/room.py | 2 +- synapse/handlers/sync.py | 6 +++--- synapse/state/__init__.py | 10 +++++++--- synapse/storage/_base.py | 1 + synapse/storage/databases/main/roommember.py | 8 ++++++-- synapse/storage/databases/main/user_directory.py | 4 +--- 12 files changed, 26 insertions(+), 17 deletions(-) create mode 100644 changelog.d/9910.bugfix create mode 100644 changelog.d/9910.feature (limited to 'synapse/handlers/presence.py') diff --git a/changelog.d/9910.bugfix b/changelog.d/9910.bugfix new file mode 100644 index 0000000000..06d523fd46 --- /dev/null +++ b/changelog.d/9910.bugfix @@ -0,0 +1 @@ +Fix bug where user directory could get out of sync if room visibility and membership changed in quick succession. diff --git a/changelog.d/9910.feature b/changelog.d/9910.feature new file mode 100644 index 0000000000..54165cce18 --- /dev/null +++ b/changelog.d/9910.feature @@ -0,0 +1 @@ +Improve performance after joining a large room when presence is enabled. diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index de1b14cde3..4064a2b859 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -78,7 +78,7 @@ class DirectoryHandler(BaseHandler): # TODO(erikj): Add transactions. # TODO(erikj): Check if there is a current association. if not servers: - users = await self.state.get_current_users_in_room(room_id) + users = await self.store.get_users_in_room(room_id) servers = {get_domain_from_id(u) for u in users} if not servers: @@ -270,7 +270,7 @@ class DirectoryHandler(BaseHandler): Codes.NOT_FOUND, ) - users = await self.state.get_current_users_in_room(room_id) + users = await self.store.get_users_in_room(room_id) extra_servers = {get_domain_from_id(u) for u in users} servers = set(extra_servers) | set(servers) diff --git a/synapse/handlers/events.py b/synapse/handlers/events.py index d82144d7fa..f134f1e234 100644 --- a/synapse/handlers/events.py +++ b/synapse/handlers/events.py @@ -103,7 +103,7 @@ class EventStreamHandler(BaseHandler): # Send down presence. if event.state_key == auth_user_id: # Send down presence for everyone in the room. - users = await self.state.get_current_users_in_room( + users = await self.store.get_users_in_room( event.room_id ) # type: Iterable[str] else: diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 49f8aa25ea..393f17c3a3 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -258,7 +258,7 @@ class MessageHandler: "Getting joined members after leaving is not implemented" ) - users_with_profile = await self.state.get_current_users_in_room(room_id) + users_with_profile = await self.store.get_users_in_room_with_profiles(room_id) # If this is an AS, double check that they are allowed to see the members. # This can either be because the AS user is in the room or because there diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index ebbc234334..8e085dfbec 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -1293,7 +1293,7 @@ class PresenceHandler(BasePresenceHandler): remote_host = get_domain_from_id(user_id) - users = await self.state.get_current_users_in_room(room_id) + users = await self.store.get_users_in_room(room_id) user_ids = list(filter(self.is_mine_id, users)) states_d = await self.current_state_for_users(user_ids) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 5a888b7941..fb4823a5cc 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1327,7 +1327,7 @@ class RoomShutdownHandler: new_room_id = None logger.info("Shutting down room %r", room_id) - users = await self.state.get_current_users_in_room(room_id) + users = await self.store.get_users_in_room(room_id) kicked_users = [] failed_to_kick_users = [] for user_id in users: diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index a9a3ee05c3..0fcc1532da 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1190,7 +1190,7 @@ class SyncHandler: # Step 1b, check for newly joined rooms for room_id in newly_joined_rooms: - joined_users = await self.state.get_current_users_in_room(room_id) + joined_users = await self.store.get_users_in_room(room_id) newly_joined_or_invited_users.update(joined_users) # TODO: Check that these users are actually new, i.e. either they @@ -1206,7 +1206,7 @@ class SyncHandler: # Now find users that we no longer track for room_id in newly_left_rooms: - left_users = await self.state.get_current_users_in_room(room_id) + left_users = await self.store.get_users_in_room(room_id) newly_left_users.update(left_users) # Remove any users that we still share a room with. @@ -1361,7 +1361,7 @@ class SyncHandler: extra_users_ids = set(newly_joined_or_invited_users) for room_id in newly_joined_rooms: - users = await self.state.get_current_users_in_room(room_id) + users = await self.store.get_users_in_room(room_id) extra_users_ids.update(users) extra_users_ids.discard(user.to_string()) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index b3bd92d37c..a1770f620e 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -213,19 +213,23 @@ class StateHandler: return ret.state async def get_current_users_in_room( - self, room_id: str, latest_event_ids: Optional[List[str]] = None + self, room_id: str, latest_event_ids: List[str] ) -> Dict[str, ProfileInfo]: """ Get the users who are currently in a room. + Note: This is much slower than using the equivalent method + `DataStore.get_users_in_room` or `DataStore.get_users_in_room_with_profiles`, + so this should only be used when wanting the users at a particular point + in the room. + Args: room_id: The ID of the room. latest_event_ids: Precomputed list of latest event IDs. Will be computed if None. Returns: Dictionary of user IDs to their profileinfo. """ - if not latest_event_ids: - latest_event_ids = await self.store.get_latest_event_ids_in_room(room_id) + assert latest_event_ids is not None logger.debug("calling resolve_state_groups from get_current_users_in_room") diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 6b68d8720c..3d98d3f5f8 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -69,6 +69,7 @@ class SQLBaseStore(metaclass=ABCMeta): self._attempt_to_invalidate_cache("is_host_joined", (room_id, host)) self._attempt_to_invalidate_cache("get_users_in_room", (room_id,)) + self._attempt_to_invalidate_cache("get_users_in_room_with_profiles", (room_id,)) self._attempt_to_invalidate_cache("get_room_summary", (room_id,)) self._attempt_to_invalidate_cache("get_current_state_ids", (room_id,)) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 2a8532f8c1..5fc3bb5a7d 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -205,8 +205,12 @@ class RoomMemberWorkerStore(EventsWorkerStore): def _get_users_in_room_with_profiles(txn) -> Dict[str, ProfileInfo]: sql = """ - SELECT user_id, display_name, avatar_url FROM room_memberships - WHERE room_id = ? AND membership = ? + SELECT state_key, display_name, avatar_url FROM room_memberships as m + INNER JOIN current_state_events as c + ON m.event_id = c.event_id + AND m.room_id = c.room_id + AND m.user_id = c.state_key + WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ? """ txn.execute(sql, (room_id, Membership.JOIN)) diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index 7a082fdd21..a6bfb4902a 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -142,8 +142,6 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore): batch_size (int): Maximum number of state events to process per cycle. """ - state = self.hs.get_state_handler() - # If we don't have progress filed, delete everything. if not progress: await self.delete_all_from_user_dir() @@ -197,7 +195,7 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore): room_id ) - users_with_profile = await state.get_current_users_in_room(room_id) + users_with_profile = await self.get_users_in_room_with_profiles(room_id) user_ids = set(users_with_profile) # Update each user in the user directory. -- cgit 1.5.1 From 37623e33822d4e032ba6d1f523fb09b12fe27aab Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 5 May 2021 17:27:05 +0100 Subject: Increase perf of handling presence when joining large rooms. (#9916) --- changelog.d/9916.feature | 1 + synapse/handlers/presence.py | 154 +++++++++++++++++++++------------------- tests/handlers/test_presence.py | 14 ++-- 3 files changed, 87 insertions(+), 82 deletions(-) create mode 100644 changelog.d/9916.feature (limited to 'synapse/handlers/presence.py') diff --git a/changelog.d/9916.feature b/changelog.d/9916.feature new file mode 100644 index 0000000000..54165cce18 --- /dev/null +++ b/changelog.d/9916.feature @@ -0,0 +1 @@ +Improve performance after joining a large room when presence is enabled. diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 8e085dfbec..6fd1f34289 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -1183,7 +1183,16 @@ class PresenceHandler(BasePresenceHandler): max_pos, deltas = await self.store.get_current_state_deltas( self._event_pos, room_max_stream_ordering ) - await self._handle_state_delta(deltas) + + # We may get multiple deltas for different rooms, but we want to + # handle them on a room by room basis, so we batch them up by + # room. + deltas_by_room: Dict[str, List[JsonDict]] = {} + for delta in deltas: + deltas_by_room.setdefault(delta["room_id"], []).append(delta) + + for room_id, deltas_for_room in deltas_by_room.items(): + await self._handle_state_delta(room_id, deltas_for_room) self._event_pos = max_pos @@ -1192,17 +1201,21 @@ class PresenceHandler(BasePresenceHandler): max_pos ) - async def _handle_state_delta(self, deltas: List[JsonDict]) -> None: - """Process current state deltas to find new joins that need to be - handled. + async def _handle_state_delta(self, room_id: str, deltas: List[JsonDict]) -> None: + """Process current state deltas for the room to find new joins that need + to be handled. """ - # A map of destination to a set of user state that they should receive - presence_destinations = {} # type: Dict[str, Set[UserPresenceState]] + + # Sets of newly joined users. Note that if the local server is + # joining a remote room for the first time we'll see both the joining + # user and all remote users as newly joined. + newly_joined_users = set() for delta in deltas: + assert room_id == delta["room_id"] + typ = delta["type"] state_key = delta["state_key"] - room_id = delta["room_id"] event_id = delta["event_id"] prev_event_id = delta["prev_event_id"] @@ -1231,72 +1244,55 @@ class PresenceHandler(BasePresenceHandler): # Ignore changes to join events. continue - # Retrieve any user presence state updates that need to be sent as a result, - # and the destinations that need to receive it - destinations, user_presence_states = await self._on_user_joined_room( - room_id, state_key - ) - - # Insert the destinations and respective updates into our destinations dict - for destination in destinations: - presence_destinations.setdefault(destination, set()).update( - user_presence_states - ) - - # Send out user presence updates for each destination - for destination, user_state_set in presence_destinations.items(): - self._federation_queue.send_presence_to_destinations( - destinations=[destination], states=user_state_set - ) - - async def _on_user_joined_room( - self, room_id: str, user_id: str - ) -> Tuple[List[str], List[UserPresenceState]]: - """Called when we detect a user joining the room via the current state - delta stream. Returns the destinations that need to be updated and the - presence updates to send to them. - - Args: - room_id: The ID of the room that the user has joined. - user_id: The ID of the user that has joined the room. - - Returns: - A tuple of destinations and presence updates to send to them. - """ - if self.is_mine_id(user_id): - # If this is a local user then we need to send their presence - # out to hosts in the room (who don't already have it) - - # TODO: We should be able to filter the hosts down to those that - # haven't previously seen the user - - remote_hosts = await self.state.get_current_hosts_in_room(room_id) + newly_joined_users.add(state_key) - # Filter out ourselves. - filtered_remote_hosts = [ - host for host in remote_hosts if host != self.server_name - ] - - state = await self.current_state_for_user(user_id) - return filtered_remote_hosts, [state] - else: - # A remote user has joined the room, so we need to: - # 1. Check if this is a new server in the room - # 2. If so send any presence they don't already have for - # local users in the room. - - # TODO: We should be able to filter the users down to those that - # the server hasn't previously seen - - # TODO: Check that this is actually a new server joining the - # room. - - remote_host = get_domain_from_id(user_id) + if not newly_joined_users: + # If nobody has joined then there's nothing to do. + return - users = await self.store.get_users_in_room(room_id) - user_ids = list(filter(self.is_mine_id, users)) + # We want to send: + # 1. presence states of all local users in the room to newly joined + # remote servers + # 2. presence states of newly joined users to all remote servers in + # the room. + # + # TODO: Only send presence states to remote hosts that don't already + # have them (because they already share rooms). + + # Get all the users who were already in the room, by fetching the + # current users in the room and removing the newly joined users. + users = await self.store.get_users_in_room(room_id) + prev_users = set(users) - newly_joined_users + + # Construct sets for all the local users and remote hosts that were + # already in the room + prev_local_users = [] + prev_remote_hosts = set() + for user_id in prev_users: + if self.is_mine_id(user_id): + prev_local_users.append(user_id) + else: + prev_remote_hosts.add(get_domain_from_id(user_id)) + + # Similarly, construct sets for all the local users and remote hosts + # that were *not* already in the room. Care needs to be taken with the + # calculating the remote hosts, as a host may have already been in the + # room even if there is a newly joined user from that host. + newly_joined_local_users = [] + newly_joined_remote_hosts = set() + for user_id in newly_joined_users: + if self.is_mine_id(user_id): + newly_joined_local_users.append(user_id) + else: + host = get_domain_from_id(user_id) + if host not in prev_remote_hosts: + newly_joined_remote_hosts.add(host) - states_d = await self.current_state_for_users(user_ids) + # Send presence states of all local users in the room to newly joined + # remote servers. (We actually only send states for local users already + # in the room, as we'll send states for newly joined local users below.) + if prev_local_users and newly_joined_remote_hosts: + local_states = await self.current_state_for_users(prev_local_users) # Filter out old presence, i.e. offline presence states where # the user hasn't been active for a week. We can change this @@ -1306,13 +1302,27 @@ class PresenceHandler(BasePresenceHandler): now = self.clock.time_msec() states = [ state - for state in states_d.values() + for state in local_states.values() if state.state != PresenceState.OFFLINE or now - state.last_active_ts < 7 * 24 * 60 * 60 * 1000 or state.status_msg is not None ] - return [remote_host], states + self._federation_queue.send_presence_to_destinations( + destinations=newly_joined_remote_hosts, + states=states, + ) + + # Send presence states of newly joined users to all remote servers in + # the room + if newly_joined_local_users and ( + prev_remote_hosts or newly_joined_remote_hosts + ): + local_states = await self.current_state_for_users(newly_joined_local_users) + self._federation_queue.send_presence_to_destinations( + destinations=prev_remote_hosts | newly_joined_remote_hosts, + states=list(local_states.values()), + ) def should_notify(old_state: UserPresenceState, new_state: UserPresenceState) -> bool: diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py index ce330e79cc..1ffab709fc 100644 --- a/tests/handlers/test_presence.py +++ b/tests/handlers/test_presence.py @@ -729,7 +729,7 @@ class PresenceJoinTestCase(unittest.HomeserverTestCase): ) self.assertEqual(expected_state.state, PresenceState.ONLINE) self.federation_sender.send_presence_to_destinations.assert_called_once_with( - destinations=["server2"], states={expected_state} + destinations={"server2"}, states=[expected_state] ) # @@ -740,7 +740,7 @@ class PresenceJoinTestCase(unittest.HomeserverTestCase): self._add_new_user(room_id, "@bob:server3") self.federation_sender.send_presence_to_destinations.assert_called_once_with( - destinations=["server3"], states={expected_state} + destinations={"server3"}, states=[expected_state] ) def test_remote_gets_presence_when_local_user_joins(self): @@ -788,14 +788,8 @@ class PresenceJoinTestCase(unittest.HomeserverTestCase): self.presence_handler.current_state_for_user("@test2:server") ) self.assertEqual(expected_state.state, PresenceState.ONLINE) - self.assertEqual( - self.federation_sender.send_presence_to_destinations.call_count, 2 - ) - self.federation_sender.send_presence_to_destinations.assert_any_call( - destinations=["server3"], states={expected_state} - ) - self.federation_sender.send_presence_to_destinations.assert_any_call( - destinations=["server2"], states={expected_state} + self.federation_sender.send_presence_to_destinations.assert_called_once_with( + destinations={"server2", "server3"}, states=[expected_state] ) def _add_new_user(self, room_id, user_id): -- cgit 1.5.1