From 29574fd5b3537cc272a4d792669b8d5be2a92b6f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 10 Apr 2017 16:48:30 +0100 Subject: Reduce federation presence replication traffic This is mainly done by moving the calculation of where to send presence updates from the presence handler to the transaction queue, so we only need to send the presence event (and not the destinations) across the replication connection. Before we were duplicating by sending the full state across once per destination. --- synapse/handlers/presence.py | 54 ++++++++++++-------------------------------- 1 file changed, 14 insertions(+), 40 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 9ed5af3cb4..c1c0dd4d3d 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -318,11 +318,7 @@ class PresenceHandler(object): if to_federation_ping: federation_presence_out_counter.inc_by(len(to_federation_ping)) - _, _, hosts_to_states = yield self._get_interested_parties( - to_federation_ping.values() - ) - - self._push_to_remotes(hosts_to_states) + self._push_to_remotes(to_federation_ping.values()) def _handle_timeouts(self): """Checks the presence of users that have timed out and updates as @@ -615,12 +611,12 @@ class PresenceHandler(object): defer.returnValue(states) @defer.inlineCallbacks - def _get_interested_parties(self, states, calculate_remote_hosts=True): + def _get_interested_parties(self, states): """Given a list of states return which entities (rooms, users, servers) are interested in the given states. Returns: - 3-tuple: `(room_ids_to_states, users_to_states, hosts_to_states)`, + 2-tuple: `(room_ids_to_states, users_to_states)`, with each item being a dict of `entity_name` -> `[UserPresenceState]` """ room_ids_to_states = {} @@ -637,30 +633,10 @@ class PresenceHandler(object): # Always notify self users_to_states.setdefault(state.user_id, []).append(state) - hosts_to_states = {} - if calculate_remote_hosts: - for room_id, states in room_ids_to_states.items(): - local_states = filter(lambda s: self.is_mine_id(s.user_id), states) - if not local_states: - continue - - hosts = yield self.store.get_hosts_in_room(room_id) - - for host in hosts: - hosts_to_states.setdefault(host, []).extend(local_states) - - for user_id, states in users_to_states.items(): - local_states = filter(lambda s: self.is_mine_id(s.user_id), states) - if not local_states: - continue - - host = get_domain_from_id(user_id) - hosts_to_states.setdefault(host, []).extend(local_states) - # TODO: de-dup hosts_to_states, as a single host might have multiple # of same presence - defer.returnValue((room_ids_to_states, users_to_states, hosts_to_states)) + defer.returnValue((room_ids_to_states, users_to_states)) @defer.inlineCallbacks def _persist_and_notify(self, states): @@ -670,33 +646,32 @@ class PresenceHandler(object): stream_id, max_token = yield self.store.update_presence(states) parties = yield self._get_interested_parties(states) - room_ids_to_states, users_to_states, hosts_to_states = parties + room_ids_to_states, users_to_states = parties self.notifier.on_new_event( "presence_key", stream_id, rooms=room_ids_to_states.keys(), - users=[UserID.from_string(u) for u in users_to_states.keys()] + users=[UserID.from_string(u) for u in users_to_states] ) - self._push_to_remotes(hosts_to_states) + self._push_to_remotes(states) @defer.inlineCallbacks def notify_for_states(self, state, stream_id): parties = yield self._get_interested_parties([state]) - room_ids_to_states, users_to_states, hosts_to_states = parties + room_ids_to_states, users_to_states = parties self.notifier.on_new_event( "presence_key", stream_id, rooms=room_ids_to_states.keys(), - users=[UserID.from_string(u) for u in users_to_states.keys()] + users=[UserID.from_string(u) for u in users_to_states] ) - def _push_to_remotes(self, hosts_to_states): + def _push_to_remotes(self, states): """Sends state updates to remote servers. Args: - hosts_to_states (dict): Mapping `server_name` -> `[UserPresenceState]` + hosts_to_states (list): list(state) """ - for host, states in hosts_to_states.items(): - self.federation.send_presence(host, states) + self.federation.send_presence(states) @defer.inlineCallbacks def incoming_presence(self, origin, content): @@ -837,14 +812,13 @@ class PresenceHandler(object): if self.is_mine(user): state = yield self.current_state_for_user(user.to_string()) - hosts = set(get_domain_from_id(u) for u in user_ids) - self._push_to_remotes({host: (state,) for host in hosts}) + self._push_to_remotes([state]) else: user_ids = filter(self.is_mine_id, user_ids) states = yield self.current_state_for_users(user_ids) - self._push_to_remotes({user.domain: states.values()}) + self._push_to_remotes(states.values()) @defer.inlineCallbacks def get_presence_list(self, observer_user, accepted=None): -- cgit 1.4.1 From 6308ac45b08ff5bf7259f09a2a767212f3f97860 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 11 Apr 2017 15:19:26 +0100 Subject: Move get_interested_remotes back to presence handler --- synapse/federation/transaction_queue.py | 41 ++++---------------------- synapse/handlers/presence.py | 52 +++++++++++++++++++++++++++++++-- 2 files changed, 55 insertions(+), 38 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index eb361d904c..08ceda31a6 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -25,7 +25,7 @@ from synapse.util.logcontext import preserve_context_over_fn, preserve_fn from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter from synapse.util.metrics import measure_func from synapse.types import get_domain_from_id -from synapse.handlers.presence import format_user_presence_state +from synapse.handlers.presence import format_user_presence_state, get_interested_remotes import synapse.metrics import logging @@ -251,7 +251,10 @@ class TransactionQueue(object): # First we queue up the new presence by user ID, so multiple presence # updates in quick successtion are correctly handled - self.pending_presence.update({state.user_id: state for state in states}) + self.pending_presence.update({ + state.user_id: state for state in states + if self.is_mine_id(state.user_id) + }) # We then handle the new pending presence in batches, first figuring # out the destinations we need to send each state to and then poking it @@ -283,40 +286,8 @@ class TransactionQueue(object): Args: states (list(UserPresenceState)) """ - # First we look up the rooms each user is in (as well as any explicit - # subscriptions), then for each distinct room we look up the remote - # hosts in those rooms. - room_ids_to_states = {} - users_to_states = {} - for state in states.itervalues(): - room_ids = yield self.store.get_rooms_for_user(state.user_id) - for room_id in room_ids: - room_ids_to_states.setdefault(room_id, []).append(state) - - plist = yield self.store.get_presence_list_observers_accepted( - state.user_id, - ) - for u in plist: - users_to_states.setdefault(u, []).append(state) - - hosts_and_states = [] - for room_id, states in room_ids_to_states.items(): - local_states = filter(lambda s: self.is_mine_id(s.user_id), states) - if not local_states: - continue - - hosts = yield self.store.get_hosts_in_room(room_id) - hosts_and_states.append((hosts, local_states)) - - for user_id, states in users_to_states.items(): - local_states = filter(lambda s: self.is_mine_id(s.user_id), states) - if not local_states: - continue - - host = get_domain_from_id(user_id) - hosts_and_states.append(([host], local_states)) + hosts_and_states = yield get_interested_remotes(self.store, states) - # And now finally queue up new transactions for destinations, states in hosts_and_states: for destination in destinations: if not self.can_send_to(destination): diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index c1c0dd4d3d..685373ff28 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -633,9 +633,6 @@ class PresenceHandler(object): # Always notify self users_to_states.setdefault(state.user_id, []).append(state) - # TODO: de-dup hosts_to_states, as a single host might have multiple - # of same presence - defer.returnValue((room_ids_to_states, users_to_states)) @defer.inlineCallbacks @@ -1318,3 +1315,52 @@ def handle_update(prev_state, new_state, is_mine, wheel_timer, now): persist_and_notify = True return new_state, persist_and_notify, federation_ping + +@defer.inlineCallbacks +def get_interested_remotes(store, states): + """Given a list of presence states figure out which remote servers + should be sent which. + + All the presence states should be for local users only. + + Args: + store (DataStore) + states (list(UserPresenceState)) + + Returns: + Deferred list of ([destinations], [UserPresenceState]), where for + each row the list of UserPresenceState should be sent to each + destination + """ + # First we look up the rooms each user is in (as well as any explicit + # subscriptions), then for each distinct room we look up the remote + # hosts in those rooms. + room_ids_to_states = {} + users_to_states = {} + for state in states.itervalues(): + room_ids = yield store.get_rooms_for_user(state.user_id) + for room_id in room_ids: + room_ids_to_states.setdefault(room_id, []).append(state) + + plist = yield store.get_presence_list_observers_accepted( + state.user_id, + ) + for u in plist: + users_to_states.setdefault(u, []).append(state) + + hosts_and_states = [] + for room_id, states in room_ids_to_states.items(): + if not local_states: + continue + + hosts = yield store.get_hosts_in_room(room_id) + hosts_and_states.append((hosts, local_states)) + + for user_id, states in users_to_states.items(): + if not local_states: + continue + + host = get_domain_from_id(user_id) + hosts_and_states.append(([host], local_states)) + + defer.returnValue(hosts_and_states) -- cgit 1.4.1 From 2be8a281d2aac8e2e3829b9aff6eb366506d22d1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 11 Apr 2017 15:28:24 +0100 Subject: Comments --- synapse/federation/send_queue.py | 14 +++++++------- synapse/handlers/presence.py | 5 +++-- 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py index a12c18f4df..7e52a55eda 100644 --- a/synapse/federation/send_queue.py +++ b/synapse/federation/send_queue.py @@ -55,17 +55,17 @@ class FederationRemoteSendQueue(object): self.notifier = hs.get_notifier() self.is_mine_id = hs.is_mine_id - self.presence_map = {} - self.presence_changed = sorteddict() + self.presence_map = {} # Pending presence map user_id -> UserPresenceState + self.presence_changed = sorteddict() # Stream position -> user_id - self.keyed_edu = {} - self.keyed_edu_changed = sorteddict() + self.keyed_edu = {} # (destination, key) -> EDU + self.keyed_edu_changed = sorteddict() # stream position -> (destination, key) - self.edus = sorteddict() + self.edus = sorteddict() # stream position -> Edu - self.failures = sorteddict() + self.failures = sorteddict() # stream position -> (destination, Failure) - self.device_messages = sorteddict() + self.device_messages = sorteddict() # stream position -> destination self.pos = 1 self.pos_time = sorteddict() diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 685373ff28..98e736be5b 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -666,7 +666,7 @@ class PresenceHandler(object): """Sends state updates to remote servers. Args: - hosts_to_states (list): list(state) + hosts_to_states (list(UserPresenceState)) """ self.federation.send_presence(states) @@ -1332,6 +1332,8 @@ def get_interested_remotes(store, states): each row the list of UserPresenceState should be sent to each destination """ + hosts_and_states = [] # Final result to return + # First we look up the rooms each user is in (as well as any explicit # subscriptions), then for each distinct room we look up the remote # hosts in those rooms. @@ -1348,7 +1350,6 @@ def get_interested_remotes(store, states): for u in plist: users_to_states.setdefault(u, []).append(state) - hosts_and_states = [] for room_id, states in room_ids_to_states.items(): if not local_states: continue -- cgit 1.4.1 From 414522aed59aca9b711253aba98d15e2d59e14f9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 11 Apr 2017 15:30:02 +0100 Subject: Move get_interested_parties --- synapse/app/synchrotron.py | 5 ++-- synapse/handlers/presence.py | 69 ++++++++++++++++++++++---------------------- 2 files changed, 36 insertions(+), 38 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py index 7b6f82abdc..e3fbf02c9c 100644 --- a/synapse/app/synchrotron.py +++ b/synapse/app/synchrotron.py @@ -20,7 +20,7 @@ from synapse.api.constants import EventTypes from synapse.config._base import ConfigError from synapse.config.homeserver import HomeServerConfig from synapse.config.logger import setup_logging -from synapse.handlers.presence import PresenceHandler +from synapse.handlers.presence import PresenceHandler, get_interested_parties from synapse.http.site import SynapseSite from synapse.http.server import JsonResource from synapse.metrics.resource import MetricsResource, METRICS_PREFIX @@ -172,7 +172,6 @@ class SynchrotronPresence(object): get_states = PresenceHandler.get_states.__func__ get_state = PresenceHandler.get_state.__func__ - _get_interested_parties = PresenceHandler._get_interested_parties.__func__ current_state_for_users = PresenceHandler.current_state_for_users.__func__ def user_syncing(self, user_id, affect_presence): @@ -206,7 +205,7 @@ class SynchrotronPresence(object): @defer.inlineCallbacks def notify_from_replication(self, states, stream_id): - parties = yield self._get_interested_parties(states) + parties = yield get_interested_parties(self.store, states) room_ids_to_states, users_to_states = parties self.notifier.on_new_event( diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 98e736be5b..b9ce997a94 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -610,31 +610,6 @@ class PresenceHandler(object): defer.returnValue(states) - @defer.inlineCallbacks - def _get_interested_parties(self, states): - """Given a list of states return which entities (rooms, users, servers) - are interested in the given states. - - Returns: - 2-tuple: `(room_ids_to_states, users_to_states)`, - with each item being a dict of `entity_name` -> `[UserPresenceState]` - """ - room_ids_to_states = {} - users_to_states = {} - for state in states: - room_ids = yield self.store.get_rooms_for_user(state.user_id) - for room_id in room_ids: - room_ids_to_states.setdefault(room_id, []).append(state) - - plist = yield self.store.get_presence_list_observers_accepted(state.user_id) - for u in plist: - users_to_states.setdefault(u, []).append(state) - - # Always notify self - users_to_states.setdefault(state.user_id, []).append(state) - - defer.returnValue((room_ids_to_states, users_to_states)) - @defer.inlineCallbacks def _persist_and_notify(self, states): """Persist states in the database, poke the notifier and send to @@ -642,7 +617,7 @@ class PresenceHandler(object): """ stream_id, max_token = yield self.store.update_presence(states) - parties = yield self._get_interested_parties(states) + parties = yield get_interested_parties(self.store, states) room_ids_to_states, users_to_states = parties self.notifier.on_new_event( @@ -654,7 +629,7 @@ class PresenceHandler(object): @defer.inlineCallbacks def notify_for_states(self, state, stream_id): - parties = yield self._get_interested_parties([state]) + parties = yield get_interested_parties(self.store, [state]) room_ids_to_states, users_to_states = parties self.notifier.on_new_event( @@ -1316,6 +1291,36 @@ def handle_update(prev_state, new_state, is_mine, wheel_timer, now): return new_state, persist_and_notify, federation_ping + +@defer.inlineCallbacks +def get_interested_parties(store, states): + """Given a list of states return which entities (rooms, users) + are interested in the given states. + + Args: + states (list(UserPresenceState)) + + Returns: + 2-tuple: `(room_ids_to_states, users_to_states)`, + with each item being a dict of `entity_name` -> `[UserPresenceState]` + """ + room_ids_to_states = {} + users_to_states = {} + for state in states: + room_ids = yield store.get_rooms_for_user(state.user_id) + for room_id in room_ids: + room_ids_to_states.setdefault(room_id, []).append(state) + + plist = yield store.get_presence_list_observers_accepted(state.user_id) + for u in plist: + users_to_states.setdefault(u, []).append(state) + + # Always notify self + users_to_states.setdefault(state.user_id, []).append(state) + + defer.returnValue((room_ids_to_states, users_to_states)) + + @defer.inlineCallbacks def get_interested_remotes(store, states): """Given a list of presence states figure out which remote servers @@ -1351,17 +1356,11 @@ def get_interested_remotes(store, states): users_to_states.setdefault(u, []).append(state) for room_id, states in room_ids_to_states.items(): - if not local_states: - continue - hosts = yield store.get_hosts_in_room(room_id) - hosts_and_states.append((hosts, local_states)) + hosts_and_states.append((hosts, states)) for user_id, states in users_to_states.items(): - if not local_states: - continue - host = get_domain_from_id(user_id) - hosts_and_states.append(([host], local_states)) + hosts_and_states.append(([host], states)) defer.returnValue(hosts_and_states) -- cgit 1.4.1 From c7ddb5ef7ac3dc7370010ee6685497ee73f46fa2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 12 Apr 2017 10:11:43 +0100 Subject: Reuse get_interested_parties --- synapse/federation/transaction_queue.py | 6 +++--- synapse/handlers/presence.py | 21 +++++---------------- 2 files changed, 8 insertions(+), 19 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 260a472255..feb1605019 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -269,13 +269,13 @@ class TransactionQueue(object): self._processing_pending_presence = True try: while True: - states = self.pending_presence + states_map = self.pending_presence self.pending_presence = {} - if not states: + if not states_map: break - yield self._process_presence_inner(states) + yield self._process_presence_inner(states_map.values()) finally: self._processing_pending_presence = False diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index b9ce997a94..f3707afcd0 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -641,7 +641,7 @@ class PresenceHandler(object): """Sends state updates to remote servers. Args: - hosts_to_states (list(UserPresenceState)) + states (list(UserPresenceState)) """ self.federation.send_presence(states) @@ -1337,29 +1337,18 @@ def get_interested_remotes(store, states): each row the list of UserPresenceState should be sent to each destination """ - hosts_and_states = [] # Final result to return + hosts_and_states = [] # First we look up the rooms each user is in (as well as any explicit # subscriptions), then for each distinct room we look up the remote # hosts in those rooms. - room_ids_to_states = {} - users_to_states = {} - for state in states.itervalues(): - room_ids = yield store.get_rooms_for_user(state.user_id) - for room_id in room_ids: - room_ids_to_states.setdefault(room_id, []).append(state) - - plist = yield store.get_presence_list_observers_accepted( - state.user_id, - ) - for u in plist: - users_to_states.setdefault(u, []).append(state) + room_ids_to_states, users_to_states = yield get_interested_parties(store, states) - for room_id, states in room_ids_to_states.items(): + for room_id, states in room_ids_to_states.iteritems(): hosts = yield store.get_hosts_in_room(room_id) hosts_and_states.append((hosts, states)) - for user_id, states in users_to_states.items(): + for user_id, states in users_to_states.iteritems(): host = get_domain_from_id(user_id) hosts_and_states.append(([host], states)) -- cgit 1.4.1