From 71b625d80806886794c5e72f7ff11432e99b736c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Feb 2019 16:54:35 +0000 Subject: Stop backpaginating when events not visible --- synapse/handlers/federation.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'synapse/handlers') diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 083f2e0ac3..7b3834a915 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -830,6 +830,37 @@ class FederationHandler(BaseHandler): logger.debug("Not backfilling as no extremeties found.") return + # We only want to paginate if we can actually see the events we'll get, + # as otherwise we'll just spend a lot of resources to get redacted + # events. + # + # We do this by filtering all the extremities and seeing if any remain. + # Given we don't have the extremity events themselves, we need to + # actually check the events that references them. + # + # TODO: Filter the list of extremities if we do do a backfill + # TODO: Correctly handle the case where we are allowed to see the + # forward event but not the extremity, e.g. in the case of initial + # join of the server. + + forward_events = yield self.store.get_forward_events( + list(extremities), + ) + + extremities_events = yield self.store.get_events( + forward_events, + check_redacted=False, + get_prev_content=False, + ) + + filtered_extremities = yield filter_events_for_server( + self.store, self.server_name, list(extremities_events.values()), + redact=False, + ) + + if not filtered_extremities: + defer.returnValue(False) + # Check if we reached a point where we should start backfilling. sorted_extremeties_tuple = sorted( extremities.items(), -- cgit 1.5.1 From b183fef9ac8075aaab892d1042596e0bba824167 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 27 Feb 2019 13:06:10 +0000 Subject: Update comments --- synapse/handlers/federation.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 7b3834a915..de839ca527 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -836,12 +836,22 @@ class FederationHandler(BaseHandler): # # We do this by filtering all the extremities and seeing if any remain. # Given we don't have the extremity events themselves, we need to - # actually check the events that references them. + # actually check the events that reference them. + # + # *Note*: the spec wants us to keep backfilling until we reach the start + # of the room in case we are allowed to see some of the history. However + # in practice that causes more issues than its worth, as a) its + # relatively rare for there to be any visible history and b) even when + # there is its often sufficiently long ago that clients would stop + # attempting to paginate before backfill reached the visible history. + # + # TODO: If we do do a backfill the we should filter the extremities to + # only include those that point to visible portions of history. # - # TODO: Filter the list of extremities if we do do a backfill # TODO: Correctly handle the case where we are allowed to see the # forward event but not the extremity, e.g. in the case of initial - # join of the server. + # join of the server where we are allowed to see the join event but + # not anything before it. forward_events = yield self.store.get_forward_events( list(extremities), -- cgit 1.5.1 From 30649529398a2a57c52b6878a5753a5bf650cf25 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 1 Mar 2019 16:47:12 +0000 Subject: Fix incorrect log about not persisting duplicate state event. (#4776) We were logging this when it was not true. --- changelog.d/4776.bugfix | 1 + synapse/handlers/message.py | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) create mode 100644 changelog.d/4776.bugfix (limited to 'synapse/handlers') diff --git a/changelog.d/4776.bugfix b/changelog.d/4776.bugfix new file mode 100644 index 0000000000..ce3e6ce33c --- /dev/null +++ b/changelog.d/4776.bugfix @@ -0,0 +1 @@ +Fix incorrect log about not persisting duplicate state event. diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 3981fe69ce..c762b58902 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -436,10 +436,11 @@ class EventCreationHandler(object): if event.is_state(): prev_state = yield self.deduplicate_state_event(event, context) - logger.info( - "Not bothering to persist duplicate state event %s", event.event_id, - ) if prev_state is not None: + logger.info( + "Not bothering to persist state event %s duplicated by %s", + event.event_id, prev_state.event_id, + ) defer.returnValue(prev_state) yield self.handle_new_client_event( -- cgit 1.5.1 From 8b63fe4c261a4451024dba7506b5872686c22282 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 4 Mar 2019 11:56:03 +0000 Subject: s/get_forward_events/get_successor_events/ --- synapse/handlers/federation.py | 2 +- synapse/storage/event_federation.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 0425380e55..32d7ba6cf5 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -881,7 +881,7 @@ class FederationHandler(BaseHandler): # join of the server where we are allowed to see the join event but # not anything before it. - forward_events = yield self.store.get_forward_events( + forward_events = yield self.store.get_successor_events( list(extremities), ) diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 830b171caa..a8d90456e3 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -443,7 +443,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, return event_results @defer.inlineCallbacks - def get_forward_events(self, event_ids): + def get_successor_events(self, event_ids): """Fetch all events that have the given events as a prev event Args: @@ -457,7 +457,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, column="prev_event_id", iterable=event_ids, retcols=("event_id",), - desc="get_forward_events" + desc="get_successor_events" ) defer.returnValue([ -- cgit 1.5.1 From 856c83f5f85d2890ab7de26578464328241ec3ba Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Mon, 4 Mar 2019 12:57:44 +0000 Subject: Avoid rebuilding Edu objects in worker mode (#4770) In worker mode, on the federation sender, when we receive an edu for sending over the replication socket, it is parsed into an Edu object. There is no point extracting the contents of it so that we can then immediately build another Edu. --- changelog.d/4770.misc | 1 + synapse/federation/send_queue.py | 14 +++++++------- synapse/federation/transaction_queue.py | 31 ++++++++++++++++++++++++------- synapse/handlers/presence.py | 6 +++--- synapse/handlers/receipts.py | 2 +- synapse/handlers/typing.py | 2 +- 6 files changed, 37 insertions(+), 19 deletions(-) create mode 100644 changelog.d/4770.misc (limited to 'synapse/handlers') diff --git a/changelog.d/4770.misc b/changelog.d/4770.misc new file mode 100644 index 0000000000..144d819958 --- /dev/null +++ b/changelog.d/4770.misc @@ -0,0 +1 @@ +Optimise EDU transmission for the federation_sender worker. diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py index 6f5995735a..b7d0b25781 100644 --- a/synapse/federation/send_queue.py +++ b/synapse/federation/send_queue.py @@ -159,8 +159,12 @@ class FederationRemoteSendQueue(object): # stream. pass - def send_edu(self, destination, edu_type, content, key=None): + def build_and_send_edu(self, destination, edu_type, content, key=None): """As per TransactionQueue""" + if destination == self.server_name: + logger.info("Not sending EDU to ourselves") + return + pos = self._next_pos() edu = Edu( @@ -465,15 +469,11 @@ def process_rows_for_federation(transaction_queue, rows): for destination, edu_map in iteritems(buff.keyed_edus): for key, edu in edu_map.items(): - transaction_queue.send_edu( - edu.destination, edu.edu_type, edu.content, key=key, - ) + transaction_queue.send_edu(edu, key) for destination, edu_list in iteritems(buff.edus): for edu in edu_list: - transaction_queue.send_edu( - edu.destination, edu.edu_type, edu.content, key=None, - ) + transaction_queue.send_edu(edu, None) for destination in buff.device_destinations: transaction_queue.send_device_messages(destination) diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 30941f5ad6..e5e42c647d 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -361,7 +361,19 @@ class TransactionQueue(object): self._attempt_new_transaction(destination) - def send_edu(self, destination, edu_type, content, key=None): + def build_and_send_edu(self, destination, edu_type, content, key=None): + """Construct an Edu object, and queue it for sending + + Args: + destination (str): name of server to send to + edu_type (str): type of EDU to send + content (dict): content of EDU + key (Any|None): clobbering key for this edu + """ + if destination == self.server_name: + logger.info("Not sending EDU to ourselves") + return + edu = Edu( origin=self.server_name, destination=destination, @@ -369,18 +381,23 @@ class TransactionQueue(object): content=content, ) - if destination == self.server_name: - logger.info("Not sending EDU to ourselves") - return + self.send_edu(edu, key) + + def send_edu(self, edu, key): + """Queue an EDU for sending + Args: + edu (Edu): edu to send + key (Any|None): clobbering key for this edu + """ if key: self.pending_edus_keyed_by_dest.setdefault( - destination, {} + edu.destination, {} )[(edu.edu_type, key)] = edu else: - self.pending_edus_by_dest.setdefault(destination, []).append(edu) + self.pending_edus_by_dest.setdefault(edu.destination, []).append(edu) - self._attempt_new_transaction(destination) + self._attempt_new_transaction(edu.destination) def send_device_messages(self, destination): if destination == self.server_name: diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index ba3856674d..37e87fc054 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -816,7 +816,7 @@ class PresenceHandler(object): if self.is_mine(observed_user): yield self.invite_presence(observed_user, observer_user) else: - yield self.federation.send_edu( + yield self.federation.build_and_send_edu( destination=observed_user.domain, edu_type="m.presence_invite", content={ @@ -836,7 +836,7 @@ class PresenceHandler(object): if self.is_mine(observer_user): yield self.accept_presence(observed_user, observer_user) else: - self.federation.send_edu( + self.federation.build_and_send_edu( destination=observer_user.domain, edu_type="m.presence_accept", content={ @@ -848,7 +848,7 @@ class PresenceHandler(object): state_dict = yield self.get_state(observed_user, as_event=False) state_dict = format_user_presence_state(state_dict, self.clock.time_msec()) - self.federation.send_edu( + self.federation.build_and_send_edu( destination=observer_user.domain, edu_type="m.presence", content={ diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 696469732c..8b2d03a756 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -148,7 +148,7 @@ class ReceiptsHandler(BaseHandler): logger.debug("Sending receipt to: %r", remotedomains) for domain in remotedomains: - self.federation.send_edu( + self.federation.build_and_send_edu( destination=domain, edu_type="m.receipt", content={ diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index a61bbf9392..39df960c31 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -231,7 +231,7 @@ class TypingHandler(object): for domain in set(get_domain_from_id(u) for u in users): if domain != self.server_name: logger.debug("sending typing update to %s", domain) - self.federation.send_edu( + self.federation.build_and_send_edu( destination=domain, edu_type="m.typing", content={ -- cgit 1.5.1 From d1523aed6bebf00a4643d72eea611b029db65f08 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 4 Mar 2019 14:34:34 +0000 Subject: Only check history visibility when filtering When filtering events to send to server we check more than just history visibility. However when deciding whether to backfill or not we only care about the history visibility. --- synapse/handlers/federation.py | 4 ++- synapse/visibility.py | 77 +++++++++++++++++++++++------------------- 2 files changed, 46 insertions(+), 35 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 32d7ba6cf5..bf2989aefd 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -891,9 +891,11 @@ class FederationHandler(BaseHandler): get_prev_content=False, ) + # We set `check_history_visibility_only` as we might otherwise get false + # positives from users having been erased. filtered_extremities = yield filter_events_for_server( self.store, self.server_name, list(extremities_events.values()), - redact=False, + redact=False, check_history_visibility_only=True, ) if not filtered_extremities: diff --git a/synapse/visibility.py b/synapse/visibility.py index f6dcc96630..8b9c7180b6 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -216,7 +216,8 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, @defer.inlineCallbacks -def filter_events_for_server(store, server_name, events, redact=True): +def filter_events_for_server(store, server_name, events, redact=True, + check_history_visibility_only=False): """Filter a list of events based on whether given server is allowed to see them. @@ -226,34 +227,25 @@ def filter_events_for_server(store, server_name, events, redact=True): events (iterable[FrozenEvent]) redact (bool): Whether to return a redacted version of the event, or to filter them out entirely. + check_history_visibility_only (bool): Whether to only check the + history visibility, rather than things like if the sender has been + erased. This is used e.g. during pagination to decide whether to + backfill or not. Returns Deferred[list[FrozenEvent]] """ - # Whatever else we do, we need to check for senders which have requested - # erasure of their data. - erased_senders = yield store.are_users_erased( - (e.sender for e in events), - ) - def redact_disallowed(event, state): - # if the sender has been gdpr17ed, always return a redacted - # copy of the event. + def is_sender_erased(event, erased_senders): if erased_senders[event.sender]: logger.info( "Sender of %s has been erased, redacting", event.event_id, ) - if redact: - return prune_event(event) - else: - return None - - # state will be None if we decided we didn't need to filter by - # room membership. - if not state: - return event + return True + return False + def check_event_is_visible(event, state): history = state.get((EventTypes.RoomHistoryVisibility, ''), None) if history: visibility = history.content.get("history_visibility", "shared") @@ -275,18 +267,15 @@ def filter_events_for_server(store, server_name, events, redact=True): memtype = ev.membership if memtype == Membership.JOIN: - return event + return True elif memtype == Membership.INVITE: if visibility == "invited": - return event + return True else: # server has no users in the room: redact - if redact: - return prune_event(event) - else: - return None + return False - return event + return True # Next lets check to see if all the events have a history visibility # of "shared" or "world_readable". If thats the case then we don't @@ -315,16 +304,31 @@ def filter_events_for_server(store, server_name, events, redact=True): for e in itervalues(event_map) ) + if not check_history_visibility_only: + erased_senders = yield store.are_users_erased( + (e.sender for e in events), + ) + else: + # We don't want to check whether users are erased, which is equivalent + # to no users having been erased. + erased_senders = {} + if all_open: # all the history_visibility state affecting these events is open, so # we don't need to filter by membership state. We *do* need to check # for user erasure, though. if erased_senders: - events = [ - redact_disallowed(e, None) - for e in events - ] + to_return = [] + for e in events: + if not is_sender_erased(e, erased_senders): + to_return.append(e) + elif redact: + to_return.append(prune_event(e)) + + defer.returnValue(to_return) + # If there are no erased users then we can just return the given list + # of events without having to copy it. defer.returnValue(events) # Ok, so we're dealing with events that have non-trivial visibility @@ -380,8 +384,13 @@ def filter_events_for_server(store, server_name, events, redact=True): for e_id, key_to_eid in iteritems(event_to_state_ids) } - to_return = ( - redact_disallowed(e, event_to_state[e.event_id]) - for e in events - ) - defer.returnValue([e for e in to_return if e is not None]) + to_return = [] + for e in events: + erased = is_sender_erased(e, erased_senders) + visible = check_event_is_visible(e, event_to_state[e.event_id]) + if visible and not erased: + to_return.append(e) + elif redact: + to_return.append(prune_event(e)) + + defer.returnValue(to_return) -- cgit 1.5.1 From b29693a30b4def57c9c38be4c6f5ff2f9a6e9db9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 4 Mar 2019 18:11:26 +0000 Subject: Clean up read-receipt handling. Remove a call to run_as_background_process: there is no need to run this as a background process, because build_and_send_edu does not block. We may as well inline the whole of _push_remotes. --- changelog.d/4797.misc | 1 + synapse/handlers/receipts.py | 103 ++++++++++++++++++------------------------- 2 files changed, 45 insertions(+), 59 deletions(-) create mode 100644 changelog.d/4797.misc (limited to 'synapse/handlers') diff --git a/changelog.d/4797.misc b/changelog.d/4797.misc new file mode 100644 index 0000000000..822e98e6a7 --- /dev/null +++ b/changelog.d/4797.misc @@ -0,0 +1 @@ +Clean up read-receipt handling. diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 8b2d03a756..1728089667 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -16,7 +16,6 @@ import logging from twisted.internet import defer -from synapse.metrics.background_process_metrics import run_as_background_process from synapse.types import get_domain_from_id from ._base import BaseHandler @@ -38,31 +37,6 @@ class ReceiptsHandler(BaseHandler): self.clock = self.hs.get_clock() self.state = hs.get_state_handler() - @defer.inlineCallbacks - def received_client_receipt(self, room_id, receipt_type, user_id, - event_id): - """Called when a client tells us a local user has read up to the given - event_id in the room. - """ - receipt = { - "room_id": room_id, - "receipt_type": receipt_type, - "user_id": user_id, - "event_ids": [event_id], - "data": { - "ts": int(self.clock.time_msec()), - } - } - - is_new = yield self._handle_new_receipts([receipt]) - - if is_new: - # fire off a process in the background to send the receipt to - # remote servers - run_as_background_process( - 'push_receipts_to_remotes', self._push_remotes, receipt - ) - @defer.inlineCallbacks def _received_remote_receipt(self, origin, content): """Called when we receive an EDU of type m.receipt from a remote HS. @@ -128,43 +102,54 @@ class ReceiptsHandler(BaseHandler): defer.returnValue(True) @defer.inlineCallbacks - def _push_remotes(self, receipt): - """Given a receipt, works out which remote servers should be - poked and pokes them. + def received_client_receipt(self, room_id, receipt_type, user_id, + event_id): + """Called when a client tells us a local user has read up to the given + event_id in the room. """ - try: - # TODO: optimise this to move some of the work to the workers. - room_id = receipt["room_id"] - receipt_type = receipt["receipt_type"] - user_id = receipt["user_id"] - event_ids = receipt["event_ids"] - data = receipt["data"] + receipt = { + "room_id": room_id, + "receipt_type": receipt_type, + "user_id": user_id, + "event_ids": [event_id], + "data": { + "ts": int(self.clock.time_msec()), + } + } - users = yield self.state.get_current_user_in_room(room_id) - remotedomains = set(get_domain_from_id(u) for u in users) - remotedomains = remotedomains.copy() - remotedomains.discard(self.server_name) - - logger.debug("Sending receipt to: %r", remotedomains) - - for domain in remotedomains: - self.federation.build_and_send_edu( - destination=domain, - edu_type="m.receipt", - content={ - room_id: { - receipt_type: { - user_id: { - "event_ids": event_ids, - "data": data, - } + is_new = yield self._handle_new_receipts([receipt]) + if not is_new: + return + + # Work out which remote servers should be poked and poke them. + + # TODO: optimise this to move some of the work to the workers. + data = receipt["data"] + + # XXX why does this not use state.get_current_hosts_in_room() ? + users = yield self.state.get_current_user_in_room(room_id) + remotedomains = set(get_domain_from_id(u) for u in users) + remotedomains = remotedomains.copy() + remotedomains.discard(self.server_name) + + logger.debug("Sending receipt to: %r", remotedomains) + + for domain in remotedomains: + self.federation.build_and_send_edu( + destination=domain, + edu_type="m.receipt", + content={ + room_id: { + receipt_type: { + user_id: { + "event_ids": [event_id], + "data": data, } - }, + } }, - key=(room_id, receipt_type, user_id), - ) - except Exception: - logger.exception("Error pushing receipts to remote servers") + }, + key=(room_id, receipt_type, user_id), + ) @defer.inlineCallbacks def get_receipts_for_room(self, room_id, to_key): -- cgit 1.5.1 From 157e5a8f27cfb7561721785512e69a0177ea48dd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 4 Mar 2019 18:24:32 +0000 Subject: Split DeviceHandler into master and worker --- synapse/handlers/device.py | 342 +++++++++++++++++++++++---------------------- synapse/server.py | 7 +- 2 files changed, 179 insertions(+), 170 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index c708c35d4d..7e48661355 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -37,13 +37,185 @@ from ._base import BaseHandler logger = logging.getLogger(__name__) -class DeviceHandler(BaseHandler): +class DeviceWorkerHandler(BaseHandler): def __init__(self, hs): - super(DeviceHandler, self).__init__(hs) + super(DeviceWorkerHandler, self).__init__(hs) self.hs = hs self.state = hs.get_state_handler() self._auth_handler = hs.get_auth_handler() + + @defer.inlineCallbacks + def get_devices_by_user(self, user_id): + """ + Retrieve the given user's devices + + Args: + user_id (str): + Returns: + defer.Deferred: list[dict[str, X]]: info on each device + """ + + device_map = yield self.store.get_devices_by_user(user_id) + + ips = yield self.store.get_last_client_ip_by_device( + user_id, device_id=None + ) + + devices = list(device_map.values()) + for device in devices: + _update_device_from_client_ips(device, ips) + + defer.returnValue(devices) + + @defer.inlineCallbacks + def get_device(self, user_id, device_id): + """ Retrieve the given device + + Args: + user_id (str): + device_id (str): + + Returns: + defer.Deferred: dict[str, X]: info on the device + Raises: + errors.NotFoundError: if the device was not found + """ + try: + device = yield self.store.get_device(user_id, device_id) + except errors.StoreError: + raise errors.NotFoundError + ips = yield self.store.get_last_client_ip_by_device( + user_id, device_id, + ) + _update_device_from_client_ips(device, ips) + defer.returnValue(device) + + @measure_func("device.get_user_ids_changed") + @defer.inlineCallbacks + def get_user_ids_changed(self, user_id, from_token): + """Get list of users that have had the devices updated, or have newly + joined a room, that `user_id` may be interested in. + + Args: + user_id (str) + from_token (StreamToken) + """ + now_token = yield self.hs.get_event_sources().get_current_token() + + room_ids = yield self.store.get_rooms_for_user(user_id) + + # First we check if any devices have changed + changed = yield self.store.get_user_whose_devices_changed( + from_token.device_list_key + ) + + # Then work out if any users have since joined + rooms_changed = self.store.get_rooms_that_changed(room_ids, from_token.room_key) + + member_events = yield self.store.get_membership_changes_for_user( + user_id, from_token.room_key, now_token.room_key, + ) + rooms_changed.update(event.room_id for event in member_events) + + stream_ordering = RoomStreamToken.parse_stream_token( + from_token.room_key + ).stream + + possibly_changed = set(changed) + possibly_left = set() + for room_id in rooms_changed: + current_state_ids = yield self.store.get_current_state_ids(room_id) + + # The user may have left the room + # TODO: Check if they actually did or if we were just invited. + if room_id not in room_ids: + for key, event_id in iteritems(current_state_ids): + etype, state_key = key + if etype != EventTypes.Member: + continue + possibly_left.add(state_key) + continue + + # Fetch the current state at the time. + try: + event_ids = yield self.store.get_forward_extremeties_for_room( + room_id, stream_ordering=stream_ordering + ) + except errors.StoreError: + # we have purged the stream_ordering index since the stream + # ordering: treat it the same as a new room + event_ids = [] + + # special-case for an empty prev state: include all members + # in the changed list + if not event_ids: + for key, event_id in iteritems(current_state_ids): + etype, state_key = key + if etype != EventTypes.Member: + continue + possibly_changed.add(state_key) + continue + + current_member_id = current_state_ids.get((EventTypes.Member, user_id)) + if not current_member_id: + continue + + # mapping from event_id -> state_dict + prev_state_ids = yield self.store.get_state_ids_for_events(event_ids) + + # Check if we've joined the room? If so we just blindly add all the users to + # the "possibly changed" users. + for state_dict in itervalues(prev_state_ids): + member_event = state_dict.get((EventTypes.Member, user_id), None) + if not member_event or member_event != current_member_id: + for key, event_id in iteritems(current_state_ids): + etype, state_key = key + if etype != EventTypes.Member: + continue + possibly_changed.add(state_key) + break + + # If there has been any change in membership, include them in the + # possibly changed list. We'll check if they are joined below, + # and we're not toooo worried about spuriously adding users. + for key, event_id in iteritems(current_state_ids): + etype, state_key = key + if etype != EventTypes.Member: + continue + + # check if this member has changed since any of the extremities + # at the stream_ordering, and add them to the list if so. + for state_dict in itervalues(prev_state_ids): + prev_event_id = state_dict.get(key, None) + if not prev_event_id or prev_event_id != event_id: + if state_key != user_id: + possibly_changed.add(state_key) + break + + if possibly_changed or possibly_left: + users_who_share_room = yield self.store.get_users_who_share_room_with_user( + user_id + ) + + # Take the intersection of the users whose devices may have changed + # and those that actually still share a room with the user + possibly_joined = possibly_changed & users_who_share_room + possibly_left = (possibly_changed | possibly_left) - users_who_share_room + else: + possibly_joined = [] + possibly_left = [] + + defer.returnValue({ + "changed": list(possibly_joined), + "left": list(possibly_left), + }) + + +class DeviceHandler(DeviceWorkerHandler): + def __init__(self, hs): + super(DeviceHandler, self).__init__(hs) + self.federation_sender = hs.get_federation_sender() self._edu_updater = DeviceListEduUpdater(hs, self) @@ -103,52 +275,6 @@ class DeviceHandler(BaseHandler): raise errors.StoreError(500, "Couldn't generate a device ID.") - @defer.inlineCallbacks - def get_devices_by_user(self, user_id): - """ - Retrieve the given user's devices - - Args: - user_id (str): - Returns: - defer.Deferred: list[dict[str, X]]: info on each device - """ - - device_map = yield self.store.get_devices_by_user(user_id) - - ips = yield self.store.get_last_client_ip_by_device( - user_id, device_id=None - ) - - devices = list(device_map.values()) - for device in devices: - _update_device_from_client_ips(device, ips) - - defer.returnValue(devices) - - @defer.inlineCallbacks - def get_device(self, user_id, device_id): - """ Retrieve the given device - - Args: - user_id (str): - device_id (str): - - Returns: - defer.Deferred: dict[str, X]: info on the device - Raises: - errors.NotFoundError: if the device was not found - """ - try: - device = yield self.store.get_device(user_id, device_id) - except errors.StoreError: - raise errors.NotFoundError - ips = yield self.store.get_last_client_ip_by_device( - user_id, device_id, - ) - _update_device_from_client_ips(device, ips) - defer.returnValue(device) - @defer.inlineCallbacks def delete_device(self, user_id, device_id): """ Delete the given device @@ -287,126 +413,6 @@ class DeviceHandler(BaseHandler): for host in hosts: self.federation_sender.send_device_messages(host) - @measure_func("device.get_user_ids_changed") - @defer.inlineCallbacks - def get_user_ids_changed(self, user_id, from_token): - """Get list of users that have had the devices updated, or have newly - joined a room, that `user_id` may be interested in. - - Args: - user_id (str) - from_token (StreamToken) - """ - now_token = yield self.hs.get_event_sources().get_current_token() - - room_ids = yield self.store.get_rooms_for_user(user_id) - - # First we check if any devices have changed - changed = yield self.store.get_user_whose_devices_changed( - from_token.device_list_key - ) - - # Then work out if any users have since joined - rooms_changed = self.store.get_rooms_that_changed(room_ids, from_token.room_key) - - member_events = yield self.store.get_membership_changes_for_user( - user_id, from_token.room_key, now_token.room_key - ) - rooms_changed.update(event.room_id for event in member_events) - - stream_ordering = RoomStreamToken.parse_stream_token( - from_token.room_key - ).stream - - possibly_changed = set(changed) - possibly_left = set() - for room_id in rooms_changed: - current_state_ids = yield self.store.get_current_state_ids(room_id) - - # The user may have left the room - # TODO: Check if they actually did or if we were just invited. - if room_id not in room_ids: - for key, event_id in iteritems(current_state_ids): - etype, state_key = key - if etype != EventTypes.Member: - continue - possibly_left.add(state_key) - continue - - # Fetch the current state at the time. - try: - event_ids = yield self.store.get_forward_extremeties_for_room( - room_id, stream_ordering=stream_ordering - ) - except errors.StoreError: - # we have purged the stream_ordering index since the stream - # ordering: treat it the same as a new room - event_ids = [] - - # special-case for an empty prev state: include all members - # in the changed list - if not event_ids: - for key, event_id in iteritems(current_state_ids): - etype, state_key = key - if etype != EventTypes.Member: - continue - possibly_changed.add(state_key) - continue - - current_member_id = current_state_ids.get((EventTypes.Member, user_id)) - if not current_member_id: - continue - - # mapping from event_id -> state_dict - prev_state_ids = yield self.store.get_state_ids_for_events(event_ids) - - # Check if we've joined the room? If so we just blindly add all the users to - # the "possibly changed" users. - for state_dict in itervalues(prev_state_ids): - member_event = state_dict.get((EventTypes.Member, user_id), None) - if not member_event or member_event != current_member_id: - for key, event_id in iteritems(current_state_ids): - etype, state_key = key - if etype != EventTypes.Member: - continue - possibly_changed.add(state_key) - break - - # If there has been any change in membership, include them in the - # possibly changed list. We'll check if they are joined below, - # and we're not toooo worried about spuriously adding users. - for key, event_id in iteritems(current_state_ids): - etype, state_key = key - if etype != EventTypes.Member: - continue - - # check if this member has changed since any of the extremities - # at the stream_ordering, and add them to the list if so. - for state_dict in itervalues(prev_state_ids): - prev_event_id = state_dict.get(key, None) - if not prev_event_id or prev_event_id != event_id: - if state_key != user_id: - possibly_changed.add(state_key) - break - - if possibly_changed or possibly_left: - users_who_share_room = yield self.store.get_users_who_share_room_with_user( - user_id - ) - - # Take the intersection of the users whose devices may have changed - # and those that actually still share a room with the user - possibly_joined = possibly_changed & users_who_share_room - possibly_left = (possibly_changed | possibly_left) - users_who_share_room - else: - possibly_joined = [] - possibly_left = [] - - defer.returnValue({ - "changed": list(possibly_joined), - "left": list(possibly_left), - }) - @defer.inlineCallbacks def on_federation_query_user_devices(self, user_id): stream_id, devices = yield self.store.get_devices_with_keys_by_user(user_id) diff --git a/synapse/server.py b/synapse/server.py index 4d364fccce..4323e7ff12 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -51,7 +51,7 @@ from synapse.handlers.acme import AcmeHandler from synapse.handlers.appservice import ApplicationServicesHandler from synapse.handlers.auth import AuthHandler, MacaroonGenerator from synapse.handlers.deactivate_account import DeactivateAccountHandler -from synapse.handlers.device import DeviceHandler +from synapse.handlers.device import DeviceHandler, DeviceWorkerHandler from synapse.handlers.devicemessage import DeviceMessageHandler from synapse.handlers.e2e_keys import E2eKeysHandler from synapse.handlers.e2e_room_keys import E2eRoomKeysHandler @@ -307,7 +307,10 @@ class HomeServer(object): return MacaroonGenerator(self) def build_device_handler(self): - return DeviceHandler(self) + if self.config.worker_app: + return DeviceWorkerHandler(self) + else: + return DeviceHandler(self) def build_device_message_handler(self): return DeviceMessageHandler(self) -- cgit 1.5.1 From bfa7d46a107d4a3eb55701c42fe75290688f4e30 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 4 Mar 2019 18:09:06 +0000 Subject: Allow /keys/{changes,query} API to run on worker --- docs/workers.rst | 2 ++ synapse/app/client_reader.py | 11 +++++++++++ synapse/handlers/device.py | 4 ++-- 3 files changed, 15 insertions(+), 2 deletions(-) (limited to 'synapse/handlers') diff --git a/docs/workers.rst b/docs/workers.rst index 3c18db1b19..d80fc04d2e 100644 --- a/docs/workers.rst +++ b/docs/workers.rst @@ -225,6 +225,8 @@ following regular expressions:: ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state$ ^/_matrix/client/(api/v1|r0|unstable)/login$ ^/_matrix/client/(api/v1|r0|unstable)/account/3pid$ + ^/_matrix/client/(api/v1|r0|unstable)/keys/query$ + ^/_matrix/client/(api/v1|r0|unstable)/keys/changes$ Additionally, the following REST endpoints can be handled, but all requests must be routed to the same instance:: diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index 5070094cad..beaea64a61 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -33,9 +33,13 @@ from synapse.replication.slave.storage._base import BaseSlavedStore from synapse.replication.slave.storage.account_data import SlavedAccountDataStore from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore from synapse.replication.slave.storage.client_ips import SlavedClientIpStore +from synapse.replication.slave.storage.deviceinbox import SlavedDeviceInboxStore +from synapse.replication.slave.storage.devices import SlavedDeviceStore from synapse.replication.slave.storage.directory import DirectoryStore from synapse.replication.slave.storage.events import SlavedEventStore from synapse.replication.slave.storage.keys import SlavedKeyStore +from synapse.replication.slave.storage.push_rule import SlavedPushRuleStore +from synapse.replication.slave.storage.receipts import SlavedReceiptsStore from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.replication.slave.storage.room import RoomStore from synapse.replication.slave.storage.transactions import SlavedTransactionStore @@ -49,6 +53,7 @@ from synapse.rest.client.v1.room import ( RoomStateRestServlet, ) from synapse.rest.client.v2_alpha.account import ThreepidRestServlet +from synapse.rest.client.v2_alpha.keys import KeyChangesServlet, KeyQueryServlet from synapse.rest.client.v2_alpha.register import RegisterRestServlet from synapse.server import HomeServer from synapse.storage.engines import create_engine @@ -61,6 +66,10 @@ logger = logging.getLogger("synapse.app.client_reader") class ClientReaderSlavedStore( + SlavedDeviceInboxStore, + SlavedDeviceStore, + SlavedReceiptsStore, + SlavedPushRuleStore, SlavedAccountDataStore, SlavedEventStore, SlavedKeyStore, @@ -98,6 +107,8 @@ class ClientReaderServer(HomeServer): RegisterRestServlet(self).register(resource) LoginRestServlet(self).register(resource) ThreepidRestServlet(self).register(resource) + KeyQueryServlet(self).register(resource) + KeyChangesServlet(self).register(resource) resources.update({ "/_matrix/client/r0": resource, diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 7e48661355..c09a7c6280 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -101,7 +101,7 @@ class DeviceWorkerHandler(BaseHandler): user_id (str) from_token (StreamToken) """ - now_token = yield self.hs.get_event_sources().get_current_token() + now_room_key = yield self.store.get_room_events_max_id() room_ids = yield self.store.get_rooms_for_user(user_id) @@ -114,7 +114,7 @@ class DeviceWorkerHandler(BaseHandler): rooms_changed = self.store.get_rooms_that_changed(room_ids, from_token.room_key) member_events = yield self.store.get_membership_changes_for_user( - user_id, from_token.room_key, now_token.room_key, + user_id, from_token.room_key, now_room_key, ) rooms_changed.update(event.room_id for event in member_events) -- cgit 1.5.1 From aa06d26ae05585ddfb5e33a2dd521c9aa27b6cfa Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 5 Mar 2019 09:16:35 +0000 Subject: clarify comments --- changelog.d/4699.bugfix | 2 +- synapse/handlers/federation.py | 19 +++++++++++-------- synapse/visibility.py | 2 +- 3 files changed, 13 insertions(+), 10 deletions(-) (limited to 'synapse/handlers') diff --git a/changelog.d/4699.bugfix b/changelog.d/4699.bugfix index 8cd8340cc1..1d7f3174e7 100644 --- a/changelog.d/4699.bugfix +++ b/changelog.d/4699.bugfix @@ -1 +1 @@ -Fix attempting to paginate in rooms where server cannot see any events. +Fix attempting to paginate in rooms where server cannot see any events, to avoid unnecessarily pulling in lots of redacted events. diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index bf2989aefd..72b63d64d0 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -862,9 +862,9 @@ class FederationHandler(BaseHandler): # as otherwise we'll just spend a lot of resources to get redacted # events. # - # We do this by filtering all the extremities and seeing if any remain. - # Given we don't have the extremity events themselves, we need to - # actually check the events that reference them. + # We do this by filtering all the backwards extremities and seeing if + # any remain. Given we don't have the extremity events themselves, we + # need to actually check the events that reference them. # # *Note*: the spec wants us to keep backfilling until we reach the start # of the room in case we are allowed to see some of the history. However @@ -873,13 +873,16 @@ class FederationHandler(BaseHandler): # there is its often sufficiently long ago that clients would stop # attempting to paginate before backfill reached the visible history. # - # TODO: If we do do a backfill the we should filter the extremities to - # only include those that point to visible portions of history. + # TODO: If we do do a backfill then we should filter the backwards + # extremities to only include those that point to visible portions of + # history. # # TODO: Correctly handle the case where we are allowed to see the - # forward event but not the extremity, e.g. in the case of initial - # join of the server where we are allowed to see the join event but - # not anything before it. + # forward event but not the backward extremity, e.g. in the case of + # initial join of the server where we are allowed to see the join + # event but not anything before it. This would require looking at the + # state *before* the event, ignoring the special casing certain event + # types have. forward_events = yield self.store.get_successor_events( list(extremities), diff --git a/synapse/visibility.py b/synapse/visibility.py index e9dc73c25e..efec21673b 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -277,7 +277,7 @@ def filter_events_for_server(store, server_name, events, redact=True, return True - # Next lets check to see if all the events have a history visibility + # Lets check to see if all the events have a history visibility # of "shared" or "world_readable". If thats the case then we don't # need to check membership (as we know the server is in the room). event_to_state_ids = yield store.get_state_ids_for_events( -- cgit 1.5.1 From a4c3a361b70bc02d65104240bef1b3cbb110bf22 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Tue, 5 Mar 2019 14:25:33 +0000 Subject: Add rate-limiting on registration (#4735) * Rate-limiting for registration * Add unit test for registration rate limiting * Add config parameters for rate limiting on auth endpoints * Doc * Fix doc of rate limiting function Co-Authored-By: babolivier * Incorporate review * Fix config parsing * Fix linting errors * Set default config for auth rate limiting * Fix tests * Add changelog * Advance reactor instead of mocked clock * Move parameters to registration specific config and give them more sensible default values * Remove unused config options * Don't mock the rate limiter un MAU tests * Rename _register_with_store into register_with_store * Make CI happy * Remove unused import * Update sample config * Fix ratelimiting test for py2 * Add non-guest test --- changelog.d/4735.feature | 1 + docs/sample_config.yaml | 11 +++++++ synapse/api/ratelimiting.py | 31 ++++++++++--------- synapse/config/registration.py | 18 +++++++++++ synapse/handlers/_base.py | 4 +-- synapse/handlers/register.py | 39 ++++++++++++++++++----- synapse/replication/http/register.py | 8 +++-- synapse/rest/client/v2_alpha/register.py | 33 +++++++++++++++++--- tests/api/test_ratelimiting.py | 20 ++++++------ tests/handlers/test_profile.py | 4 +-- tests/replication/slave/storage/_base.py | 4 +-- tests/rest/client/v1/test_events.py | 4 +-- tests/rest/client/v1/test_rooms.py | 6 ++-- tests/rest/client/v1/test_typing.py | 4 +-- tests/rest/client/v2_alpha/test_register.py | 48 +++++++++++++++++++++++++++++ tests/test_mau.py | 3 +- tests/utils.py | 2 ++ 17 files changed, 186 insertions(+), 54 deletions(-) create mode 100644 changelog.d/4735.feature (limited to 'synapse/handlers') diff --git a/changelog.d/4735.feature b/changelog.d/4735.feature new file mode 100644 index 0000000000..a4c0b196f6 --- /dev/null +++ b/changelog.d/4735.feature @@ -0,0 +1 @@ +Add configurable rate limiting to the /register endpoint. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 7cf58d2182..e0140003fd 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -657,6 +657,17 @@ trusted_third_party_id_servers: # autocreate_auto_join_rooms: true +# Number of registration requests a client can send per second. +# Defaults to 1/minute (0.17). +# +#rc_registration_requests_per_second: 0.17 + +# Number of registration requests a client can send before being +# throttled. +# Defaults to 3. +# +#rc_registration_request_burst_count: 3.0 + ## Metrics ### diff --git a/synapse/api/ratelimiting.py b/synapse/api/ratelimiting.py index 3bb5b3da37..ad68079eeb 100644 --- a/synapse/api/ratelimiting.py +++ b/synapse/api/ratelimiting.py @@ -23,12 +23,13 @@ class Ratelimiter(object): def __init__(self): self.message_counts = collections.OrderedDict() - def send_message(self, user_id, time_now_s, msg_rate_hz, burst_count, update=True): - """Can the user send a message? + def can_do_action(self, key, time_now_s, rate_hz, burst_count, update=True): + """Can the entity (e.g. user or IP address) perform the action? Args: - user_id: The user sending a message. + key: The key we should use when rate limiting. Can be a user ID + (when sending events), an IP address, etc. time_now_s: The time now. - msg_rate_hz: The long term number of messages a user can send in a + rate_hz: The long term number of messages a user can send in a second. burst_count: How many messages the user can send before being limited. @@ -41,10 +42,10 @@ class Ratelimiter(object): """ self.prune_message_counts(time_now_s) message_count, time_start, _ignored = self.message_counts.get( - user_id, (0., time_now_s, None), + key, (0., time_now_s, None), ) time_delta = time_now_s - time_start - sent_count = message_count - time_delta * msg_rate_hz + sent_count = message_count - time_delta * rate_hz if sent_count < 0: allowed = True time_start = time_now_s @@ -56,13 +57,13 @@ class Ratelimiter(object): message_count += 1 if update: - self.message_counts[user_id] = ( - message_count, time_start, msg_rate_hz + self.message_counts[key] = ( + message_count, time_start, rate_hz ) - if msg_rate_hz > 0: + if rate_hz > 0: time_allowed = ( - time_start + (message_count - burst_count + 1) / msg_rate_hz + time_start + (message_count - burst_count + 1) / rate_hz ) if time_allowed < time_now_s: time_allowed = time_now_s @@ -72,12 +73,12 @@ class Ratelimiter(object): return allowed, time_allowed def prune_message_counts(self, time_now_s): - for user_id in list(self.message_counts.keys()): - message_count, time_start, msg_rate_hz = ( - self.message_counts[user_id] + for key in list(self.message_counts.keys()): + message_count, time_start, rate_hz = ( + self.message_counts[key] ) time_delta = time_now_s - time_start - if message_count - time_delta * msg_rate_hz > 0: + if message_count - time_delta * rate_hz > 0: break else: - del self.message_counts[user_id] + del self.message_counts[key] diff --git a/synapse/config/registration.py b/synapse/config/registration.py index 2881482f96..d32f6fff73 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -54,6 +54,13 @@ class RegistrationConfig(Config): config.get("disable_msisdn_registration", False) ) + self.rc_registration_requests_per_second = config.get( + "rc_registration_requests_per_second", 0.17, + ) + self.rc_registration_request_burst_count = config.get( + "rc_registration_request_burst_count", 3, + ) + def default_config(self, generate_secrets=False, **kwargs): if generate_secrets: registration_shared_secret = 'registration_shared_secret: "%s"' % ( @@ -140,6 +147,17 @@ class RegistrationConfig(Config): # users cannot be auto-joined since they do not exist. # autocreate_auto_join_rooms: true + + # Number of registration requests a client can send per second. + # Defaults to 1/minute (0.17). + # + #rc_registration_requests_per_second: 0.17 + + # Number of registration requests a client can send before being + # throttled. + # Defaults to 3. + # + #rc_registration_request_burst_count: 3.0 """ % locals() def add_arguments(self, parser): diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index 594754cfd8..d8d86d6ff3 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -93,9 +93,9 @@ class BaseHandler(object): messages_per_second = self.hs.config.rc_messages_per_second burst_count = self.hs.config.rc_message_burst_count - allowed, time_allowed = self.ratelimiter.send_message( + allowed, time_allowed = self.ratelimiter.can_do_action( user_id, time_now, - msg_rate_hz=messages_per_second, + rate_hz=messages_per_second, burst_count=burst_count, update=update, ) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index c0e06929bd..47d5e276f8 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -24,6 +24,7 @@ from synapse.api.errors import ( AuthError, Codes, InvalidCaptchaError, + LimitExceededError, RegistrationError, SynapseError, ) @@ -60,6 +61,7 @@ class RegistrationHandler(BaseHandler): self.user_directory_handler = hs.get_user_directory_handler() self.captcha_client = CaptchaServerHttpClient(hs) self.identity_handler = self.hs.get_handlers().identity_handler + self.ratelimiter = hs.get_ratelimiter() self._next_generated_user_id = None @@ -149,6 +151,7 @@ class RegistrationHandler(BaseHandler): threepid=None, user_type=None, default_display_name=None, + address=None, ): """Registers a new client on the server. @@ -167,6 +170,7 @@ class RegistrationHandler(BaseHandler): api.constants.UserTypes, or None for a normal user. default_display_name (unicode|None): if set, the new user's displayname will be set to this. Defaults to 'localpart'. + address (str|None): the IP address used to perform the regitration. Returns: A tuple of (user_id, access_token). Raises: @@ -206,7 +210,7 @@ class RegistrationHandler(BaseHandler): token = None if generate_token: token = self.macaroon_gen.generate_access_token(user_id) - yield self._register_with_store( + yield self.register_with_store( user_id=user_id, token=token, password_hash=password_hash, @@ -215,6 +219,7 @@ class RegistrationHandler(BaseHandler): create_profile_with_displayname=default_display_name, admin=admin, user_type=user_type, + address=address, ) if self.hs.config.user_directory_search_all_users: @@ -238,12 +243,13 @@ class RegistrationHandler(BaseHandler): if default_display_name is None: default_display_name = localpart try: - yield self._register_with_store( + yield self.register_with_store( user_id=user_id, token=token, password_hash=password_hash, make_guest=make_guest, create_profile_with_displayname=default_display_name, + address=address, ) except SynapseError: # if user id is taken, just generate another @@ -337,7 +343,7 @@ class RegistrationHandler(BaseHandler): user_id, allowed_appservice=service ) - yield self._register_with_store( + yield self.register_with_store( user_id=user_id, password_hash="", appservice_id=service_id, @@ -513,7 +519,7 @@ class RegistrationHandler(BaseHandler): token = self.macaroon_gen.generate_access_token(user_id) if need_register: - yield self._register_with_store( + yield self.register_with_store( user_id=user_id, token=token, password_hash=password_hash, @@ -590,10 +596,10 @@ class RegistrationHandler(BaseHandler): ratelimit=False, ) - def _register_with_store(self, user_id, token=None, password_hash=None, - was_guest=False, make_guest=False, appservice_id=None, - create_profile_with_displayname=None, admin=False, - user_type=None): + def register_with_store(self, user_id, token=None, password_hash=None, + was_guest=False, make_guest=False, appservice_id=None, + create_profile_with_displayname=None, admin=False, + user_type=None, address=None): """Register user in the datastore. Args: @@ -612,10 +618,26 @@ class RegistrationHandler(BaseHandler): admin (boolean): is an admin user? user_type (str|None): type of user. One of the values from api.constants.UserTypes, or None for a normal user. + address (str|None): the IP address used to perform the regitration. Returns: Deferred """ + # Don't rate limit for app services + if appservice_id is None and address is not None: + time_now = self.clock.time() + + allowed, time_allowed = self.ratelimiter.can_do_action( + address, time_now_s=time_now, + rate_hz=self.hs.config.rc_registration_requests_per_second, + burst_count=self.hs.config.rc_registration_request_burst_count, + ) + + if not allowed: + raise LimitExceededError( + retry_after_ms=int(1000 * (time_allowed - time_now)), + ) + if self.hs.config.worker_app: return self._register_client( user_id=user_id, @@ -627,6 +649,7 @@ class RegistrationHandler(BaseHandler): create_profile_with_displayname=create_profile_with_displayname, admin=admin, user_type=user_type, + address=address, ) else: return self.store.register( diff --git a/synapse/replication/http/register.py b/synapse/replication/http/register.py index 1d27c9221f..912a5ac341 100644 --- a/synapse/replication/http/register.py +++ b/synapse/replication/http/register.py @@ -33,11 +33,12 @@ class ReplicationRegisterServlet(ReplicationEndpoint): def __init__(self, hs): super(ReplicationRegisterServlet, self).__init__(hs) self.store = hs.get_datastore() + self.registration_handler = hs.get_registration_handler() @staticmethod def _serialize_payload( user_id, token, password_hash, was_guest, make_guest, appservice_id, - create_profile_with_displayname, admin, user_type, + create_profile_with_displayname, admin, user_type, address, ): """ Args: @@ -56,6 +57,7 @@ class ReplicationRegisterServlet(ReplicationEndpoint): admin (boolean): is an admin user? user_type (str|None): type of user. One of the values from api.constants.UserTypes, or None for a normal user. + address (str|None): the IP address used to perform the regitration. """ return { "token": token, @@ -66,13 +68,14 @@ class ReplicationRegisterServlet(ReplicationEndpoint): "create_profile_with_displayname": create_profile_with_displayname, "admin": admin, "user_type": user_type, + "address": address, } @defer.inlineCallbacks def _handle_request(self, request, user_id): content = parse_json_object_from_request(request) - yield self.store.register( + yield self.registration_handler.register_with_store( user_id=user_id, token=content["token"], password_hash=content["password_hash"], @@ -82,6 +85,7 @@ class ReplicationRegisterServlet(ReplicationEndpoint): create_profile_with_displayname=content["create_profile_with_displayname"], admin=content["admin"], user_type=content["user_type"], + address=content["address"] ) defer.returnValue((200, {})) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 94cbba4303..b7f354570c 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -25,7 +25,12 @@ from twisted.internet import defer import synapse import synapse.types from synapse.api.constants import LoginType -from synapse.api.errors import Codes, SynapseError, UnrecognizedRequestError +from synapse.api.errors import ( + Codes, + LimitExceededError, + SynapseError, + UnrecognizedRequestError, +) from synapse.config.server import is_threepid_reserved from synapse.http.servlet import ( RestServlet, @@ -191,18 +196,36 @@ class RegisterRestServlet(RestServlet): self.identity_handler = hs.get_handlers().identity_handler self.room_member_handler = hs.get_room_member_handler() self.macaroon_gen = hs.get_macaroon_generator() + self.ratelimiter = hs.get_ratelimiter() + self.clock = hs.get_clock() @interactive_auth_handler @defer.inlineCallbacks def on_POST(self, request): body = parse_json_object_from_request(request) + client_addr = request.getClientIP() + + time_now = self.clock.time() + + allowed, time_allowed = self.ratelimiter.can_do_action( + client_addr, time_now_s=time_now, + rate_hz=self.hs.config.rc_registration_requests_per_second, + burst_count=self.hs.config.rc_registration_request_burst_count, + update=False, + ) + + if not allowed: + raise LimitExceededError( + retry_after_ms=int(1000 * (time_allowed - time_now)), + ) + kind = b"user" if b"kind" in request.args: kind = request.args[b"kind"][0] if kind == b"guest": - ret = yield self._do_guest_registration(body) + ret = yield self._do_guest_registration(body, address=client_addr) defer.returnValue(ret) return elif kind != b"user": @@ -411,6 +434,7 @@ class RegisterRestServlet(RestServlet): guest_access_token=guest_access_token, generate_token=False, threepid=threepid, + address=client_addr, ) # Necessary due to auth checks prior to the threepid being # written to the db @@ -522,12 +546,13 @@ class RegisterRestServlet(RestServlet): defer.returnValue(result) @defer.inlineCallbacks - def _do_guest_registration(self, params): + def _do_guest_registration(self, params, address=None): if not self.hs.config.allow_guest_access: raise SynapseError(403, "Guest access is disabled") user_id, _ = yield self.registration_handler.register( generate_token=False, - make_guest=True + make_guest=True, + address=address, ) # we don't allow guests to specify their own device_id, because diff --git a/tests/api/test_ratelimiting.py b/tests/api/test_ratelimiting.py index 8933fe3b72..30a255d441 100644 --- a/tests/api/test_ratelimiting.py +++ b/tests/api/test_ratelimiting.py @@ -6,34 +6,34 @@ from tests import unittest class TestRatelimiter(unittest.TestCase): def test_allowed(self): limiter = Ratelimiter() - allowed, time_allowed = limiter.send_message( - user_id="test_id", time_now_s=0, msg_rate_hz=0.1, burst_count=1 + allowed, time_allowed = limiter.can_do_action( + key="test_id", time_now_s=0, rate_hz=0.1, burst_count=1 ) self.assertTrue(allowed) self.assertEquals(10., time_allowed) - allowed, time_allowed = limiter.send_message( - user_id="test_id", time_now_s=5, msg_rate_hz=0.1, burst_count=1 + allowed, time_allowed = limiter.can_do_action( + key="test_id", time_now_s=5, rate_hz=0.1, burst_count=1 ) self.assertFalse(allowed) self.assertEquals(10., time_allowed) - allowed, time_allowed = limiter.send_message( - user_id="test_id", time_now_s=10, msg_rate_hz=0.1, burst_count=1 + allowed, time_allowed = limiter.can_do_action( + key="test_id", time_now_s=10, rate_hz=0.1, burst_count=1 ) self.assertTrue(allowed) self.assertEquals(20., time_allowed) def test_pruning(self): limiter = Ratelimiter() - allowed, time_allowed = limiter.send_message( - user_id="test_id_1", time_now_s=0, msg_rate_hz=0.1, burst_count=1 + allowed, time_allowed = limiter.can_do_action( + key="test_id_1", time_now_s=0, rate_hz=0.1, burst_count=1 ) self.assertIn("test_id_1", limiter.message_counts) - allowed, time_allowed = limiter.send_message( - user_id="test_id_2", time_now_s=10, msg_rate_hz=0.1, burst_count=1 + allowed, time_allowed = limiter.can_do_action( + key="test_id_2", time_now_s=10, rate_hz=0.1, burst_count=1 ) self.assertNotIn("test_id_1", limiter.message_counts) diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py index 80da1c8954..d60c124eec 100644 --- a/tests/handlers/test_profile.py +++ b/tests/handlers/test_profile.py @@ -55,11 +55,11 @@ class ProfileTestCase(unittest.TestCase): federation_client=self.mock_federation, federation_server=Mock(), federation_registry=self.mock_registry, - ratelimiter=NonCallableMock(spec_set=["send_message"]), + ratelimiter=NonCallableMock(spec_set=["can_do_action"]), ) self.ratelimiter = hs.get_ratelimiter() - self.ratelimiter.send_message.return_value = (True, 0) + self.ratelimiter.can_do_action.return_value = (True, 0) self.store = hs.get_datastore() diff --git a/tests/replication/slave/storage/_base.py b/tests/replication/slave/storage/_base.py index 9e9fbbfe93..524af4f8d1 100644 --- a/tests/replication/slave/storage/_base.py +++ b/tests/replication/slave/storage/_base.py @@ -31,10 +31,10 @@ class BaseSlavedStoreTestCase(unittest.HomeserverTestCase): hs = self.setup_test_homeserver( "blue", federation_client=Mock(), - ratelimiter=NonCallableMock(spec_set=["send_message"]), + ratelimiter=NonCallableMock(spec_set=["can_do_action"]), ) - hs.get_ratelimiter().send_message.return_value = (True, 0) + hs.get_ratelimiter().can_do_action.return_value = (True, 0) return hs diff --git a/tests/rest/client/v1/test_events.py b/tests/rest/client/v1/test_events.py index 483bebc832..36d8547275 100644 --- a/tests/rest/client/v1/test_events.py +++ b/tests/rest/client/v1/test_events.py @@ -40,10 +40,10 @@ class EventStreamPermissionsTestCase(unittest.HomeserverTestCase): config.auto_join_rooms = [] hs = self.setup_test_homeserver( - config=config, ratelimiter=NonCallableMock(spec_set=["send_message"]) + config=config, ratelimiter=NonCallableMock(spec_set=["can_do_action"]) ) self.ratelimiter = hs.get_ratelimiter() - self.ratelimiter.send_message.return_value = (True, 0) + self.ratelimiter.can_do_action.return_value = (True, 0) hs.get_handlers().federation_handler = Mock() diff --git a/tests/rest/client/v1/test_rooms.py b/tests/rest/client/v1/test_rooms.py index a824be9a62..015c144248 100644 --- a/tests/rest/client/v1/test_rooms.py +++ b/tests/rest/client/v1/test_rooms.py @@ -41,10 +41,10 @@ class RoomBase(unittest.HomeserverTestCase): "red", http_client=None, federation_client=Mock(), - ratelimiter=NonCallableMock(spec_set=["send_message"]), + ratelimiter=NonCallableMock(spec_set=["can_do_action"]), ) self.ratelimiter = self.hs.get_ratelimiter() - self.ratelimiter.send_message.return_value = (True, 0) + self.ratelimiter.can_do_action.return_value = (True, 0) self.hs.get_federation_handler = Mock(return_value=Mock()) @@ -96,7 +96,7 @@ class RoomPermissionsTestCase(RoomBase): # auth as user_id now self.helper.auth_user_id = self.user_id - def test_send_message(self): + def test_can_do_action(self): msg_content = b'{"msgtype":"m.text","body":"hello"}' seq = iter(range(100)) diff --git a/tests/rest/client/v1/test_typing.py b/tests/rest/client/v1/test_typing.py index 0ad814c5e5..30fb77bac8 100644 --- a/tests/rest/client/v1/test_typing.py +++ b/tests/rest/client/v1/test_typing.py @@ -42,13 +42,13 @@ class RoomTypingTestCase(unittest.HomeserverTestCase): "red", http_client=None, federation_client=Mock(), - ratelimiter=NonCallableMock(spec_set=["send_message"]), + ratelimiter=NonCallableMock(spec_set=["can_do_action"]), ) self.event_source = hs.get_event_sources().sources["typing"] self.ratelimiter = hs.get_ratelimiter() - self.ratelimiter.send_message.return_value = (True, 0) + self.ratelimiter.can_do_action.return_value = (True, 0) hs.get_handlers().federation_handler = Mock() diff --git a/tests/rest/client/v2_alpha/test_register.py b/tests/rest/client/v2_alpha/test_register.py index 906b348d3e..3600434858 100644 --- a/tests/rest/client/v2_alpha/test_register.py +++ b/tests/rest/client/v2_alpha/test_register.py @@ -130,3 +130,51 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): self.assertEquals(channel.result["code"], b"403", channel.result) self.assertEquals(channel.json_body["error"], "Guest access is disabled") + + def test_POST_ratelimiting_guest(self): + self.hs.config.rc_registration_request_burst_count = 5 + + for i in range(0, 6): + url = self.url + b"?kind=guest" + request, channel = self.make_request(b"POST", url, b"{}") + self.render(request) + + if i == 5: + self.assertEquals(channel.result["code"], b"429", channel.result) + retry_after_ms = int(channel.json_body["retry_after_ms"]) + else: + self.assertEquals(channel.result["code"], b"200", channel.result) + + self.reactor.advance(retry_after_ms / 1000.) + + request, channel = self.make_request(b"POST", self.url + b"?kind=guest", b"{}") + self.render(request) + + self.assertEquals(channel.result["code"], b"200", channel.result) + + def test_POST_ratelimiting(self): + self.hs.config.rc_registration_request_burst_count = 5 + + for i in range(0, 6): + params = { + "username": "kermit" + str(i), + "password": "monkey", + "device_id": "frogfone", + "auth": {"type": LoginType.DUMMY}, + } + request_data = json.dumps(params) + request, channel = self.make_request(b"POST", self.url, request_data) + self.render(request) + + if i == 5: + self.assertEquals(channel.result["code"], b"429", channel.result) + retry_after_ms = int(channel.json_body["retry_after_ms"]) + else: + self.assertEquals(channel.result["code"], b"200", channel.result) + + self.reactor.advance(retry_after_ms / 1000.) + + request, channel = self.make_request(b"POST", self.url + b"?kind=guest", b"{}") + self.render(request) + + self.assertEquals(channel.result["code"], b"200", channel.result) diff --git a/tests/test_mau.py b/tests/test_mau.py index 04f95c942f..00be1a8c21 100644 --- a/tests/test_mau.py +++ b/tests/test_mau.py @@ -17,7 +17,7 @@ import json -from mock import Mock, NonCallableMock +from mock import Mock from synapse.api.constants import LoginType from synapse.api.errors import Codes, HttpResponseException, SynapseError @@ -36,7 +36,6 @@ class TestMauLimit(unittest.HomeserverTestCase): "red", http_client=None, federation_client=Mock(), - ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.store = self.hs.get_datastore() diff --git a/tests/utils.py b/tests/utils.py index ee272157aa..e4c42f9fa8 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -150,6 +150,8 @@ def default_config(name): config.admin_contact = None config.rc_messages_per_second = 10000 config.rc_message_burst_count = 10000 + config.rc_registration_request_burst_count = 3.0 + config.rc_registration_requests_per_second = 0.17 config.saml2_enabled = False config.public_baseurl = None config.default_identity_server = None -- cgit 1.5.1 From d7dbad3526136cfc9fdbd568635be5016fb637db Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Tue, 5 Mar 2019 18:41:27 +0000 Subject: Split ratelimiters in two (one for events, one for registration) --- synapse/handlers/_base.py | 2 +- synapse/handlers/message.py | 2 +- synapse/handlers/register.py | 2 +- synapse/rest/client/v2_alpha/register.py | 2 +- synapse/server.py | 10 +++++++--- tests/handlers/test_profile.py | 2 +- tests/replication/slave/storage/_base.py | 2 +- tests/rest/client/v1/test_events.py | 2 +- tests/rest/client/v1/test_typing.py | 2 +- 9 files changed, 15 insertions(+), 11 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index d8d86d6ff3..a2212e2023 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -44,7 +44,7 @@ class BaseHandler(object): self.notifier = hs.get_notifier() self.state_handler = hs.get_state_handler() self.distributor = hs.get_distributor() - self.ratelimiter = hs.get_ratelimiter() + self.ratelimiter = hs.get_events_ratelimiter() self.clock = hs.get_clock() self.hs = hs diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index c762b58902..120aa0d017 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -224,7 +224,7 @@ class EventCreationHandler(object): self.profile_handler = hs.get_profile_handler() self.event_builder_factory = hs.get_event_builder_factory() self.server_name = hs.hostname - self.ratelimiter = hs.get_ratelimiter() + self.ratelimiter = hs.get_events_ratelimiter() self.notifier = hs.get_notifier() self.config = hs.config diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 47d5e276f8..03130edc54 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -61,7 +61,7 @@ class RegistrationHandler(BaseHandler): self.user_directory_handler = hs.get_user_directory_handler() self.captcha_client = CaptchaServerHttpClient(hs) self.identity_handler = self.hs.get_handlers().identity_handler - self.ratelimiter = hs.get_ratelimiter() + self.ratelimiter = hs.get_registration_ratelimiter() self._next_generated_user_id = None diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index b7f354570c..6f34029431 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -196,7 +196,7 @@ class RegisterRestServlet(RestServlet): self.identity_handler = hs.get_handlers().identity_handler self.room_member_handler = hs.get_room_member_handler() self.macaroon_gen = hs.get_macaroon_generator() - self.ratelimiter = hs.get_ratelimiter() + self.ratelimiter = hs.get_registration_ratelimiter() self.clock = hs.get_clock() @interactive_auth_handler diff --git a/synapse/server.py b/synapse/server.py index 4323e7ff12..f3ca3e259a 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -205,7 +205,8 @@ class HomeServer(object): self.clock = Clock(reactor) self.distributor = Distributor() - self.ratelimiter = Ratelimiter() + self.events_ratelimiter = Ratelimiter() + self.registration_ratelimiter = Ratelimiter() self.datastore = None @@ -248,8 +249,11 @@ class HomeServer(object): def get_distributor(self): return self.distributor - def get_ratelimiter(self): - return self.ratelimiter + def get_events_ratelimiter(self): + return self.events_ratelimiter + + def get_registration_ratelimiter(self): + return self.registration_ratelimiter def build_federation_client(self): return FederationClient(self) diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py index d60c124eec..905816a44b 100644 --- a/tests/handlers/test_profile.py +++ b/tests/handlers/test_profile.py @@ -58,7 +58,7 @@ class ProfileTestCase(unittest.TestCase): ratelimiter=NonCallableMock(spec_set=["can_do_action"]), ) - self.ratelimiter = hs.get_ratelimiter() + self.ratelimiter = hs.get_events_ratelimiter() self.ratelimiter.can_do_action.return_value = (True, 0) self.store = hs.get_datastore() diff --git a/tests/replication/slave/storage/_base.py b/tests/replication/slave/storage/_base.py index 524af4f8d1..b293e04355 100644 --- a/tests/replication/slave/storage/_base.py +++ b/tests/replication/slave/storage/_base.py @@ -34,7 +34,7 @@ class BaseSlavedStoreTestCase(unittest.HomeserverTestCase): ratelimiter=NonCallableMock(spec_set=["can_do_action"]), ) - hs.get_ratelimiter().can_do_action.return_value = (True, 0) + hs.get_events_ratelimiter().can_do_action.return_value = (True, 0) return hs diff --git a/tests/rest/client/v1/test_events.py b/tests/rest/client/v1/test_events.py index 36d8547275..cd328dc5f1 100644 --- a/tests/rest/client/v1/test_events.py +++ b/tests/rest/client/v1/test_events.py @@ -42,7 +42,7 @@ class EventStreamPermissionsTestCase(unittest.HomeserverTestCase): hs = self.setup_test_homeserver( config=config, ratelimiter=NonCallableMock(spec_set=["can_do_action"]) ) - self.ratelimiter = hs.get_ratelimiter() + self.ratelimiter = hs.get_events_ratelimiter() self.ratelimiter.can_do_action.return_value = (True, 0) hs.get_handlers().federation_handler = Mock() diff --git a/tests/rest/client/v1/test_typing.py b/tests/rest/client/v1/test_typing.py index 30fb77bac8..2e2e314a49 100644 --- a/tests/rest/client/v1/test_typing.py +++ b/tests/rest/client/v1/test_typing.py @@ -47,7 +47,7 @@ class RoomTypingTestCase(unittest.HomeserverTestCase): self.event_source = hs.get_event_sources().sources["typing"] - self.ratelimiter = hs.get_ratelimiter() + self.ratelimiter = hs.get_events_ratelimiter() self.ratelimiter.can_do_action.return_value = (True, 0) hs.get_handlers().federation_handler = Mock() -- cgit 1.5.1 From f4195f41188928b8da9bed38c60e221466274a48 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 6 Mar 2019 10:55:22 +0000 Subject: Revert "Split ratelimiters in two (one for events, one for registration)" This reverts commit d7dbad3526136cfc9fdbd568635be5016fb637db. --- synapse/handlers/_base.py | 2 +- synapse/handlers/message.py | 2 +- synapse/handlers/register.py | 2 +- synapse/rest/client/v2_alpha/register.py | 2 +- synapse/server.py | 10 +++------- tests/handlers/test_profile.py | 2 +- tests/replication/slave/storage/_base.py | 2 +- tests/rest/client/v1/test_events.py | 2 +- tests/rest/client/v1/test_typing.py | 2 +- 9 files changed, 11 insertions(+), 15 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index a2212e2023..d8d86d6ff3 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -44,7 +44,7 @@ class BaseHandler(object): self.notifier = hs.get_notifier() self.state_handler = hs.get_state_handler() self.distributor = hs.get_distributor() - self.ratelimiter = hs.get_events_ratelimiter() + self.ratelimiter = hs.get_ratelimiter() self.clock = hs.get_clock() self.hs = hs diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 120aa0d017..c762b58902 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -224,7 +224,7 @@ class EventCreationHandler(object): self.profile_handler = hs.get_profile_handler() self.event_builder_factory = hs.get_event_builder_factory() self.server_name = hs.hostname - self.ratelimiter = hs.get_events_ratelimiter() + self.ratelimiter = hs.get_ratelimiter() self.notifier = hs.get_notifier() self.config = hs.config diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 03130edc54..47d5e276f8 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -61,7 +61,7 @@ class RegistrationHandler(BaseHandler): self.user_directory_handler = hs.get_user_directory_handler() self.captcha_client = CaptchaServerHttpClient(hs) self.identity_handler = self.hs.get_handlers().identity_handler - self.ratelimiter = hs.get_registration_ratelimiter() + self.ratelimiter = hs.get_ratelimiter() self._next_generated_user_id = None diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 6f34029431..b7f354570c 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -196,7 +196,7 @@ class RegisterRestServlet(RestServlet): self.identity_handler = hs.get_handlers().identity_handler self.room_member_handler = hs.get_room_member_handler() self.macaroon_gen = hs.get_macaroon_generator() - self.ratelimiter = hs.get_registration_ratelimiter() + self.ratelimiter = hs.get_ratelimiter() self.clock = hs.get_clock() @interactive_auth_handler diff --git a/synapse/server.py b/synapse/server.py index f3ca3e259a..4323e7ff12 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -205,8 +205,7 @@ class HomeServer(object): self.clock = Clock(reactor) self.distributor = Distributor() - self.events_ratelimiter = Ratelimiter() - self.registration_ratelimiter = Ratelimiter() + self.ratelimiter = Ratelimiter() self.datastore = None @@ -249,11 +248,8 @@ class HomeServer(object): def get_distributor(self): return self.distributor - def get_events_ratelimiter(self): - return self.events_ratelimiter - - def get_registration_ratelimiter(self): - return self.registration_ratelimiter + def get_ratelimiter(self): + return self.ratelimiter def build_federation_client(self): return FederationClient(self) diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py index 905816a44b..d60c124eec 100644 --- a/tests/handlers/test_profile.py +++ b/tests/handlers/test_profile.py @@ -58,7 +58,7 @@ class ProfileTestCase(unittest.TestCase): ratelimiter=NonCallableMock(spec_set=["can_do_action"]), ) - self.ratelimiter = hs.get_events_ratelimiter() + self.ratelimiter = hs.get_ratelimiter() self.ratelimiter.can_do_action.return_value = (True, 0) self.store = hs.get_datastore() diff --git a/tests/replication/slave/storage/_base.py b/tests/replication/slave/storage/_base.py index b293e04355..524af4f8d1 100644 --- a/tests/replication/slave/storage/_base.py +++ b/tests/replication/slave/storage/_base.py @@ -34,7 +34,7 @@ class BaseSlavedStoreTestCase(unittest.HomeserverTestCase): ratelimiter=NonCallableMock(spec_set=["can_do_action"]), ) - hs.get_events_ratelimiter().can_do_action.return_value = (True, 0) + hs.get_ratelimiter().can_do_action.return_value = (True, 0) return hs diff --git a/tests/rest/client/v1/test_events.py b/tests/rest/client/v1/test_events.py index cd328dc5f1..36d8547275 100644 --- a/tests/rest/client/v1/test_events.py +++ b/tests/rest/client/v1/test_events.py @@ -42,7 +42,7 @@ class EventStreamPermissionsTestCase(unittest.HomeserverTestCase): hs = self.setup_test_homeserver( config=config, ratelimiter=NonCallableMock(spec_set=["can_do_action"]) ) - self.ratelimiter = hs.get_events_ratelimiter() + self.ratelimiter = hs.get_ratelimiter() self.ratelimiter.can_do_action.return_value = (True, 0) hs.get_handlers().federation_handler = Mock() diff --git a/tests/rest/client/v1/test_typing.py b/tests/rest/client/v1/test_typing.py index 2e2e314a49..30fb77bac8 100644 --- a/tests/rest/client/v1/test_typing.py +++ b/tests/rest/client/v1/test_typing.py @@ -47,7 +47,7 @@ class RoomTypingTestCase(unittest.HomeserverTestCase): self.event_source = hs.get_event_sources().sources["typing"] - self.ratelimiter = hs.get_events_ratelimiter() + self.ratelimiter = hs.get_ratelimiter() self.ratelimiter.can_do_action.return_value = (True, 0) hs.get_handlers().federation_handler = Mock() -- cgit 1.5.1 From 6f3cde8b2500aafad2438de7eddfc442ec5288c7 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 6 Mar 2019 11:02:42 +0000 Subject: Make registration ratelimiter separate from the main events one --- synapse/handlers/register.py | 2 +- synapse/rest/client/v2_alpha/register.py | 2 +- synapse/server.py | 4 ++++ 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 47d5e276f8..03130edc54 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -61,7 +61,7 @@ class RegistrationHandler(BaseHandler): self.user_directory_handler = hs.get_user_directory_handler() self.captcha_client = CaptchaServerHttpClient(hs) self.identity_handler = self.hs.get_handlers().identity_handler - self.ratelimiter = hs.get_ratelimiter() + self.ratelimiter = hs.get_registration_ratelimiter() self._next_generated_user_id = None diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index b7f354570c..6f34029431 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -196,7 +196,7 @@ class RegisterRestServlet(RestServlet): self.identity_handler = hs.get_handlers().identity_handler self.room_member_handler = hs.get_room_member_handler() self.macaroon_gen = hs.get_macaroon_generator() - self.ratelimiter = hs.get_ratelimiter() + self.ratelimiter = hs.get_registration_ratelimiter() self.clock = hs.get_clock() @interactive_auth_handler diff --git a/synapse/server.py b/synapse/server.py index 4323e7ff12..72835e8c86 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -206,6 +206,7 @@ class HomeServer(object): self.clock = Clock(reactor) self.distributor = Distributor() self.ratelimiter = Ratelimiter() + self.registration_ratelimiter = Ratelimiter() self.datastore = None @@ -251,6 +252,9 @@ class HomeServer(object): def get_ratelimiter(self): return self.ratelimiter + def get_registration_ratelimiter(self): + return self.registration_ratelimiter + def build_federation_client(self): return FederationClient(self) -- cgit 1.5.1 From a9de04be724be9e19af0a5a5839c65924f90886a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 12 Feb 2019 10:31:21 +0000 Subject: Implement soft fail --- synapse/events/__init__.py | 14 ++++++++ synapse/handlers/federation.py | 77 +++++++++++++++++++++++++++++++++++++++++- synapse/storage/events.py | 1 + synapse/visibility.py | 4 +++ 4 files changed, 95 insertions(+), 1 deletion(-) (limited to 'synapse/handlers') diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index 20c1ab4203..bd130f8816 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -77,6 +77,20 @@ class _EventInternalMetadata(object): """ return getattr(self, "recheck_redaction", False) + def is_soft_failed(self): + """Whether the event has been soft failed. + + Soft failed events should be handled as usual, except: + 1. They should not go down sync or event streams, or generally + sent to clients. + 2. They should not be added to the forward extremities (and + therefore not to current state). + + Returns: + bool + """ + return getattr(self, "soft_failed", False) + def _event_dict_property(key): # We want to be able to use hasattr with the event dict properties. diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 72b63d64d0..a75abe8e91 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -45,6 +45,7 @@ from synapse.api.errors import ( SynapseError, ) from synapse.crypto.event_signing import compute_event_signature +from synapse.event_auth import auth_types_for_event from synapse.events.validator import EventValidator from synapse.replication.http.federation import ( ReplicationCleanRoomRestServlet, @@ -1628,6 +1629,7 @@ class FederationHandler(BaseHandler): origin, event, state=state, auth_events=auth_events, + backfilled=backfilled, ) # reraise does not allow inlineCallbacks to preserve the stacktrace, so we @@ -1672,6 +1674,7 @@ class FederationHandler(BaseHandler): event, state=ev_info.get("state"), auth_events=ev_info.get("auth_events"), + backfilled=backfilled, ) defer.returnValue(res) @@ -1794,7 +1797,7 @@ class FederationHandler(BaseHandler): ) @defer.inlineCallbacks - def _prep_event(self, origin, event, state=None, auth_events=None): + def _prep_event(self, origin, event, state, auth_events, backfilled): """ Args: @@ -1802,6 +1805,7 @@ class FederationHandler(BaseHandler): event: state: auth_events: + backfilled (bool) Returns: Deferred, which resolves to synapse.events.snapshot.EventContext @@ -1843,6 +1847,77 @@ class FederationHandler(BaseHandler): context.rejected = RejectedReason.AUTH_ERROR + # For new (non-backfilled and non-outlier) events we check if the event + # passes auth based on the current state. If it doesn't then we + # "soft-fail" the event. + do_soft_fail_check = not backfilled and not event.internal_metadata.is_outlier() + if do_soft_fail_check: + extrem_ids = yield self.store.get_latest_event_ids_in_room( + event.room_id, + ) + + extrem_ids = set(extrem_ids) + prev_event_ids = set(event.prev_event_ids()) + + if extrem_ids == prev_event_ids: + # If they're the same then the current state is the same as the + # state at the event, so no point rechecking auth for soft fail. + do_soft_fail_check = False + + if do_soft_fail_check: + room_version = yield self.store.get_room_version(event.room_id) + + # Calculate the "current state". + if state is not None: + # If we're explicitly given the state then we won't have all the + # prev events, and so we have a gap in the graph. In this case + # we want to be a little careful as we might have been down for + # a while and have an incorrect view of the current state, + # however we still want to do checks as gaps are easy to + # maliciously manufacture. + # + # So we use a "current state" that is actually a state + # resolution across the current forward extremities and the + # given state at the event. This should correctly handle cases + # like bans, especially with state res v2. + + state_sets = yield self.store.get_state_groups( + event.room_id, extrem_ids, + ) + state_sets = list(state_sets.values()) + state_sets.append(state) + current_state_ids = yield self.state_handler.resolve_events( + room_version, state_sets, event, + ) + current_state_ids = { + k: e.event_id for k, e in iteritems(current_state_ids) + } + else: + current_state_ids = yield self.state_handler.get_current_state_ids( + event.room_id, latest_event_ids=extrem_ids, + ) + + # Now check if event pass auth against said current state + auth_types = auth_types_for_event(event) + current_state_ids = [ + e for k, e in iteritems(current_state_ids) + if k in auth_types + ] + + current_auth_events = yield self.store.get_events(current_state_ids) + current_auth_events = { + (e.type, e.state_key): e for e in current_auth_events.values() + } + + try: + self.auth.check(room_version, event, auth_events=current_auth_events) + except AuthError as e: + logger.warn( + "Failed current state auth resolution for %r because %s", + event, e, + ) + event.internal_metadata.soft_failed = True + if event.type == EventTypes.GuestAccess and not context.rejected: yield self.maybe_kick_guest_users(event) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 06db9e56e6..990a5eaaae 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -537,6 +537,7 @@ class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore new_events = [ event for event, ctx in event_contexts if not event.internal_metadata.is_outlier() and not ctx.rejected + and not event.internal_metadata.is_soft_failed() ] # start with the existing forward extremities diff --git a/synapse/visibility.py b/synapse/visibility.py index efec21673b..16c40cd74c 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -67,6 +67,10 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, Returns: Deferred[list[synapse.events.EventBase]] """ + # Filter out events that have been soft failed so that we don't relay them + # to clients. + events = list(e for e in events if not e.internal_metadata.is_soft_failed()) + types = ( (EventTypes.RoomHistoryVisibility, ""), (EventTypes.Member, user_id), -- cgit 1.5.1 From 6d13bdec91e228a54a856ebe0e104062d96a4180 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 6 Mar 2019 17:21:08 +0000 Subject: Add docstrings from matrix-org-hotfixes --- synapse/handlers/sync.py | 33 ++++++++++++++++++++++++++------- synapse/storage/stream.py | 19 +++++++++++++++++++ 2 files changed, 45 insertions(+), 7 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index bd97241ab4..42f514cd10 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1894,15 +1894,34 @@ def _calculate_state( class SyncResultBuilder(object): - "Used to help build up a new SyncResult for a user" + """Used to help build up a new SyncResult for a user + + Attributes: + sync_config (SyncConfig) + full_state (bool) + since_token (StreamToken) + now_token (StreamToken) + joined_room_ids (list[str]) + + # The following mirror the fields in a sync response + presence (list) + account_data (list) + joined (list[JoinedSyncResult]) + invited (list[InvitedSyncResult]) + archived (list[ArchivedSyncResult]) + device (list) + groups (GroupsSyncResult|None) + to_device (list) + """ def __init__(self, sync_config, full_state, since_token, now_token, joined_room_ids): """ Args: - sync_config(SyncConfig) - full_state(bool): The full_state flag as specified by user - since_token(StreamToken): The token supplied by user, or None. - now_token(StreamToken): The token to sync up to. + sync_config (SyncConfig) + full_state (bool): The full_state flag as specified by user + since_token (StreamToken): The token supplied by user, or None. + now_token (StreamToken): The token to sync up to. + joined_room_ids (list[str]): List of rooms the user is joined to """ self.sync_config = sync_config self.full_state = full_state @@ -1930,8 +1949,8 @@ class RoomSyncResultBuilder(object): Args: room_id(str) rtype(str): One of `"joined"` or `"archived"` - events(list): List of events to include in the room, (more events - may be added when generating result). + events(list[FrozenEvent]): List of events to include in the room + (more events may be added when generating result). newly_joined(bool): If the user has newly joined the room full_state(bool): Whether the full state should be sent in result since_token(StreamToken): Earliest point to return events from, or None diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index d6cfdba519..580fafeb3a 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -191,6 +191,25 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): @defer.inlineCallbacks def get_room_events_stream_for_rooms(self, room_ids, from_key, to_key, limit=0, order='DESC'): + """Get new room events in stream ordering since `from_key`. + + Args: + room_id (str) + from_key (str): Token from which no events are returned before + to_key (str): Token from which no events are returned after. (This + is typically the current stream token) + limit (int): Maximum number of events to return + order (str): Either "DESC" or "ASC". Determines which events are + returned when the result is limited. If "DESC" then the most + recent `limit` events are returned, otherwise returns the + oldest `limit` events. + + Returns: + Deferred[dict[str,tuple[list[FrozenEvent], str]]] + A map from room id to a tuple containing: + - list of recent events in the room + - stream ordering key for the start of the chunk of events returned. + """ from_id = RoomStreamToken.parse_stream_token(from_key).stream room_ids = yield self._events_stream_cache.get_entities_changed( -- cgit 1.5.1 From 8b7790e68f552748b0fe20455c766a2376c2fefd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 6 Mar 2019 17:29:15 +0000 Subject: Port #4422 debug logging from hotfixes --- synapse/handlers/sync.py | 53 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 7 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index bd97241ab4..32101eb36a 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -39,6 +39,9 @@ from synapse.visibility import filter_events_for_client logger = logging.getLogger(__name__) +# Debug logger for https://github.com/matrix-org/synapse/issues/4422 +issue4422_logger = logging.getLogger("synapse.handler.sync.4422_debug") + # Counts the number of times we returned a non-empty sync. `type` is one of # "initial_sync", "full_state_sync" or "incremental_sync", `lazy_loaded` is @@ -962,6 +965,15 @@ class SyncHandler(object): yield self._generate_sync_entry_for_groups(sync_result_builder) + # debug for https://github.com/matrix-org/synapse/issues/4422 + for joined_room in sync_result_builder.joined: + room_id = joined_room.room_id + if room_id in newly_joined_rooms: + issue4422_logger.debug( + "Sync result for newly joined room %s: %r", + room_id, joined_room, + ) + defer.returnValue(SyncResult( presence=sync_result_builder.presence, account_data=sync_result_builder.account_data, @@ -1425,6 +1437,17 @@ class SyncHandler(object): old_mem_ev = yield self.store.get_event( old_mem_ev_id, allow_none=True ) + + # debug for #4422 + if has_join: + prev_membership = None + if old_mem_ev: + prev_membership = old_mem_ev.membership + issue4422_logger.debug( + "Previous membership for room %s with join: %s (event %s)", + room_id, prev_membership, old_mem_ev_id, + ) + if not old_mem_ev or old_mem_ev.membership != Membership.JOIN: newly_joined_rooms.append(room_id) @@ -1519,30 +1542,39 @@ class SyncHandler(object): for room_id in sync_result_builder.joined_room_ids: room_entry = room_to_events.get(room_id, None) + newly_joined = room_id in newly_joined_rooms if room_entry: events, start_key = room_entry prev_batch_token = now_token.copy_and_replace("room_key", start_key) - room_entries.append(RoomSyncResultBuilder( + entry = RoomSyncResultBuilder( room_id=room_id, rtype="joined", events=events, - newly_joined=room_id in newly_joined_rooms, + newly_joined=newly_joined, full_state=False, - since_token=None if room_id in newly_joined_rooms else since_token, + since_token=None if newly_joined else since_token, upto_token=prev_batch_token, - )) + ) else: - room_entries.append(RoomSyncResultBuilder( + entry = RoomSyncResultBuilder( room_id=room_id, rtype="joined", events=[], - newly_joined=room_id in newly_joined_rooms, + newly_joined=newly_joined, full_state=False, since_token=since_token, upto_token=since_token, - )) + ) + + if newly_joined: + # debugging for https://github.com/matrix-org/synapse/issues/4422 + issue4422_logger.debug( + "RoomSyncResultBuilder events for newly joined room %s: %r", + room_id, entry.events, + ) + room_entries.append(entry) defer.returnValue((room_entries, invited, newly_joined_rooms, newly_left_rooms)) @@ -1663,6 +1695,13 @@ class SyncHandler(object): newly_joined_room=newly_joined, ) + if newly_joined: + # debug for https://github.com/matrix-org/synapse/issues/4422 + issue4422_logger.debug( + "Timeline events after filtering in newly-joined room %s: %r", + room_id, batch, + ) + # When we join the room (or the client requests full_state), we should # send down any existing tags. Usually the user won't have tags in a # newly joined room, unless either a) they've joined before or b) the -- cgit 1.5.1 From f6135d06cf94fdef9942051f43872c7518511e74 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Thu, 7 Mar 2019 01:22:53 -0800 Subject: Rewrite userdir to be faster (#4537) --- changelog.d/4537.feature | 1 + synapse/handlers/user_directory.py | 222 ++++---------------- synapse/storage/schema/delta/53/user_share.sql | 47 +++++ synapse/storage/user_directory.py | 271 ++++++++----------------- tests/handlers/test_user_directory.py | 266 ++++++++++++++++++++---- tests/storage/test_user_directory.py | 2 - 6 files changed, 400 insertions(+), 409 deletions(-) create mode 100644 changelog.d/4537.feature create mode 100644 synapse/storage/schema/delta/53/user_share.sql (limited to 'synapse/handlers') diff --git a/changelog.d/4537.feature b/changelog.d/4537.feature new file mode 100644 index 0000000000..8f792b8890 --- /dev/null +++ b/changelog.d/4537.feature @@ -0,0 +1 @@ +The user directory has been rewritten to make it faster, with less chance of falling behind on a large server. diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 283c6c1b81..c21da8343a 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -15,7 +15,7 @@ import logging -from six import iteritems +from six import iteritems, iterkeys from twisted.internet import defer @@ -63,10 +63,6 @@ class UserDirectoryHandler(object): # When start up for the first time we need to populate the user_directory. # This is a set of user_id's we've inserted already self.initially_handled_users = set() - self.initially_handled_users_in_public = set() - - self.initially_handled_users_share = set() - self.initially_handled_users_share_private_room = set() # The current position in the current_state_delta stream self.pos = None @@ -140,7 +136,6 @@ class UserDirectoryHandler(object): # FIXME(#3714): We should probably do this in the same worker as all # the other changes. yield self.store.remove_from_user_dir(user_id) - yield self.store.remove_from_user_in_public_room(user_id) @defer.inlineCallbacks def _unsafe_process(self): @@ -215,15 +210,13 @@ class UserDirectoryHandler(object): logger.info("Processed all users") self.initially_handled_users = None - self.initially_handled_users_in_public = None - self.initially_handled_users_share = None - self.initially_handled_users_share_private_room = None yield self.store.update_user_directory_stream_pos(new_pos) @defer.inlineCallbacks def _handle_initial_room(self, room_id): - """Called when we initially fill out user_directory one room at a time + """ + Called when we initially fill out user_directory one room at a time """ is_in_room = yield self.store.is_host_joined(room_id, self.server_name) if not is_in_room: @@ -238,23 +231,15 @@ class UserDirectoryHandler(object): unhandled_users = user_ids - self.initially_handled_users yield self.store.add_profiles_to_user_dir( - room_id, {user_id: users_with_profile[user_id] for user_id in unhandled_users}, ) self.initially_handled_users |= unhandled_users - if is_public: - yield self.store.add_users_to_public_room( - room_id, user_ids=user_ids - self.initially_handled_users_in_public - ) - self.initially_handled_users_in_public |= user_ids - # We now go and figure out the new users who share rooms with user entries # We sleep aggressively here as otherwise it can starve resources. # We also batch up inserts/updates, but try to avoid too many at once. to_insert = set() - to_update = set() count = 0 for user_id in user_ids: if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: @@ -277,21 +262,7 @@ class UserDirectoryHandler(object): count += 1 user_set = (user_id, other_user_id) - - if user_set in self.initially_handled_users_share_private_room: - continue - - if user_set in self.initially_handled_users_share: - if is_public: - continue - to_update.add(user_set) - else: - to_insert.add(user_set) - - if is_public: - self.initially_handled_users_share.add(user_set) - else: - self.initially_handled_users_share_private_room.add(user_set) + to_insert.add(user_set) if len(to_insert) > self.INITIAL_ROOM_BATCH_SIZE: yield self.store.add_users_who_share_room( @@ -299,22 +270,10 @@ class UserDirectoryHandler(object): ) to_insert.clear() - if len(to_update) > self.INITIAL_ROOM_BATCH_SIZE: - yield self.store.update_users_who_share_room( - room_id, not is_public, to_update - ) - to_update.clear() - if to_insert: yield self.store.add_users_who_share_room(room_id, not is_public, to_insert) to_insert.clear() - if to_update: - yield self.store.update_users_who_share_room( - room_id, not is_public, to_update - ) - to_update.clear() - @defer.inlineCallbacks def _handle_deltas(self, deltas): """Called with the state deltas to process @@ -356,6 +315,7 @@ class UserDirectoryHandler(object): user_ids = yield self.store.get_users_in_dir_due_to_room( room_id ) + for user_id in user_ids: yield self._handle_remove_user(room_id, user_id) return @@ -436,14 +396,20 @@ class UserDirectoryHandler(object): # ignore the change return - if change: - users_with_profile = yield self.state.get_current_user_in_room(room_id) - for user_id, profile in iteritems(users_with_profile): - yield self._handle_new_user(room_id, user_id, profile) - else: - users = yield self.store.get_users_in_public_due_to_room(room_id) - for user_id in users: - yield self._handle_remove_user(room_id, user_id) + users_with_profile = yield self.state.get_current_user_in_room(room_id) + + # Remove every user from the sharing tables for that room. + for user_id in iterkeys(users_with_profile): + yield self.store.remove_user_who_share_room(user_id, room_id) + + # Then, re-add them to the tables. + # NOTE: this is not the most efficient method, as handle_new_user sets + # up local_user -> other_user and other_user_whos_local -> local_user, + # which when ran over an entire room, will result in the same values + # being added multiple times. The batching upserts shouldn't make this + # too bad, though. + for user_id, profile in iteritems(users_with_profile): + yield self._handle_new_user(room_id, user_id, profile) @defer.inlineCallbacks def _handle_local_user(self, user_id): @@ -457,7 +423,7 @@ class UserDirectoryHandler(object): row = yield self.store.get_user_in_directory(user_id) if not row: - yield self.store.add_profiles_to_user_dir(None, {user_id: profile}) + yield self.store.add_profiles_to_user_dir({user_id: profile}) @defer.inlineCallbacks def _handle_new_user(self, room_id, user_id, profile): @@ -471,55 +437,27 @@ class UserDirectoryHandler(object): row = yield self.store.get_user_in_directory(user_id) if not row: - yield self.store.add_profiles_to_user_dir(room_id, {user_id: profile}) + yield self.store.add_profiles_to_user_dir({user_id: profile}) is_public = yield self.store.is_room_world_readable_or_publicly_joinable( room_id ) - - if is_public: - row = yield self.store.get_user_in_public_room(user_id) - if not row: - yield self.store.add_users_to_public_room(room_id, [user_id]) - else: - logger.debug("Not adding new user to public dir, %r", user_id) - - # Now we update users who share rooms with users. We do this by getting - # all the current users in the room and seeing which aren't already - # marked in the database as sharing with `user_id` - + # Now we update users who share rooms with users. users_with_profile = yield self.state.get_current_user_in_room(room_id) to_insert = set() - to_update = set() - - is_appservice = self.store.get_if_app_services_interested_in_user(user_id) # First, if they're our user then we need to update for every user - if self.is_mine_id(user_id) and not is_appservice: - # Returns a map of other_user_id -> shared_private. We only need - # to update mappings if for users that either don't share a room - # already (aren't in the map) or, if the room is private, those that - # only share a public room. - user_ids_shared = yield self.store.get_users_who_share_room_from_dir( - user_id - ) + if self.is_mine_id(user_id): - for other_user_id in users_with_profile: - if user_id == other_user_id: - continue + is_appservice = self.store.get_if_app_services_interested_in_user(user_id) + + # We don't care about appservice users. + if not is_appservice: + for other_user_id in users_with_profile: + if user_id == other_user_id: + continue - shared_is_private = user_ids_shared.get(other_user_id) - if shared_is_private is True: - # We've already marked in the database they share a private room - continue - elif shared_is_private is False: - # They already share a public room, so only update if this is - # a private room - if not is_public: - to_update.add((user_id, other_user_id)) - elif shared_is_private is None: - # This is the first time they both share a room to_insert.add((user_id, other_user_id)) # Next we need to update for every local user in the room @@ -531,29 +469,11 @@ class UserDirectoryHandler(object): other_user_id ) if self.is_mine_id(other_user_id) and not is_appservice: - shared_is_private = yield self.store.get_if_users_share_a_room( - other_user_id, user_id - ) - if shared_is_private is True: - # We've already marked in the database they share a private room - continue - elif shared_is_private is False: - # They already share a public room, so only update if this is - # a private room - if not is_public: - to_update.add((other_user_id, user_id)) - elif shared_is_private is None: - # This is the first time they both share a room - to_insert.add((other_user_id, user_id)) + to_insert.add((other_user_id, user_id)) if to_insert: yield self.store.add_users_who_share_room(room_id, not is_public, to_insert) - if to_update: - yield self.store.update_users_who_share_room( - room_id, not is_public, to_update - ) - @defer.inlineCallbacks def _handle_remove_user(self, room_id, user_id): """Called when we might need to remove user to directory @@ -562,84 +482,16 @@ class UserDirectoryHandler(object): room_id (str): room_id that user left or stopped being public that user_id (str) """ - logger.debug("Maybe removing user %r", user_id) - - row = yield self.store.get_user_in_directory(user_id) - update_user_dir = row and row["room_id"] == room_id - - row = yield self.store.get_user_in_public_room(user_id) - update_user_in_public = row and row["room_id"] == room_id - - if update_user_in_public or update_user_dir: - # XXX: Make this faster? - rooms = yield self.store.get_rooms_for_user(user_id) - for j_room_id in rooms: - if not update_user_in_public and not update_user_dir: - break - - is_in_room = yield self.store.is_host_joined( - j_room_id, self.server_name - ) - - if not is_in_room: - continue - - if update_user_dir: - update_user_dir = False - yield self.store.update_user_in_user_dir(user_id, j_room_id) + logger.debug("Removing user %r", user_id) - is_public = yield self.store.is_room_world_readable_or_publicly_joinable( - j_room_id - ) + # Remove user from sharing tables + yield self.store.remove_user_who_share_room(user_id, room_id) - if update_user_in_public and is_public: - yield self.store.update_user_in_public_user_list(user_id, j_room_id) - update_user_in_public = False + # Are they still in a room with members? If not, remove them entirely. + users_in_room_with = yield self.store.get_users_who_share_room_from_dir(user_id) - if update_user_dir: + if len(users_in_room_with) == 0: yield self.store.remove_from_user_dir(user_id) - elif update_user_in_public: - yield self.store.remove_from_user_in_public_room(user_id) - - # Now handle users_who_share_rooms. - - # Get a list of user tuples that were in the DB due to this room and - # users (this includes tuples where the other user matches `user_id`) - user_tuples = yield self.store.get_users_in_share_dir_with_room_id( - user_id, room_id - ) - - for user_id, other_user_id in user_tuples: - # For each user tuple get a list of rooms that they still share, - # trying to find a private room, and update the entry in the DB - rooms = yield self.store.get_rooms_in_common_for_users( - user_id, other_user_id - ) - - # If they dont share a room anymore, remove the mapping - if not rooms: - yield self.store.remove_user_who_share_room(user_id, other_user_id) - continue - - found_public_share = None - for j_room_id in rooms: - is_public = yield self.store.is_room_world_readable_or_publicly_joinable( - j_room_id - ) - - if is_public: - found_public_share = j_room_id - else: - found_public_share = None - yield self.store.update_users_who_share_room( - room_id, not is_public, [(user_id, other_user_id)] - ) - break - - if found_public_share: - yield self.store.update_users_who_share_room( - room_id, not is_public, [(user_id, other_user_id)] - ) @defer.inlineCallbacks def _handle_profile_change(self, user_id, room_id, prev_event_id, event_id): diff --git a/synapse/storage/schema/delta/53/user_share.sql b/synapse/storage/schema/delta/53/user_share.sql new file mode 100644 index 0000000000..14424ded0c --- /dev/null +++ b/synapse/storage/schema/delta/53/user_share.sql @@ -0,0 +1,47 @@ +/* Copyright 2017 Vector Creations Ltd, 2019 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Old disused version of the tables below. +DROP TABLE IF EXISTS users_who_share_rooms; + +-- This is no longer used because it's duplicated by the users_who_share_public_rooms +DROP TABLE IF EXISTS users_in_public_rooms; + +-- Tables keeping track of what users share rooms. This is a map of local users +-- to local or remote users, per room. Remote users cannot be in the user_id +-- column, only the other_user_id column. There are two tables, one for public +-- rooms and those for private rooms. +CREATE TABLE IF NOT EXISTS users_who_share_public_rooms ( + user_id TEXT NOT NULL, + other_user_id TEXT NOT NULL, + room_id TEXT NOT NULL +); + +CREATE TABLE IF NOT EXISTS users_who_share_private_rooms ( + user_id TEXT NOT NULL, + other_user_id TEXT NOT NULL, + room_id TEXT NOT NULL +); + +CREATE UNIQUE INDEX users_who_share_public_rooms_u_idx ON users_who_share_public_rooms(user_id, other_user_id, room_id); +CREATE INDEX users_who_share_public_rooms_r_idx ON users_who_share_public_rooms(room_id); +CREATE INDEX users_who_share_public_rooms_o_idx ON users_who_share_public_rooms(other_user_id); + +CREATE UNIQUE INDEX users_who_share_private_rooms_u_idx ON users_who_share_private_rooms(user_id, other_user_id, room_id); +CREATE INDEX users_who_share_private_rooms_r_idx ON users_who_share_private_rooms(room_id); +CREATE INDEX users_who_share_private_rooms_o_idx ON users_who_share_private_rooms(other_user_id); + +-- Make sure that we populate the tables initially by resetting the stream ID +UPDATE user_directory_stream_pos SET stream_id = NULL; diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index fea866c043..2317d22ed6 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -63,31 +63,14 @@ class UserDirectoryStore(SQLBaseStore): defer.returnValue(False) - @defer.inlineCallbacks - def add_users_to_public_room(self, room_id, user_ids): - """Add user to the list of users in public rooms - - Args: - room_id (str): A room_id that all users are in that is world_readable - or publically joinable - user_ids (list(str)): Users to add - """ - yield self._simple_insert_many( - table="users_in_public_rooms", - values=[{"user_id": user_id, "room_id": room_id} for user_id in user_ids], - desc="add_users_to_public_room", - ) - for user_id in user_ids: - self.get_user_in_public_room.invalidate((user_id,)) - - def add_profiles_to_user_dir(self, room_id, users_with_profile): + def add_profiles_to_user_dir(self, users_with_profile): """Add profiles to the user directory Args: - room_id (str): A room_id that all users are joined to users_with_profile (dict): Users to add to directory in the form of mapping of user_id -> ProfileInfo """ + if isinstance(self.database_engine, PostgresEngine): # We weight the loclpart most highly, then display name and finally # server name @@ -113,7 +96,7 @@ class UserDirectoryStore(SQLBaseStore): INSERT INTO user_directory_search(user_id, value) VALUES (?,?) """ - args = ( + args = tuple( ( user_id, "%s %s" % (user_id, p.display_name) if p.display_name else user_id, @@ -132,7 +115,7 @@ class UserDirectoryStore(SQLBaseStore): values=[ { "user_id": user_id, - "room_id": room_id, + "room_id": None, "display_name": profile.display_name, "avatar_url": profile.avatar_url, } @@ -250,16 +233,6 @@ class UserDirectoryStore(SQLBaseStore): "update_profile_in_user_dir", _update_profile_in_user_dir_txn ) - @defer.inlineCallbacks - def update_user_in_public_user_list(self, user_id, room_id): - yield self._simple_update_one( - table="users_in_public_rooms", - keyvalues={"user_id": user_id}, - updatevalues={"room_id": room_id}, - desc="update_user_in_public_user_list", - ) - self.get_user_in_public_room.invalidate((user_id,)) - def remove_from_user_dir(self, user_id): def _remove_from_user_dir_txn(txn): self._simple_delete_txn( @@ -269,62 +242,50 @@ class UserDirectoryStore(SQLBaseStore): txn, table="user_directory_search", keyvalues={"user_id": user_id} ) self._simple_delete_txn( - txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} + txn, + table="users_who_share_public_rooms", + keyvalues={"user_id": user_id}, + ) + self._simple_delete_txn( + txn, + table="users_who_share_public_rooms", + keyvalues={"other_user_id": user_id}, + ) + self._simple_delete_txn( + txn, + table="users_who_share_private_rooms", + keyvalues={"user_id": user_id}, + ) + self._simple_delete_txn( + txn, + table="users_who_share_private_rooms", + keyvalues={"other_user_id": user_id}, ) txn.call_after(self.get_user_in_directory.invalidate, (user_id,)) - txn.call_after(self.get_user_in_public_room.invalidate, (user_id,)) return self.runInteraction("remove_from_user_dir", _remove_from_user_dir_txn) - @defer.inlineCallbacks - def remove_from_user_in_public_room(self, user_id): - yield self._simple_delete( - table="users_in_public_rooms", - keyvalues={"user_id": user_id}, - desc="remove_from_user_in_public_room", - ) - self.get_user_in_public_room.invalidate((user_id,)) - - def get_users_in_public_due_to_room(self, room_id): - """Get all user_ids that are in the room directory because they're - in the given room_id - """ - return self._simple_select_onecol( - table="users_in_public_rooms", - keyvalues={"room_id": room_id}, - retcol="user_id", - desc="get_users_in_public_due_to_room", - ) - @defer.inlineCallbacks def get_users_in_dir_due_to_room(self, room_id): """Get all user_ids that are in the room directory because they're in the given room_id """ - user_ids_dir = yield self._simple_select_onecol( - table="user_directory", - keyvalues={"room_id": room_id}, - retcol="user_id", - desc="get_users_in_dir_due_to_room", - ) - - user_ids_pub = yield self._simple_select_onecol( - table="users_in_public_rooms", + user_ids_share_pub = yield self._simple_select_onecol( + table="users_who_share_public_rooms", keyvalues={"room_id": room_id}, - retcol="user_id", + retcol="other_user_id", desc="get_users_in_dir_due_to_room", ) - user_ids_share = yield self._simple_select_onecol( - table="users_who_share_rooms", + user_ids_share_priv = yield self._simple_select_onecol( + table="users_who_share_private_rooms", keyvalues={"room_id": room_id}, - retcol="user_id", + retcol="other_user_id", desc="get_users_in_dir_due_to_room", ) - user_ids = set(user_ids_dir) - user_ids.update(user_ids_pub) - user_ids.update(user_ids_share) + user_ids = set(user_ids_share_pub) + user_ids.update(user_ids_share_priv) defer.returnValue(user_ids) @@ -351,7 +312,7 @@ class UserDirectoryStore(SQLBaseStore): defer.returnValue([name for name, in rows]) def add_users_who_share_room(self, room_id, share_private, user_id_tuples): - """Insert entries into the users_who_share_rooms table. The first + """Insert entries into the users_who_share_*_rooms table. The first user should be a local user. Args: @@ -361,109 +322,71 @@ class UserDirectoryStore(SQLBaseStore): """ def _add_users_who_share_room_txn(txn): - self._simple_insert_many_txn( + + if share_private: + tbl = "users_who_share_private_rooms" + else: + tbl = "users_who_share_public_rooms" + + self._simple_upsert_many_txn( txn, - table="users_who_share_rooms", - values=[ - { - "user_id": user_id, - "other_user_id": other_user_id, - "room_id": room_id, - "share_private": share_private, - } + table=tbl, + key_names=["user_id", "other_user_id", "room_id"], + key_values=[ + (user_id, other_user_id, room_id) for user_id, other_user_id in user_id_tuples ], + value_names=(), + value_values=None, ) for user_id, other_user_id in user_id_tuples: txn.call_after( self.get_users_who_share_room_from_dir.invalidate, (user_id,) ) - txn.call_after( - self.get_if_users_share_a_room.invalidate, (user_id, other_user_id) - ) return self.runInteraction( "add_users_who_share_room", _add_users_who_share_room_txn ) - def update_users_who_share_room(self, room_id, share_private, user_id_sets): - """Updates entries in the users_who_share_rooms table. The first - user should be a local user. - - Args: - room_id (str) - share_private (bool): Is the room private - user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs. + def remove_user_who_share_room(self, user_id, room_id): """ - - def _update_users_who_share_room_txn(txn): - sql = """ - UPDATE users_who_share_rooms - SET room_id = ?, share_private = ? - WHERE user_id = ? AND other_user_id = ? - """ - txn.executemany( - sql, ((room_id, share_private, uid, oid) for uid, oid in user_id_sets) - ) - for user_id, other_user_id in user_id_sets: - txn.call_after( - self.get_users_who_share_room_from_dir.invalidate, (user_id,) - ) - txn.call_after( - self.get_if_users_share_a_room.invalidate, (user_id, other_user_id) - ) - - return self.runInteraction( - "update_users_who_share_room", _update_users_who_share_room_txn - ) - - def remove_user_who_share_room(self, user_id, other_user_id): - """Deletes entries in the users_who_share_rooms table. The first + Deletes entries in the users_who_share_*_rooms table. The first user should be a local user. Args: + user_id (str) room_id (str) - share_private (bool): Is the room private - user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs. """ def _remove_user_who_share_room_txn(txn): self._simple_delete_txn( txn, - table="users_who_share_rooms", - keyvalues={"user_id": user_id, "other_user_id": other_user_id}, + table="users_who_share_private_rooms", + keyvalues={"user_id": user_id, "room_id": room_id}, ) - txn.call_after( - self.get_users_who_share_room_from_dir.invalidate, (user_id,) + self._simple_delete_txn( + txn, + table="users_who_share_private_rooms", + keyvalues={"other_user_id": user_id, "room_id": room_id}, + ) + self._simple_delete_txn( + txn, + table="users_who_share_public_rooms", + keyvalues={"user_id": user_id, "room_id": room_id}, + ) + self._simple_delete_txn( + txn, + table="users_who_share_public_rooms", + keyvalues={"other_user_id": user_id, "room_id": room_id}, ) txn.call_after( - self.get_if_users_share_a_room.invalidate, (user_id, other_user_id) + self.get_users_who_share_room_from_dir.invalidate, (user_id,) ) return self.runInteraction( "remove_user_who_share_room", _remove_user_who_share_room_txn ) - @cached(max_entries=500000) - def get_if_users_share_a_room(self, user_id, other_user_id): - """Gets if users share a room. - - Args: - user_id (str): Must be a local user_id - other_user_id (str) - - Returns: - bool|None: None if they don't share a room, otherwise whether they - share a private room or not. - """ - return self._simple_select_one_onecol( - table="users_who_share_rooms", - keyvalues={"user_id": user_id, "other_user_id": other_user_id}, - retcol="share_private", - allow_none=True, - desc="get_if_users_share_a_room", - ) - @cachedInlineCallbacks(max_entries=500000, iterable=True) def get_users_who_share_room_from_dir(self, user_id): """Returns the set of users who share a room with `user_id` @@ -472,32 +395,29 @@ class UserDirectoryStore(SQLBaseStore): user_id(str): Must be a local user Returns: - dict: user_id -> share_private mapping + list: user_id """ - rows = yield self._simple_select_list( - table="users_who_share_rooms", + rows = yield self._simple_select_onecol( + table="users_who_share_private_rooms", + keyvalues={"user_id": user_id}, + retcol="other_user_id", + desc="get_users_who_share_room_with_user", + ) + + pub_rows = yield self._simple_select_onecol( + table="users_who_share_public_rooms", keyvalues={"user_id": user_id}, - retcols=("other_user_id", "share_private"), + retcol="other_user_id", desc="get_users_who_share_room_with_user", ) - defer.returnValue({row["other_user_id"]: row["share_private"] for row in rows}) + users = set(pub_rows) + users.update(rows) - def get_users_in_share_dir_with_room_id(self, user_id, room_id): - """Get all user tuples that are in the users_who_share_rooms due to the - given room_id. + # Remove the user themselves from this list. + users.discard(user_id) - Returns: - [(user_id, other_user_id)]: where one of the two will match the given - user_id. - """ - sql = """ - SELECT user_id, other_user_id FROM users_who_share_rooms - WHERE room_id = ? AND (user_id = ? OR other_user_id = ?) - """ - return self._execute( - "get_users_in_share_dir_with_room_id", None, sql, room_id, user_id, user_id - ) + defer.returnValue(list(users)) @defer.inlineCallbacks def get_rooms_in_common_for_users(self, user_id, other_user_id): @@ -532,12 +452,10 @@ class UserDirectoryStore(SQLBaseStore): def _delete_all_from_user_dir_txn(txn): txn.execute("DELETE FROM user_directory") txn.execute("DELETE FROM user_directory_search") - txn.execute("DELETE FROM users_in_public_rooms") - txn.execute("DELETE FROM users_who_share_rooms") + txn.execute("DELETE FROM users_who_share_public_rooms") + txn.execute("DELETE FROM users_who_share_private_rooms") txn.call_after(self.get_user_in_directory.invalidate_all) - txn.call_after(self.get_user_in_public_room.invalidate_all) txn.call_after(self.get_users_who_share_room_from_dir.invalidate_all) - txn.call_after(self.get_if_users_share_a_room.invalidate_all) return self.runInteraction( "delete_all_from_user_dir", _delete_all_from_user_dir_txn @@ -548,21 +466,11 @@ class UserDirectoryStore(SQLBaseStore): return self._simple_select_one( table="user_directory", keyvalues={"user_id": user_id}, - retcols=("room_id", "display_name", "avatar_url"), + retcols=("display_name", "avatar_url"), allow_none=True, desc="get_user_in_directory", ) - @cached() - def get_user_in_public_room(self, user_id): - return self._simple_select_one( - table="users_in_public_rooms", - keyvalues={"user_id": user_id}, - retcols=("room_id",), - allow_none=True, - desc="get_user_in_public_room", - ) - def get_user_directory_stream_pos(self): return self._simple_select_one_onecol( table="user_directory_stream_pos", @@ -660,14 +568,15 @@ class UserDirectoryStore(SQLBaseStore): where_clause = "1=1" else: join_clause = """ - LEFT JOIN users_in_public_rooms AS p USING (user_id) LEFT JOIN ( - SELECT other_user_id AS user_id FROM users_who_share_rooms - WHERE user_id = ? AND share_private - ) AS s USING (user_id) + SELECT other_user_id AS user_id FROM users_who_share_public_rooms + UNION + SELECT other_user_id AS user_id FROM users_who_share_private_rooms + WHERE user_id = ? + ) AS p USING (user_id) """ join_args = (user_id,) - where_clause = "(s.user_id IS NOT NULL OR p.user_id IS NOT NULL)" + where_clause = "p.user_id IS NOT NULL" if isinstance(self.database_engine, PostgresEngine): full_query, exact_query, prefix_query = _parse_query_postgres(search_term) @@ -686,7 +595,7 @@ class UserDirectoryStore(SQLBaseStore): %s AND vector @@ to_tsquery('english', ?) ORDER BY - (CASE WHEN s.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END) + (CASE WHEN d.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END) * (CASE WHEN display_name IS NOT NULL THEN 1.2 ELSE 1.0 END) * (CASE WHEN avatar_url IS NOT NULL THEN 1.2 ELSE 1.0 END) * ( diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index 11f2bae698..a16a2dc67b 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -14,78 +14,262 @@ # limitations under the License. from mock import Mock -from twisted.internet import defer - from synapse.api.constants import UserTypes -from synapse.handlers.user_directory import UserDirectoryHandler +from synapse.rest.client.v1 import admin, login, room from synapse.storage.roommember import ProfileInfo from tests import unittest -from tests.utils import setup_test_homeserver -class UserDirectoryHandlers(object): - def __init__(self, hs): - self.user_directory_handler = UserDirectoryHandler(hs) +class UserDirectoryTestCase(unittest.HomeserverTestCase): + """ + Tests the UserDirectoryHandler. + """ + servlets = [ + login.register_servlets, + admin.register_servlets, + room.register_servlets, + ] -class UserDirectoryTestCase(unittest.TestCase): - """ Tests the UserDirectoryHandler. """ + def make_homeserver(self, reactor, clock): - @defer.inlineCallbacks - def setUp(self): - hs = yield setup_test_homeserver(self.addCleanup) - self.store = hs.get_datastore() - hs.handlers = UserDirectoryHandlers(hs) + config = self.default_config() + config.update_user_directory = True + return self.setup_test_homeserver(config=config) - self.handler = hs.get_handlers().user_directory_handler + def prepare(self, reactor, clock, hs): + self.store = hs.get_datastore() + self.handler = hs.get_user_directory_handler() - @defer.inlineCallbacks def test_handle_local_profile_change_with_support_user(self): support_user_id = "@support:test" - yield self.store.register( - user_id=support_user_id, - token="123", - password_hash=None, - user_type=UserTypes.SUPPORT + self.get_success( + self.store.register( + user_id=support_user_id, + token="123", + password_hash=None, + user_type=UserTypes.SUPPORT, + ) ) - yield self.handler.handle_local_profile_change(support_user_id, None) - profile = yield self.store.get_user_in_directory(support_user_id) + self.get_success( + self.handler.handle_local_profile_change(support_user_id, None) + ) + profile = self.get_success(self.store.get_user_in_directory(support_user_id)) self.assertTrue(profile is None) display_name = 'display_name' - profile_info = ProfileInfo( - avatar_url='avatar_url', - display_name=display_name, - ) + profile_info = ProfileInfo(avatar_url='avatar_url', display_name=display_name) regular_user_id = '@regular:test' - yield self.handler.handle_local_profile_change(regular_user_id, profile_info) - profile = yield self.store.get_user_in_directory(regular_user_id) + self.get_success( + self.handler.handle_local_profile_change(regular_user_id, profile_info) + ) + profile = self.get_success(self.store.get_user_in_directory(regular_user_id)) self.assertTrue(profile['display_name'] == display_name) - @defer.inlineCallbacks def test_handle_user_deactivated_support_user(self): s_user_id = "@support:test" - self.store.register( - user_id=s_user_id, - token="123", - password_hash=None, - user_type=UserTypes.SUPPORT + self.get_success( + self.store.register( + user_id=s_user_id, + token="123", + password_hash=None, + user_type=UserTypes.SUPPORT, + ) ) self.store.remove_from_user_dir = Mock() self.store.remove_from_user_in_public_room = Mock() - yield self.handler.handle_user_deactivated(s_user_id) + self.get_success(self.handler.handle_user_deactivated(s_user_id)) self.store.remove_from_user_dir.not_called() self.store.remove_from_user_in_public_room.not_called() - @defer.inlineCallbacks def test_handle_user_deactivated_regular_user(self): r_user_id = "@regular:test" - self.store.register(user_id=r_user_id, token="123", password_hash=None) + self.get_success( + self.store.register(user_id=r_user_id, token="123", password_hash=None) + ) self.store.remove_from_user_dir = Mock() - self.store.remove_from_user_in_public_room = Mock() - yield self.handler.handle_user_deactivated(r_user_id) + self.get_success(self.handler.handle_user_deactivated(r_user_id)) self.store.remove_from_user_dir.called_once_with(r_user_id) - self.store.remove_from_user_in_public_room.assert_called_once_with(r_user_id) + + def test_private_room(self): + """ + A user can be searched for only by people that are either in a public + room, or that share a private chat. + """ + u1 = self.register_user("user1", "pass") + u1_token = self.login(u1, "pass") + u2 = self.register_user("user2", "pass") + u2_token = self.login(u2, "pass") + u3 = self.register_user("user3", "pass") + + # We do not add users to the directory until they join a room. + s = self.get_success(self.handler.search_users(u1, "user2", 10)) + self.assertEqual(len(s["results"]), 0) + + room = self.helper.create_room_as(u1, is_public=False, tok=u1_token) + self.helper.invite(room, src=u1, targ=u2, tok=u1_token) + self.helper.join(room, user=u2, tok=u2_token) + + # Check we have populated the database correctly. + shares_public = self.get_users_who_share_public_rooms() + shares_private = self.get_users_who_share_private_rooms() + + self.assertEqual(shares_public, []) + self.assertEqual( + self._compress_shared(shares_private), set([(u1, u2, room), (u2, u1, room)]) + ) + + # We get one search result when searching for user2 by user1. + s = self.get_success(self.handler.search_users(u1, "user2", 10)) + self.assertEqual(len(s["results"]), 1) + + # We get NO search results when searching for user2 by user3. + s = self.get_success(self.handler.search_users(u3, "user2", 10)) + self.assertEqual(len(s["results"]), 0) + + # We get NO search results when searching for user3 by user1. + s = self.get_success(self.handler.search_users(u1, "user3", 10)) + self.assertEqual(len(s["results"]), 0) + + # User 2 then leaves. + self.helper.leave(room, user=u2, tok=u2_token) + + # Check we have removed the values. + shares_public = self.get_users_who_share_public_rooms() + shares_private = self.get_users_who_share_private_rooms() + + self.assertEqual(shares_public, []) + self.assertEqual(self._compress_shared(shares_private), set()) + + # User1 now gets no search results for any of the other users. + s = self.get_success(self.handler.search_users(u1, "user2", 10)) + self.assertEqual(len(s["results"]), 0) + + s = self.get_success(self.handler.search_users(u1, "user3", 10)) + self.assertEqual(len(s["results"]), 0) + + def _compress_shared(self, shared): + """ + Compress a list of users who share rooms dicts to a list of tuples. + """ + r = set() + for i in shared: + r.add((i["user_id"], i["other_user_id"], i["room_id"])) + return r + + def get_users_who_share_public_rooms(self): + return self.get_success( + self.store._simple_select_list( + "users_who_share_public_rooms", + None, + ["user_id", "other_user_id", "room_id"], + ) + ) + + def get_users_who_share_private_rooms(self): + return self.get_success( + self.store._simple_select_list( + "users_who_share_private_rooms", + None, + ["user_id", "other_user_id", "room_id"], + ) + ) + + def test_initial(self): + """ + The user directory's initial handler correctly updates the search tables. + """ + u1 = self.register_user("user1", "pass") + u1_token = self.login(u1, "pass") + u2 = self.register_user("user2", "pass") + u2_token = self.login(u2, "pass") + u3 = self.register_user("user3", "pass") + u3_token = self.login(u3, "pass") + + room = self.helper.create_room_as(u1, is_public=True, tok=u1_token) + self.helper.invite(room, src=u1, targ=u2, tok=u1_token) + self.helper.join(room, user=u2, tok=u2_token) + + private_room = self.helper.create_room_as(u1, is_public=False, tok=u1_token) + self.helper.invite(private_room, src=u1, targ=u3, tok=u1_token) + self.helper.join(private_room, user=u3, tok=u3_token) + + self.get_success(self.store.update_user_directory_stream_pos(None)) + self.get_success(self.store.delete_all_from_user_dir()) + + shares_public = self.get_users_who_share_public_rooms() + shares_private = self.get_users_who_share_private_rooms() + + self.assertEqual(shares_private, []) + self.assertEqual(shares_public, []) + + # Reset the handled users caches + self.handler.initially_handled_users = set() + + # Do the initial population + d = self.handler._do_initial_spam() + + # This takes a while, so pump it a bunch of times to get through the + # sleep delays + for i in range(10): + self.pump(1) + + self.get_success(d) + + shares_public = self.get_users_who_share_public_rooms() + shares_private = self.get_users_who_share_private_rooms() + + # User 1 and User 2 share public rooms + self.assertEqual( + self._compress_shared(shares_public), set([(u1, u2, room), (u2, u1, room)]) + ) + + # User 1 and User 3 share private rooms + self.assertEqual( + self._compress_shared(shares_private), + set([(u1, u3, private_room), (u3, u1, private_room)]), + ) + + def test_search_all_users(self): + """ + Search all users = True means that a user does not have to share a + private room with the searching user or be in a public room to be search + visible. + """ + self.handler.search_all_users = True + self.hs.config.user_directory_search_all_users = True + + u1 = self.register_user("user1", "pass") + u1_token = self.login(u1, "pass") + u2 = self.register_user("user2", "pass") + u2_token = self.login(u2, "pass") + u3 = self.register_user("user3", "pass") + + # User 1 and User 2 join a room. User 3 never does. + room = self.helper.create_room_as(u1, is_public=True, tok=u1_token) + self.helper.invite(room, src=u1, targ=u2, tok=u1_token) + self.helper.join(room, user=u2, tok=u2_token) + + self.get_success(self.store.update_user_directory_stream_pos(None)) + self.get_success(self.store.delete_all_from_user_dir()) + + # Reset the handled users caches + self.handler.initially_handled_users = set() + + # Do the initial population + d = self.handler._do_initial_spam() + + # This takes a while, so pump it a bunch of times to get through the + # sleep delays + for i in range(10): + self.pump(1) + + self.get_success(d) + + # Despite not sharing a room, search_all_users means we get a search + # result. + s = self.get_success(self.handler.search_users(u1, u3, 10)) + self.assertEqual(len(s["results"]), 1) diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py index 0dde1ab2fe..a2a652a235 100644 --- a/tests/storage/test_user_directory.py +++ b/tests/storage/test_user_directory.py @@ -35,14 +35,12 @@ class UserDirectoryStoreTestCase(unittest.TestCase): # alice and bob are both in !room_id. bobby is not but shares # a homeserver with alice. yield self.store.add_profiles_to_user_dir( - "!room:id", { ALICE: ProfileInfo(None, "alice"), BOB: ProfileInfo(None, "bob"), BOBBY: ProfileInfo(None, "bobby"), }, ) - yield self.store.add_users_to_public_room("!room:id", [ALICE, BOB]) yield self.store.add_users_who_share_room( "!room:id", False, ((ALICE, BOB), (BOB, ALICE)) ) -- cgit 1.5.1 From c633fc02d72e325ab9689f3f27edb86ef93cec0c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 7 Mar 2019 15:53:14 +0000 Subject: Add some debug logging for device list handling --- synapse/handlers/device.py | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index c09a7c6280..03644a93cc 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -402,6 +402,12 @@ class DeviceHandler(DeviceWorkerHandler): user_id, device_ids, list(hosts) ) + for device_id in device_ids: + logger.debug( + "Notifying about update %r/%r, ID: %r", user_id, device_id, + position, + ) + room_ids = yield self.store.get_rooms_for_user(user_id) yield self.notifier.on_new_event( @@ -409,7 +415,7 @@ class DeviceHandler(DeviceWorkerHandler): ) if hosts: - logger.info("Sending device list update notif to: %r", hosts) + logger.info("Sending device list update notif for %r to: %r", user_id, hosts) for host in hosts: self.federation_sender.send_device_messages(host) @@ -479,15 +485,26 @@ class DeviceListEduUpdater(object): if get_domain_from_id(user_id) != origin: # TODO: Raise? - logger.warning("Got device list update edu for %r from %r", user_id, origin) + logger.warning( + "Got device list update edu for %r/%r from %r", + user_id, device_id, origin, + ) return room_ids = yield self.store.get_rooms_for_user(user_id) if not room_ids: # We don't share any rooms with this user. Ignore update, as we # probably won't get any further updates. + logger.warning( + "Got device list update edu for %r/%r, but don't share a room", + user_id, device_id, + ) return + logger.debug( + "Received device list update for %r/%r", user_id, device_id, + ) + self._pending_updates.setdefault(user_id, []).append( (device_id, stream_id, prev_ids, edu_content) ) @@ -505,10 +522,18 @@ class DeviceListEduUpdater(object): # This can happen since we batch updates return + for device_id, stream_id, prev_ids, content in pending_updates: + logger.debug( + "Handling update %r/%r, ID: %r, prev: %r ", + user_id, device_id, stream_id, prev_ids, + ) + # Given a list of updates we check if we need to resync. This # happens if we've missed updates. resync = yield self._need_to_do_resync(user_id, pending_updates) + logger.debug("Need to re-sync devices for %r? %r", user_id, resync) + if resync: # Fetch all devices for the user. origin = get_domain_from_id(user_id) @@ -561,6 +586,12 @@ class DeviceListEduUpdater(object): ) devices = [] + for device in devices: + logger.debug( + "Handling resync update %r/%r, ID: %r", + user_id, device["device_id"], stream_id, + ) + yield self.store.update_remote_device_list_cache( user_id, devices, stream_id, ) @@ -593,6 +624,11 @@ class DeviceListEduUpdater(object): user_id ) + logger.debug( + "Current extremity for %r: %r", + user_id, extremity, + ) + stream_id_in_updates = set() # stream_ids in updates list for _, stream_id, prev_ids, _ in updates: if not prev_ids: -- cgit 1.5.1 From d42b41544a3d8950f2a804703aa4ad311e9feddd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 7 Mar 2019 16:04:24 +0000 Subject: When re-syncing device lists reset the state We keep track of what stream IDs we've seen so that we know what updates we've handled or missed. If we re-sync we don't know if the updates we've seen are included in the re-sync (there may be a race), so we should reset the seen updates. --- synapse/handlers/device.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index c09a7c6280..00f12ba40d 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -566,6 +566,10 @@ class DeviceListEduUpdater(object): ) device_ids = [device["device_id"] for device in devices] yield self.device_handler.notify_device_update(user_id, device_ids) + + # We clobber the seen updates since we've re-synced from a given + # point. + self._seen_updates[user_id] = set([stream_id]) else: # Simply update the single device, since we know that is the only # change (because of the single prev_id matching the current cache) @@ -578,9 +582,9 @@ class DeviceListEduUpdater(object): user_id, [device_id for device_id, _, _, _ in pending_updates] ) - self._seen_updates.setdefault(user_id, set()).update( - stream_id for _, stream_id, _, _ in pending_updates - ) + self._seen_updates.setdefault(user_id, set()).update( + stream_id for _, stream_id, _, _ in pending_updates + ) @defer.inlineCallbacks def _need_to_do_resync(self, user_id, updates): -- cgit 1.5.1 From 0ff8163eae50626dc7bc07eda7638f9f5fed5b6e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 8 Mar 2019 11:26:33 +0000 Subject: Factor out soft fail checks --- synapse/handlers/federation.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index a75abe8e91..9eaf2d3e18 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1847,6 +1847,28 @@ class FederationHandler(BaseHandler): context.rejected = RejectedReason.AUTH_ERROR + if not context.rejected: + yield self._check_for_soft_fail(event, state, backfilled) + + if event.type == EventTypes.GuestAccess and not context.rejected: + yield self.maybe_kick_guest_users(event) + + defer.returnValue(context) + + @defer.inlineCallbacks + def _check_for_soft_fail(self, event, state, backfilled): + """Checks if we should soft fail the event, if so marks the event as + such. + + Args: + event (FrozenEvent) + state (dict|None): The state at the event if we don't have all the + event's prev events + backfilled (bool): Whether the event is from backfill + + Returns: + Deferred + """ # For new (non-backfilled and non-outlier) events we check if the event # passes auth based on the current state. If it doesn't then we # "soft-fail" the event. @@ -1918,11 +1940,6 @@ class FederationHandler(BaseHandler): ) event.internal_metadata.soft_failed = True - if event.type == EventTypes.GuestAccess and not context.rejected: - yield self.maybe_kick_guest_users(event) - - defer.returnValue(context) - @defer.inlineCallbacks def on_query_auth(self, origin, event_id, room_id, remote_auth_chain, rejects, missing): -- cgit 1.5.1 From 4abc988c6a1020a8f9e5d3aec92f4b817f6e352e Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Mon, 11 Mar 2019 21:11:36 +1100 Subject: initial --- synapse/handlers/user_directory.py | 41 +++++++++++++++++++++- synapse/storage/schema/delta/53/user_share.sql | 3 -- .../schema/delta/53/users_in_public_rooms.sql | 28 +++++++++++++++ synapse/storage/user_directory.py | 34 ++++++++++++++++++ tests/handlers/test_user_directory.py | 12 +++++++ 5 files changed, 114 insertions(+), 4 deletions(-) create mode 100644 synapse/storage/schema/delta/53/users_in_public_rooms.sql (limited to 'synapse/handlers') diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index c21da8343a..fc45123d0c 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -64,6 +64,10 @@ class UserDirectoryHandler(object): # This is a set of user_id's we've inserted already self.initially_handled_users = set() + self.register_background_update_handler( + "users_in_public_rooms_initial", self._populate_users_in_public_rooms + ) + # The current position in the current_state_delta stream self.pos = None @@ -77,6 +81,41 @@ class UserDirectoryHandler(object): # we start populating the user directory self.clock.call_later(0, self.notify_new_event) + @defer.inlineCallbacks + def _populate_users_in_public_rooms(self, progress, batch_size): + """ + Populate the users_in_public_rooms table with the contents of the + users_who_share_public_rooms table. + """ + + def _fetch(txn): + sql = "SELECT DISTINCT other_user_id FROM users_who_share_public_rooms" + txn.execute(sql) + return txn.fetchall() + + users = yield self.store.runInteraction( + "populate_users_in_public_rooms_fetch", _fetch + ) + + if users: + + def _fill(txn): + self._simple_upsert_many_txn( + txn, + table="users_in_public_rooms", + key_names=["user_id"], + key_values=users, + value_names=(), + value_values=None, + ) + + users = yield self.store.runInteraction( + "populate_users_in_public_rooms_fill", _fill + ) + + yield self._end_background_update("users_in_public_rooms_initial") + defer.returnValue(1) + def search_users(self, user_id, search_term, limit): """Searches for users in directory @@ -231,7 +270,7 @@ class UserDirectoryHandler(object): unhandled_users = user_ids - self.initially_handled_users yield self.store.add_profiles_to_user_dir( - {user_id: users_with_profile[user_id] for user_id in unhandled_users}, + {user_id: users_with_profile[user_id] for user_id in unhandled_users} ) self.initially_handled_users |= unhandled_users diff --git a/synapse/storage/schema/delta/53/user_share.sql b/synapse/storage/schema/delta/53/user_share.sql index 14424ded0c..5831b1a6f8 100644 --- a/synapse/storage/schema/delta/53/user_share.sql +++ b/synapse/storage/schema/delta/53/user_share.sql @@ -16,9 +16,6 @@ -- Old disused version of the tables below. DROP TABLE IF EXISTS users_who_share_rooms; --- This is no longer used because it's duplicated by the users_who_share_public_rooms -DROP TABLE IF EXISTS users_in_public_rooms; - -- Tables keeping track of what users share rooms. This is a map of local users -- to local or remote users, per room. Remote users cannot be in the user_id -- column, only the other_user_id column. There are two tables, one for public diff --git a/synapse/storage/schema/delta/53/users_in_public_rooms.sql b/synapse/storage/schema/delta/53/users_in_public_rooms.sql new file mode 100644 index 0000000000..bd57fd778b --- /dev/null +++ b/synapse/storage/schema/delta/53/users_in_public_rooms.sql @@ -0,0 +1,28 @@ +/* Copyright 2019 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- We don't need the old version of this table. +DROP TABLE IF EXISTS users_in_public_rooms; + +-- Track what users are in public rooms. +CREATE TABLE IF NOT EXISTS users_in_public_rooms ( + user_id TEXT NOT NULL +); + +CREATE UNIQUE INDEX users_in_public_rooms_u_idx ON users_in_public_rooms(user_id); + +-- Fill the table. +INSERT INTO background_updates (update_name, progress_json) VALUES + ('users_in_public_rooms_initial', '{}'); diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 2317d22ed6..8f40277b50 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -241,6 +241,9 @@ class UserDirectoryStore(SQLBaseStore): self._simple_delete_txn( txn, table="user_directory_search", keyvalues={"user_id": user_id} ) + self._simple_delete_txn( + txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} + ) self._simple_delete_txn( txn, table="users_who_share_public_rooms", @@ -339,6 +342,21 @@ class UserDirectoryStore(SQLBaseStore): value_names=(), value_values=None, ) + + # If it's a public room, also update them in users_in_public_rooms. + # We don't look before they're in the table before we do it, as it's + # more efficient to simply have Postgres do that (one UPSERT vs one + # SELECT and maybe one INSERT). + if not share_private: + for user_id in set([x[1] for x in user_id_tuples]): + self._simple_upsert_txn( + txn, + "users_in_public_rooms", + keyvalues={"user_id": user_id}, + values={}, + desc="add_user_as_in_public_room", + ) + for user_id, other_user_id in user_id_tuples: txn.call_after( self.get_users_who_share_room_from_dir.invalidate, (user_id,) @@ -379,6 +397,21 @@ class UserDirectoryStore(SQLBaseStore): table="users_who_share_public_rooms", keyvalues={"other_user_id": user_id, "room_id": room_id}, ) + + # Are the users still in a public room after we deleted them from this one? + still_in_public = self._simple_select_one_onecol_txn( + txn, + "users_who_share_public_rooms", + keyvalues={"other_user_id": user_id}, + retcol="other_user_id", + allow_none=True, + ) + + if still_in_public is None: + self._simple_delete_txn( + txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} + ) + txn.call_after( self.get_users_who_share_room_from_dir.invalidate, (user_id,) ) @@ -452,6 +485,7 @@ class UserDirectoryStore(SQLBaseStore): def _delete_all_from_user_dir_txn(txn): txn.execute("DELETE FROM user_directory") txn.execute("DELETE FROM user_directory_search") + txn.execute("DELETE FROM users_in_public_rooms") txn.execute("DELETE FROM users_who_share_public_rooms") txn.execute("DELETE FROM users_who_share_private_rooms") txn.call_after(self.get_user_in_directory.invalidate_all) diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index a16a2dc67b..0e0ac0a48b 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -121,6 +121,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.assertEqual( self._compress_shared(shares_private), set([(u1, u2, room), (u2, u1, room)]) ) + self.assertEqual(set(public_users), set([u1, u2])) # We get one search result when searching for user2 by user1. s = self.get_success(self.handler.search_users(u1, "user2", 10)) @@ -140,9 +141,11 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): # Check we have removed the values. shares_public = self.get_users_who_share_public_rooms() shares_private = self.get_users_who_share_private_rooms() + public_users = self.get_users_in_public_rooms() self.assertEqual(shares_public, []) self.assertEqual(self._compress_shared(shares_private), set()) + self.assertEqual(public_users, [u1]) # User1 now gets no search results for any of the other users. s = self.get_success(self.handler.search_users(u1, "user2", 10)) @@ -160,6 +163,15 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): r.add((i["user_id"], i["other_user_id"], i["room_id"])) return r + def get_users_in_public_rooms(self): + return self.get_success( + self.store._simple_select_list( + "users_in_public_rooms", + None, + ["user_id"], + ) + ) + def get_users_who_share_public_rooms(self): return self.get_success( self.store._simple_select_list( -- cgit 1.5.1 From 5ba8ceab4cd7062d9f1b23a19d43f8a9ef7c5d60 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 12 Mar 2019 00:35:31 +1100 Subject: fixes --- synapse/handlers/user_directory.py | 45 ++++-------------------------- synapse/storage/_base.py | 13 +++++++-- synapse/storage/user_directory.py | 52 +++++++++++++++++++++++++++++++---- tests/handlers/test_user_directory.py | 16 ++++++++--- 4 files changed, 74 insertions(+), 52 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index fc45123d0c..20a026e776 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -60,14 +60,16 @@ class UserDirectoryHandler(object): self.update_user_directory = hs.config.update_user_directory self.search_all_users = hs.config.user_directory_search_all_users + # If we're a worker, don't sleep when doing the initial room work, as it + # won't monopolise the master's CPU. + if hs.config.worker_app: + self.INITIAL_ROOM_SLEEP_MS = 0 + self.INITIAL_USER_SLEEP_MS = 0 + # When start up for the first time we need to populate the user_directory. # This is a set of user_id's we've inserted already self.initially_handled_users = set() - self.register_background_update_handler( - "users_in_public_rooms_initial", self._populate_users_in_public_rooms - ) - # The current position in the current_state_delta stream self.pos = None @@ -81,41 +83,6 @@ class UserDirectoryHandler(object): # we start populating the user directory self.clock.call_later(0, self.notify_new_event) - @defer.inlineCallbacks - def _populate_users_in_public_rooms(self, progress, batch_size): - """ - Populate the users_in_public_rooms table with the contents of the - users_who_share_public_rooms table. - """ - - def _fetch(txn): - sql = "SELECT DISTINCT other_user_id FROM users_who_share_public_rooms" - txn.execute(sql) - return txn.fetchall() - - users = yield self.store.runInteraction( - "populate_users_in_public_rooms_fetch", _fetch - ) - - if users: - - def _fill(txn): - self._simple_upsert_many_txn( - txn, - table="users_in_public_rooms", - key_names=["user_id"], - key_values=users, - value_names=(), - value_values=None, - ) - - users = yield self.store.runInteraction( - "populate_users_in_public_rooms_fill", _fill - ) - - yield self._end_background_update("users_in_public_rooms_initial") - defer.returnValue(1) - def search_users(self, user_id, search_term, limit): """Searches for users in directory diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index a0333d5309..7e3903859b 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -767,18 +767,25 @@ class SQLBaseStore(object): """ allvalues = {} allvalues.update(keyvalues) - allvalues.update(values) allvalues.update(insertion_values) + if not values: + latter = "NOTHING" + else: + allvalues.update(values) + latter = ( + "UPDATE SET " + ", ".join(k + "=EXCLUDED." + k for k in values) + ) + sql = ( "INSERT INTO %s (%s) VALUES (%s) " - "ON CONFLICT (%s) DO UPDATE SET %s" + "ON CONFLICT (%s) DO %s" ) % ( table, ", ".join(k for k in allvalues), ", ".join("?" for _ in allvalues), ", ".join(k for k in keyvalues), - ", ".join(k + "=EXCLUDED." + k for k in values), + latter ) txn.execute(sql, list(allvalues.values())) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 8f40277b50..a15366a117 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -22,16 +22,57 @@ from twisted.internet import defer from synapse.api.constants import EventTypes, JoinRules from synapse.storage.engines import PostgresEngine, Sqlite3Engine +from synapse.storage.background_updates import BackgroundUpdateStore from synapse.storage.state import StateFilter from synapse.types import get_domain_from_id, get_localpart_from_id from synapse.util.caches.descriptors import cached, cachedInlineCallbacks -from ._base import SQLBaseStore - logger = logging.getLogger(__name__) -class UserDirectoryStore(SQLBaseStore): +class UserDirectoryStore(BackgroundUpdateStore): + def __init__(self, dbconn, hs): + super(UserDirectoryStore, self).__init__(dbconn, hs) + + self.register_background_update_handler( + "users_in_public_rooms_initial", self._populate_users_in_public_rooms + ) + + + @defer.inlineCallbacks + def _populate_users_in_public_rooms(self, progress, batch_size): + """ + Populate the users_in_public_rooms table with the contents of the + users_who_share_public_rooms table. + """ + + def _fetch(txn): + sql = "SELECT DISTINCT other_user_id FROM users_who_share_public_rooms" + txn.execute(sql) + return txn.fetchall() + + users = yield self.runInteraction( + "populate_users_in_public_rooms_fetch", _fetch + ) + + if users: + def _fill(txn): + self._simple_upsert_many_txn( + txn, + table="users_in_public_rooms", + key_names=["user_id"], + key_values=users, + value_names=(), + value_values=None, + ) + + users = yield self.runInteraction( + "populate_users_in_public_rooms_fill", _fill + ) + + yield self._end_background_update("users_in_public_rooms_initial") + defer.returnValue(1) + @defer.inlineCallbacks def is_room_world_readable_or_publicly_joinable(self, room_id): """Check if the room is either world_readable or publically joinable @@ -353,8 +394,7 @@ class UserDirectoryStore(SQLBaseStore): txn, "users_in_public_rooms", keyvalues={"user_id": user_id}, - values={}, - desc="add_user_as_in_public_room", + values=None, ) for user_id, other_user_id in user_id_tuples: @@ -603,7 +643,7 @@ class UserDirectoryStore(SQLBaseStore): else: join_clause = """ LEFT JOIN ( - SELECT other_user_id AS user_id FROM users_who_share_public_rooms + SELECT user_id FROM users_in_public_rooms UNION SELECT other_user_id AS user_id FROM users_who_share_private_rooms WHERE user_id = ? diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index 0e0ac0a48b..7a78451a6d 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -116,12 +116,13 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): # Check we have populated the database correctly. shares_public = self.get_users_who_share_public_rooms() shares_private = self.get_users_who_share_private_rooms() + public_users = self.get_users_in_public_rooms() self.assertEqual(shares_public, []) self.assertEqual( self._compress_shared(shares_private), set([(u1, u2, room), (u2, u1, room)]) ) - self.assertEqual(set(public_users), set([u1, u2])) + self.assertEqual(public_users, []) # We get one search result when searching for user2 by user1. s = self.get_success(self.handler.search_users(u1, "user2", 10)) @@ -145,7 +146,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.assertEqual(shares_public, []) self.assertEqual(self._compress_shared(shares_private), set()) - self.assertEqual(public_users, [u1]) + self.assertEqual(public_users, []) # User1 now gets no search results for any of the other users. s = self.get_success(self.handler.search_users(u1, "user2", 10)) @@ -165,10 +166,10 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): def get_users_in_public_rooms(self): return self.get_success( - self.store._simple_select_list( + self.store._simple_select_onecol( "users_in_public_rooms", None, - ["user_id"], + "user_id", ) ) @@ -214,9 +215,12 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): shares_public = self.get_users_who_share_public_rooms() shares_private = self.get_users_who_share_private_rooms() + public_users = self.get_users_in_public_rooms() + # Nothing updated yet self.assertEqual(shares_private, []) self.assertEqual(shares_public, []) + self.assertEqual(public_users, []) # Reset the handled users caches self.handler.initially_handled_users = set() @@ -233,6 +237,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): shares_public = self.get_users_who_share_public_rooms() shares_private = self.get_users_who_share_private_rooms() + public_users = self.get_users_in_public_rooms() # User 1 and User 2 share public rooms self.assertEqual( @@ -245,6 +250,9 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): set([(u1, u3, private_room), (u3, u1, private_room)]), ) + # User 1 and 2 are in public rooms + self.assertEqual(set(public_users), set([u1, u2])) + def test_search_all_users(self): """ Search all users = True means that a user does not have to share a -- cgit 1.5.1 From 10480c434881d9c38acc02c98ab4b85b98097870 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 12 Mar 2019 21:47:14 +1100 Subject: fixup --- synapse/handlers/user_directory.py | 117 +++++++++------ .../schema/delta/53/users_in_public_rooms.sql | 17 ++- synapse/storage/user_directory.py | 165 +++++++-------------- tests/handlers/test_user_directory.py | 36 +++-- 4 files changed, 159 insertions(+), 176 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 20a026e776..f9f7b8abd0 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -247,38 +247,58 @@ class UserDirectoryHandler(object): # We also batch up inserts/updates, but try to avoid too many at once. to_insert = set() count = 0 - for user_id in user_ids: - if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: - yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) - if not self.is_mine_id(user_id): - count += 1 - continue - - if self.store.get_if_app_services_interested_in_user(user_id): - count += 1 - continue + if is_public: + for user_id in user_ids: + if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: + yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) - for other_user_id in user_ids: - if user_id == other_user_id: + if self.store.get_if_app_services_interested_in_user(user_id): + count += 1 continue + to_insert.add(user_id) + if len(to_insert) > self.INITIAL_ROOM_BATCH_SIZE: + yield self.store.add_users_in_public_rooms(room_id, to_insert) + to_insert.clear() + + if to_insert: + yield self.store.add_users_in_public_rooms(room_id, to_insert) + to_insert.clear() + else: + + for user_id in user_ids: if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) - count += 1 - user_set = (user_id, other_user_id) - to_insert.add(user_set) + if not self.is_mine_id(user_id): + count += 1 + continue - if len(to_insert) > self.INITIAL_ROOM_BATCH_SIZE: - yield self.store.add_users_who_share_room( - room_id, not is_public, to_insert - ) - to_insert.clear() + if self.store.get_if_app_services_interested_in_user(user_id): + count += 1 + continue - if to_insert: - yield self.store.add_users_who_share_room(room_id, not is_public, to_insert) - to_insert.clear() + for other_user_id in user_ids: + if user_id == other_user_id: + continue + + if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: + yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) + count += 1 + + user_set = (user_id, other_user_id) + to_insert.add(user_set) + + if len(to_insert) > self.INITIAL_ROOM_BATCH_SIZE: + yield self.store.add_users_who_share_private_room( + room_id, not is_public, to_insert + ) + to_insert.clear() + + if to_insert: + yield self.store.add_users_who_share_private_room(room_id, to_insert) + to_insert.clear() @defer.inlineCallbacks def _handle_deltas(self, deltas): @@ -451,34 +471,37 @@ class UserDirectoryHandler(object): # Now we update users who share rooms with users. users_with_profile = yield self.state.get_current_user_in_room(room_id) - to_insert = set() + if is_public: + yield self.store.add_users_in_public_rooms(room_id, (user_id,)) + else: + to_insert = set() - # First, if they're our user then we need to update for every user - if self.is_mine_id(user_id): + # First, if they're our user then we need to update for every user + if self.is_mine_id(user_id): - is_appservice = self.store.get_if_app_services_interested_in_user(user_id) + is_appservice = self.store.get_if_app_services_interested_in_user(user_id) - # We don't care about appservice users. - if not is_appservice: - for other_user_id in users_with_profile: - if user_id == other_user_id: - continue + # We don't care about appservice users. + if not is_appservice: + for other_user_id in users_with_profile: + if user_id == other_user_id: + continue - to_insert.add((user_id, other_user_id)) + to_insert.add((user_id, other_user_id)) - # Next we need to update for every local user in the room - for other_user_id in users_with_profile: - if user_id == other_user_id: - continue + # Next we need to update for every local user in the room + for other_user_id in users_with_profile: + if user_id == other_user_id: + continue - is_appservice = self.store.get_if_app_services_interested_in_user( - other_user_id - ) - if self.is_mine_id(other_user_id) and not is_appservice: - to_insert.add((other_user_id, user_id)) + is_appservice = self.store.get_if_app_services_interested_in_user( + other_user_id + ) + if self.is_mine_id(other_user_id) and not is_appservice: + to_insert.add((other_user_id, user_id)) - if to_insert: - yield self.store.add_users_who_share_room(room_id, not is_public, to_insert) + if to_insert: + yield self.store.add_users_who_share_private_room(room_id, to_insert) @defer.inlineCallbacks def _handle_remove_user(self, room_id, user_id): @@ -493,10 +516,10 @@ class UserDirectoryHandler(object): # Remove user from sharing tables yield self.store.remove_user_who_share_room(user_id, room_id) - # Are they still in a room with members? If not, remove them entirely. - users_in_room_with = yield self.store.get_users_who_share_room_from_dir(user_id) + # Are they still in any rooms? If not, remove them entirely. + rooms_user_is_in = yield self.store.get_rooms_user_is_in(user_id) - if len(users_in_room_with) == 0: + if len(rooms_user_is_in) == 0: yield self.store.remove_from_user_dir(user_id) @defer.inlineCallbacks diff --git a/synapse/storage/schema/delta/53/users_in_public_rooms.sql b/synapse/storage/schema/delta/53/users_in_public_rooms.sql index bd57fd778b..40adc98387 100644 --- a/synapse/storage/schema/delta/53/users_in_public_rooms.sql +++ b/synapse/storage/schema/delta/53/users_in_public_rooms.sql @@ -16,13 +16,20 @@ -- We don't need the old version of this table. DROP TABLE IF EXISTS users_in_public_rooms; +-- Old version of users_in_public_rooms +DROP TABLE IF EXISTS users_who_share_public_rooms; + -- Track what users are in public rooms. CREATE TABLE IF NOT EXISTS users_in_public_rooms ( - user_id TEXT NOT NULL + user_id TEXT NOT NULL, + room_id TEXT NOT NULL ); -CREATE UNIQUE INDEX users_in_public_rooms_u_idx ON users_in_public_rooms(user_id); +CREATE UNIQUE INDEX users_in_public_rooms_u_idx ON users_in_public_rooms(user_id, room_id); + +-- Track what users are publicly visible +CREATE TABLE IF NOT EXISTS publicly_visible_users ( + user_id TEXT NOT NULL +); --- Fill the table. -INSERT INTO background_updates (update_name, progress_json) VALUES - ('users_in_public_rooms_initial', '{}'); +CREATE UNIQUE INDEX publicly_visible_users_u_idx ON publicly_visible_users(user_id); diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 4de552c1bb..af4260bc61 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -21,57 +21,15 @@ from six import iteritems from twisted.internet import defer from synapse.api.constants import EventTypes, JoinRules -from synapse.storage.background_updates import BackgroundUpdateStore from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.storage.state import StateFilter from synapse.types import get_domain_from_id, get_localpart_from_id -from synapse.util.caches.descriptors import cached, cachedInlineCallbacks +from synapse.util.caches.descriptors import cached logger = logging.getLogger(__name__) -class UserDirectoryStore(BackgroundUpdateStore): - def __init__(self, dbconn, hs): - super(UserDirectoryStore, self).__init__(dbconn, hs) - - self.register_background_update_handler( - "users_in_public_rooms_initial", self._populate_users_in_public_rooms - ) - - @defer.inlineCallbacks - def _populate_users_in_public_rooms(self, progress, batch_size): - """ - Populate the users_in_public_rooms table with the contents of the - users_who_share_public_rooms table. - """ - - def _fetch(txn): - sql = "SELECT DISTINCT other_user_id FROM users_who_share_public_rooms" - txn.execute(sql) - return txn.fetchall() - - users = yield self.runInteraction( - "populate_users_in_public_rooms_fetch", _fetch - ) - - if users: - def _fill(txn): - self._simple_upsert_many_txn( - txn, - table="users_in_public_rooms", - key_names=["user_id"], - key_values=users, - value_names=(), - value_values=None, - ) - - users = yield self.runInteraction( - "populate_users_in_public_rooms_fill", _fill - ) - - yield self._end_background_update("users_in_public_rooms_initial") - defer.returnValue(1) - +class UserDirectoryStore(object): @defer.inlineCallbacks def is_room_world_readable_or_publicly_joinable(self, room_id): """Check if the room is either world_readable or publically joinable @@ -282,17 +240,10 @@ class UserDirectoryStore(BackgroundUpdateStore): txn, table="user_directory_search", keyvalues={"user_id": user_id} ) self._simple_delete_txn( - txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} + txn, table="publicly_visible_users", keyvalues={"user_id": user_id} ) self._simple_delete_txn( - txn, - table="users_who_share_public_rooms", - keyvalues={"user_id": user_id}, - ) - self._simple_delete_txn( - txn, - table="users_who_share_public_rooms", - keyvalues={"other_user_id": user_id}, + txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} ) self._simple_delete_txn( txn, @@ -314,9 +265,9 @@ class UserDirectoryStore(BackgroundUpdateStore): in the given room_id """ user_ids_share_pub = yield self._simple_select_onecol( - table="users_who_share_public_rooms", + table="publicly_visible_users", keyvalues={"room_id": room_id}, - retcol="other_user_id", + retcol="user_id", desc="get_users_in_dir_due_to_room", ) @@ -354,26 +305,19 @@ class UserDirectoryStore(BackgroundUpdateStore): rows = yield self._execute("get_all_local_users", None, sql) defer.returnValue([name for name, in rows]) - def add_users_who_share_room(self, room_id, share_private, user_id_tuples): - """Insert entries into the users_who_share_*_rooms table. The first + def add_users_who_share_private_room(self, room_id, user_id_tuples): + """Insert entries into the users_who_share_private_rooms table. The first user should be a local user. Args: room_id (str) - share_private (bool): Is the room private user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs. """ def _add_users_who_share_room_txn(txn): - - if share_private: - tbl = "users_who_share_private_rooms" - else: - tbl = "users_who_share_public_rooms" - self._simple_upsert_many_txn( txn, - table=tbl, + table="users_who_share_private_rooms", key_names=["user_id", "other_user_id", "room_id"], key_values=[ (user_id, other_user_id, room_id) @@ -383,26 +327,44 @@ class UserDirectoryStore(BackgroundUpdateStore): value_values=None, ) - # If it's a public room, also update them in users_in_public_rooms. + return self.runInteraction( + "add_users_who_share_room", _add_users_who_share_room_txn + ) + + def add_users_in_public_rooms(self, room_id, user_ids): + """Insert entries into the users_who_share_private_rooms table. The first + user should be a local user. + + Args: + room_id (str) + user_ids (list[str]) + """ + + def _add_users_in_public_rooms_txn(txn): + + self._simple_upsert_many_txn( + txn, + table="users_in_public_rooms", + key_names=["user_id", "room_id"], + key_values=[(user_id, room_id) for user_id in user_ids], + value_names=(), + value_values=None, + ) + + # If it's a public room, also update them in publicly_visible_users. # We don't look before they're in the table before we do it, as it's # more efficient to simply have Postgres do that (one UPSERT vs one # SELECT and maybe one INSERT). - if not share_private: - for user_id in set([x[1] for x in user_id_tuples]): - self._simple_upsert_txn( - txn, - "users_in_public_rooms", - keyvalues={"user_id": user_id}, - values={}, - ) - - for user_id, other_user_id in user_id_tuples: - txn.call_after( - self.get_users_who_share_room_from_dir.invalidate, (user_id,) + for user_id in user_ids: + self._simple_upsert_txn( + txn, + "publicly_visible_users", + keyvalues={"user_id": user_id}, + values={}, ) return self.runInteraction( - "add_users_who_share_room", _add_users_who_share_room_txn + "add_users_in_public_rooms", _add_users_in_public_rooms_txn ) def remove_user_who_share_room(self, user_id, room_id): @@ -428,40 +390,32 @@ class UserDirectoryStore(BackgroundUpdateStore): ) self._simple_delete_txn( txn, - table="users_who_share_public_rooms", + table="users_in_public_rooms", keyvalues={"user_id": user_id, "room_id": room_id}, ) - self._simple_delete_txn( - txn, - table="users_who_share_public_rooms", - keyvalues={"other_user_id": user_id, "room_id": room_id}, - ) # Are the users still in a public room after we deleted them from this one? still_in_public = self._simple_select_one_onecol_txn( txn, - "users_who_share_public_rooms", - keyvalues={"other_user_id": user_id}, - retcol="other_user_id", + "users_in_public_rooms", + keyvalues={"user_id": user_id}, + retcol="user_id", allow_none=True, ) if still_in_public is None: self._simple_delete_txn( - txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} + txn, table="publicly_visible_users", keyvalues={"user_id": user_id} ) - txn.call_after( - self.get_users_who_share_room_from_dir.invalidate, (user_id,) - ) - return self.runInteraction( "remove_user_who_share_room", _remove_user_who_share_room_txn ) - @cachedInlineCallbacks(max_entries=500000, iterable=True) - def get_users_who_share_room_from_dir(self, user_id): - """Returns the set of users who share a room with `user_id` + @defer.inlineCallbacks + def get_rooms_user_is_in(self, user_id): + """ + Returns the rooms that a user is in. Args: user_id(str): Must be a local user @@ -472,23 +426,19 @@ class UserDirectoryStore(BackgroundUpdateStore): rows = yield self._simple_select_onecol( table="users_who_share_private_rooms", keyvalues={"user_id": user_id}, - retcol="other_user_id", - desc="get_users_who_share_room_with_user", + retcol="room_id", + desc="get_rooms_user_is_in", ) pub_rows = yield self._simple_select_onecol( - table="users_who_share_public_rooms", + table="users_in_public_rooms", keyvalues={"user_id": user_id}, - retcol="other_user_id", - desc="get_users_who_share_room_with_user", + retcol="room_id", + desc="get_rooms_user_is_in", ) users = set(pub_rows) users.update(rows) - - # Remove the user themselves from this list. - users.discard(user_id) - defer.returnValue(list(users)) @defer.inlineCallbacks @@ -525,10 +475,9 @@ class UserDirectoryStore(BackgroundUpdateStore): txn.execute("DELETE FROM user_directory") txn.execute("DELETE FROM user_directory_search") txn.execute("DELETE FROM users_in_public_rooms") - txn.execute("DELETE FROM users_who_share_public_rooms") + txn.execute("DELETE FROM publicly_visible_users") txn.execute("DELETE FROM users_who_share_private_rooms") txn.call_after(self.get_user_in_directory.invalidate_all) - txn.call_after(self.get_users_who_share_room_from_dir.invalidate_all) return self.runInteraction( "delete_all_from_user_dir", _delete_all_from_user_dir_txn @@ -641,7 +590,7 @@ class UserDirectoryStore(BackgroundUpdateStore): where_clause = "1=1" else: join_clause = """ - LEFT JOIN users_in_public_rooms AS p USING (user_id) + LEFT JOIN publicly_visible_users AS p USING (user_id) LEFT JOIN ( SELECT other_user_id AS user_id FROM users_who_share_private_rooms WHERE user_id = ? diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index 7a78451a6d..d8248def3f 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -114,11 +114,11 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.helper.join(room, user=u2, tok=u2_token) # Check we have populated the database correctly. - shares_public = self.get_users_who_share_public_rooms() shares_private = self.get_users_who_share_private_rooms() public_users = self.get_users_in_public_rooms() + visible_users = self.get_publicly_visible_users() - self.assertEqual(shares_public, []) + self.assertEqual(visible_users, []) self.assertEqual( self._compress_shared(shares_private), set([(u1, u2, room), (u2, u1, room)]) ) @@ -140,11 +140,11 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.helper.leave(room, user=u2, tok=u2_token) # Check we have removed the values. - shares_public = self.get_users_who_share_public_rooms() shares_private = self.get_users_who_share_private_rooms() public_users = self.get_users_in_public_rooms() + visible_users = self.get_publicly_visible_users() - self.assertEqual(shares_public, []) + self.assertEqual(visible_users, []) self.assertEqual(self._compress_shared(shares_private), set()) self.assertEqual(public_users, []) @@ -165,20 +165,24 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): return r def get_users_in_public_rooms(self): - return self.get_success( - self.store._simple_select_onecol( + r = self.get_success( + self.store._simple_select_list( "users_in_public_rooms", None, - "user_id", + ("user_id", "room_id"), ) ) + retval = [] + for i in r: + retval.append((i["user_id"], i["room_id"])) + return retval - def get_users_who_share_public_rooms(self): + def get_publicly_visible_users(self): return self.get_success( - self.store._simple_select_list( - "users_who_share_public_rooms", + self.store._simple_select_onecol( + "publicly_visible_users", None, - ["user_id", "other_user_id", "room_id"], + "user_id", ) ) @@ -213,13 +217,13 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.get_success(self.store.update_user_directory_stream_pos(None)) self.get_success(self.store.delete_all_from_user_dir()) - shares_public = self.get_users_who_share_public_rooms() shares_private = self.get_users_who_share_private_rooms() public_users = self.get_users_in_public_rooms() + visible_users = self.get_publicly_visible_users() # Nothing updated yet self.assertEqual(shares_private, []) - self.assertEqual(shares_public, []) + self.assertEqual(visible_users, []) self.assertEqual(public_users, []) # Reset the handled users caches @@ -235,13 +239,13 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.get_success(d) - shares_public = self.get_users_who_share_public_rooms() shares_private = self.get_users_who_share_private_rooms() public_users = self.get_users_in_public_rooms() + visible_users = self.get_publicly_visible_users() # User 1 and User 2 share public rooms self.assertEqual( - self._compress_shared(shares_public), set([(u1, u2, room), (u2, u1, room)]) + set(public_users), set([(u1, room), (u2, room)]) ) # User 1 and User 3 share private rooms @@ -251,7 +255,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): ) # User 1 and 2 are in public rooms - self.assertEqual(set(public_users), set([u1, u2])) + self.assertEqual(set(visible_users), set([u1, u2])) def test_search_all_users(self): """ -- cgit 1.5.1 From c0332d095f6116c1e8af2738bcc8f1fbe5b4432c Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Wed, 13 Mar 2019 01:30:54 +1100 Subject: fixup --- synapse/handlers/user_directory.py | 2 +- synapse/storage/user_directory.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index f9f7b8abd0..d92f8c529c 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -517,7 +517,7 @@ class UserDirectoryHandler(object): yield self.store.remove_user_who_share_room(user_id, room_id) # Are they still in any rooms? If not, remove them entirely. - rooms_user_is_in = yield self.store.get_rooms_user_is_in(user_id) + rooms_user_is_in = yield self.store.get_user_dir_rooms_user_is_in(user_id) if len(rooms_user_is_in) == 0: yield self.store.remove_from_user_dir(user_id) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 8fd4fd50da..1c00b956e5 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -385,7 +385,7 @@ class UserDirectoryStore(SQLBaseStore): ) @defer.inlineCallbacks - def get_rooms_user_is_in(self, user_id): + def get_user_dir_rooms_user_is_in(self, user_id): """ Returns the rooms that a user is in. -- cgit 1.5.1 From d42c81d724477803e6b0db6017281a3394a9cee5 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Tue, 12 Mar 2019 14:42:53 +0000 Subject: Transfer local user's push rules on room upgrade (#4838) Transfer push rules (notifications) on room upgrade --- changelog.d/4838.bugfix | 1 + synapse/handlers/room_member.py | 4 +++ synapse/storage/push_rule.py | 57 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+) create mode 100644 changelog.d/4838.bugfix (limited to 'synapse/handlers') diff --git a/changelog.d/4838.bugfix b/changelog.d/4838.bugfix new file mode 100644 index 0000000000..7f4fceabff --- /dev/null +++ b/changelog.d/4838.bugfix @@ -0,0 +1 @@ +Transfer a user's notification settings (push rules) on room upgrade. \ No newline at end of file diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 190ea2c7b1..aead9e4608 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -232,6 +232,10 @@ class RoomMemberHandler(object): self.copy_room_tags_and_direct_to_room( predecessor["room_id"], room_id, user_id, ) + # Move over old push rules + self.store.move_push_rules_from_room_to_room_for_user( + predecessor["room_id"], room_id, user_id, + ) elif event.membership == Membership.LEAVE: if prev_member_event_id: prev_member_event = yield self.store.get_event(prev_member_event_id) diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index 6a5028961d..4b8438c3e9 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -185,6 +185,63 @@ class PushRulesWorkerStore(ApplicationServiceWorkerStore, defer.returnValue(results) + @defer.inlineCallbacks + def move_push_rule_from_room_to_room( + self, new_room_id, user_id, rule, + ): + """Move a single push rule from one room to another for a specific user. + + Args: + new_room_id (str): ID of the new room. + user_id (str): ID of user the push rule belongs to. + rule (Dict): A push rule. + """ + # Create new rule id + rule_id_scope = '/'.join(rule["rule_id"].split('/')[:-1]) + new_rule_id = rule_id_scope + "/" + new_room_id + + # Change room id in each condition + for condition in rule.get("conditions", []): + if condition.get("key") == "room_id": + condition["pattern"] = new_room_id + + # Add the rule for the new room + yield self.add_push_rule( + user_id=user_id, + rule_id=new_rule_id, + priority_class=rule["priority_class"], + conditions=rule["conditions"], + actions=rule["actions"], + ) + + # Delete push rule for the old room + yield self.delete_push_rule(user_id, rule["rule_id"]) + + @defer.inlineCallbacks + def move_push_rules_from_room_to_room_for_user( + self, old_room_id, new_room_id, user_id, + ): + """Move all of the push rules from one room to another for a specific + user. + + Args: + old_room_id (str): ID of the old room. + new_room_id (str): ID of the new room. + user_id (str): ID of user to copy push rules for. + """ + # Retrieve push rules for this user + user_push_rules = yield self.get_push_rules_for_user(user_id) + + # Get rules relating to the old room, move them to the new room, then + # delete them from the old room + for rule in user_push_rules: + conditions = rule.get("conditions", []) + if any((c.get("key") == "room_id" and + c.get("pattern") == old_room_id) for c in conditions): + self.move_push_rule_from_room_to_room( + new_room_id, user_id, rule, + ) + @defer.inlineCallbacks def bulk_get_push_rules_for_room(self, event, context): state_group = context.state_group -- cgit 1.5.1 From eed7271b3b1e754e1b164da61d225ca4326cfa0a Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 12 Mar 2019 16:50:58 +0000 Subject: declare a ReadReceipt class I'm going to use this in queues and things, so it'll be useful to give it more of a structure. --- synapse/handlers/receipts.py | 46 +++++++++++++++++++++----------------------- synapse/types.py | 12 ++++++++++++ 2 files changed, 34 insertions(+), 24 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 1728089667..733e7c3752 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -16,7 +16,7 @@ import logging from twisted.internet import defer -from synapse.types import get_domain_from_id +from synapse.types import ReadReceipt, get_domain_from_id from ._base import BaseHandler @@ -42,13 +42,13 @@ class ReceiptsHandler(BaseHandler): """Called when we receive an EDU of type m.receipt from a remote HS. """ receipts = [ - { - "room_id": room_id, - "receipt_type": receipt_type, - "user_id": user_id, - "event_ids": user_values["event_ids"], - "data": user_values.get("data", {}), - } + ReadReceipt( + room_id=room_id, + receipt_type=receipt_type, + user_id=user_id, + event_ids=user_values["event_ids"], + data=user_values.get("data", {}), + ) for room_id, room_values in content.items() for receipt_type, users in room_values.items() for user_id, user_values in users.items() @@ -64,14 +64,12 @@ class ReceiptsHandler(BaseHandler): max_batch_id = None for receipt in receipts: - room_id = receipt["room_id"] - receipt_type = receipt["receipt_type"] - user_id = receipt["user_id"] - event_ids = receipt["event_ids"] - data = receipt["data"] - res = yield self.store.insert_receipt( - room_id, receipt_type, user_id, event_ids, data + receipt.room_id, + receipt.receipt_type, + receipt.user_id, + receipt.event_ids, + receipt.data, ) if not res: @@ -107,15 +105,15 @@ class ReceiptsHandler(BaseHandler): """Called when a client tells us a local user has read up to the given event_id in the room. """ - receipt = { - "room_id": room_id, - "receipt_type": receipt_type, - "user_id": user_id, - "event_ids": [event_id], - "data": { + receipt = ReadReceipt( + room_id=room_id, + receipt_type=receipt_type, + user_id=user_id, + event_ids=[event_id], + data={ "ts": int(self.clock.time_msec()), - } - } + }, + ) is_new = yield self._handle_new_receipts([receipt]) if not is_new: @@ -124,7 +122,7 @@ class ReceiptsHandler(BaseHandler): # Work out which remote servers should be poked and poke them. # TODO: optimise this to move some of the work to the workers. - data = receipt["data"] + data = receipt.data # XXX why does this not use state.get_current_hosts_in_room() ? users = yield self.state.get_current_user_in_room(room_id) diff --git a/synapse/types.py b/synapse/types.py index d8cb64addb..3de94b6335 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -16,6 +16,8 @@ import re import string from collections import namedtuple +import attr + from synapse.api.errors import SynapseError @@ -455,3 +457,13 @@ class ThirdPartyInstanceID( @classmethod def create(cls, appservice_id, network_id,): return cls(appservice_id=appservice_id, network_id=network_id) + + +@attr.s(slots=True) +class ReadReceipt(object): + """Information about a read-receipt""" + room_id = attr.ib() + receipt_type = attr.ib() + user_id = attr.ib() + event_ids = attr.ib() + data = attr.ib() -- cgit 1.5.1 From fdcad8eabdf20718d053255555fb27ac190613c4 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 13 Mar 2019 15:55:37 +0000 Subject: Move client receipt processing to federation sender worker. This is mostly a prerequisite for #4730, but also fits with the general theme of "move everything off the master that we possibly can". --- synapse/app/federation_sender.py | 30 ++++++++++++++++++++++++++ synapse/federation/send_queue.py | 9 ++++++++ synapse/federation/transaction_queue.py | 35 +++++++++++++++++++++++++++++++ synapse/handlers/receipts.py | 37 ++++----------------------------- 4 files changed, 78 insertions(+), 33 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/app/federation_sender.py b/synapse/app/federation_sender.py index a461442fdc..9711a7147c 100644 --- a/synapse/app/federation_sender.py +++ b/synapse/app/federation_sender.py @@ -28,6 +28,7 @@ from synapse.config.logger import setup_logging from synapse.federation import send_queue from synapse.http.site import SynapseSite from synapse.metrics import RegistryProxy +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.metrics.resource import METRICS_PREFIX, MetricsResource from synapse.replication.slave.storage.deviceinbox import SlavedDeviceInboxStore from synapse.replication.slave.storage.devices import SlavedDeviceStore @@ -37,8 +38,10 @@ from synapse.replication.slave.storage.receipts import SlavedReceiptsStore from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.replication.slave.storage.transactions import SlavedTransactionStore from synapse.replication.tcp.client import ReplicationClientHandler +from synapse.replication.tcp.streams import ReceiptsStream from synapse.server import HomeServer from synapse.storage.engines import create_engine +from synapse.types import ReadReceipt from synapse.util.async_helpers import Linearizer from synapse.util.httpresourcetree import create_resource_tree from synapse.util.logcontext import LoggingContext, run_in_background @@ -202,6 +205,7 @@ class FederationSenderHandler(object): """ def __init__(self, hs, replication_client): self.store = hs.get_datastore() + self._is_mine_id = hs.is_mine_id self.federation_sender = hs.get_federation_sender() self.replication_client = replication_client @@ -234,6 +238,32 @@ class FederationSenderHandler(object): elif stream_name == "events": self.federation_sender.notify_new_events(token) + # ... and when new receipts happen + elif stream_name == ReceiptsStream.NAME: + run_as_background_process( + "process_receipts_for_federation", self._on_new_receipts, rows, + ) + + @defer.inlineCallbacks + def _on_new_receipts(self, rows): + """ + Args: + rows (iterable[synapse.replication.tcp.streams.ReceiptsStreamRow]): + new receipts to be processed + """ + for receipt in rows: + # we only want to send on receipts for our own users + if not self._is_mine_id(receipt.user_id): + continue + receipt_info = ReadReceipt( + receipt.room_id, + receipt.receipt_type, + receipt.user_id, + [receipt.event_id], + receipt.data, + ) + yield self.federation_sender.send_read_receipt(receipt_info) + @defer.inlineCallbacks def update_token(self, token): try: diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py index b7d0b25781..bcb41da338 100644 --- a/synapse/federation/send_queue.py +++ b/synapse/federation/send_queue.py @@ -183,6 +183,15 @@ class FederationRemoteSendQueue(object): self.notifier.on_new_replication_data() + def send_read_receipt(self, receipt): + """As per TransactionQueue + + Args: + receipt (synapse.types.ReadReceipt): + """ + # nothing to do here: the replication listener will handle it. + pass + def send_presence(self, states): """As per TransactionQueue diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index e5e42c647d..288cb5045c 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -290,6 +290,41 @@ class TransactionQueue(object): self._attempt_new_transaction(destination) + @defer.inlineCallbacks + def send_read_receipt(self, receipt): + """Send a RR to any other servers in the room + + Args: + receipt (synapse.types.ReadReceipt): receipt to be sent + """ + # Work out which remote servers should be poked and poke them. + domains = yield self.state.get_current_hosts_in_room(receipt.room_id) + domains = [d for d in domains if d != self.server_name] + if not domains: + return + + logger.debug("Sending receipt to: %r", domains) + + content = { + receipt.room_id: { + receipt.receipt_type: { + receipt.user_id: { + "event_ids": receipt.event_ids, + "data": receipt.data, + }, + }, + }, + } + key = (receipt.room_id, receipt.receipt_type, receipt.user_id) + + for domain in domains: + self.build_and_send_edu( + destination=domain, + edu_type="m.receipt", + content=content, + key=key, + ) + @logcontext.preserve_fn # the caller should not yield on this @defer.inlineCallbacks def send_presence(self, states): diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 733e7c3752..dd783ae134 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -16,9 +16,8 @@ import logging from twisted.internet import defer -from synapse.types import ReadReceipt, get_domain_from_id - -from ._base import BaseHandler +from synapse.handlers._base import BaseHandler +from synapse.types import ReadReceipt logger = logging.getLogger(__name__) @@ -87,7 +86,7 @@ class ReceiptsHandler(BaseHandler): # no new receipts defer.returnValue(False) - affected_room_ids = list(set([r["room_id"] for r in receipts])) + affected_room_ids = list(set([r.room_id for r in receipts])) self.notifier.on_new_event( "receipt_key", max_batch_id, rooms=affected_room_ids @@ -119,35 +118,7 @@ class ReceiptsHandler(BaseHandler): if not is_new: return - # Work out which remote servers should be poked and poke them. - - # TODO: optimise this to move some of the work to the workers. - data = receipt.data - - # XXX why does this not use state.get_current_hosts_in_room() ? - users = yield self.state.get_current_user_in_room(room_id) - remotedomains = set(get_domain_from_id(u) for u in users) - remotedomains = remotedomains.copy() - remotedomains.discard(self.server_name) - - logger.debug("Sending receipt to: %r", remotedomains) - - for domain in remotedomains: - self.federation.build_and_send_edu( - destination=domain, - edu_type="m.receipt", - content={ - room_id: { - receipt_type: { - user_id: { - "event_ids": [event_id], - "data": data, - } - } - }, - }, - key=(room_id, receipt_type, user_id), - ) + self.federation.send_read_receipt(receipt) @defer.inlineCallbacks def get_receipts_for_room(self, room_id, to_key): -- cgit 1.5.1 From 899e523d6d92dfbc17dce81eb36f63053e447a97 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 15 Mar 2019 17:46:16 +0000 Subject: Add ratelimiting on login (#4821) Add two ratelimiters on login (per-IP address and per-userID). --- changelog.d/4821.feature | 1 + docs/sample_config.yaml | 39 ++++++--- synapse/api/ratelimiting.py | 12 +++ synapse/config/ratelimiting.py | 58 +++++++++----- synapse/handlers/auth.py | 36 +++++++++ synapse/handlers/register.py | 4 +- synapse/rest/client/v1/login.py | 10 +++ synapse/rest/client/v2_alpha/register.py | 4 +- tests/rest/client/v1/test_login.py | 118 ++++++++++++++++++++++++++++ tests/rest/client/v2_alpha/test_register.py | 6 +- tests/utils.py | 8 +- 11 files changed, 259 insertions(+), 37 deletions(-) create mode 100644 changelog.d/4821.feature create mode 100644 tests/rest/client/v1/test_login.py (limited to 'synapse/handlers') diff --git a/changelog.d/4821.feature b/changelog.d/4821.feature new file mode 100644 index 0000000000..61d4eb8d60 --- /dev/null +++ b/changelog.d/4821.feature @@ -0,0 +1 @@ +Add configurable rate limiting to the /login endpoint. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 5f2534e465..b3df272c54 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -379,6 +379,34 @@ rc_messages_per_second: 0.2 # rc_message_burst_count: 10.0 +# Ratelimiting settings for registration and login. +# +# Each ratelimiting configuration is made of two parameters: +# - per_second: number of requests a client can send per second. +# - burst_count: number of requests a client can send before being throttled. +# +# Synapse currently uses the following configurations: +# - one for registration that ratelimits registration requests based on the +# client's IP address. +# - one for login that ratelimits login requests based on the client's IP +# address. +# - one for login that ratelimits login requests based on the account the +# client is attempting to log into. +# +# The defaults are as shown below. +# +#rc_registration: +# per_second: 0.17 +# burst_count: 3 +# +#rc_login: +# address: +# per_second: 0.17 +# burst_count: 3 +# account: +# per_second: 0.17 +# burst_count: 3 + # The federation window size in milliseconds # federation_rc_window_size: 1000 @@ -403,17 +431,6 @@ federation_rc_reject_limit: 50 # federation_rc_concurrent: 3 -# Number of registration requests a client can send per second. -# Defaults to 1/minute (0.17). -# -#rc_registration_requests_per_second: 0.17 - -# Number of registration requests a client can send before being -# throttled. -# Defaults to 3. -# -#rc_registration_request_burst_count: 3.0 - # Directory where uploaded images and attachments are stored. diff --git a/synapse/api/ratelimiting.py b/synapse/api/ratelimiting.py index ad68079eeb..296c4a1c17 100644 --- a/synapse/api/ratelimiting.py +++ b/synapse/api/ratelimiting.py @@ -14,6 +14,8 @@ import collections +from synapse.api.errors import LimitExceededError + class Ratelimiter(object): """ @@ -82,3 +84,13 @@ class Ratelimiter(object): break else: del self.message_counts[key] + + def ratelimit(self, key, time_now_s, rate_hz, burst_count, update=True): + allowed, time_allowed = self.can_do_action( + key, time_now_s, rate_hz, burst_count, update + ) + + if not allowed: + raise LimitExceededError( + retry_after_ms=int(1000 * (time_allowed - time_now_s)), + ) diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index 093042fdb9..649f018356 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -15,25 +15,30 @@ from ._base import Config +class RateLimitConfig(object): + def __init__(self, config): + self.per_second = config.get("per_second", 0.17) + self.burst_count = config.get("burst_count", 3.0) + + class RatelimitConfig(Config): def read_config(self, config): self.rc_messages_per_second = config["rc_messages_per_second"] self.rc_message_burst_count = config["rc_message_burst_count"] + self.rc_registration = RateLimitConfig(config.get("rc_registration", {})) + + rc_login_config = config.get("rc_login", {}) + self.rc_login_address = RateLimitConfig(rc_login_config.get("address", {})) + self.rc_login_account = RateLimitConfig(rc_login_config.get("account", {})) + self.federation_rc_window_size = config["federation_rc_window_size"] self.federation_rc_sleep_limit = config["federation_rc_sleep_limit"] self.federation_rc_sleep_delay = config["federation_rc_sleep_delay"] self.federation_rc_reject_limit = config["federation_rc_reject_limit"] self.federation_rc_concurrent = config["federation_rc_concurrent"] - self.rc_registration_requests_per_second = config.get( - "rc_registration_requests_per_second", 0.17, - ) - self.rc_registration_request_burst_count = config.get( - "rc_registration_request_burst_count", 3, - ) - def default_config(self, **kwargs): return """\ ## Ratelimiting ## @@ -46,6 +51,34 @@ class RatelimitConfig(Config): # rc_message_burst_count: 10.0 + # Ratelimiting settings for registration and login. + # + # Each ratelimiting configuration is made of two parameters: + # - per_second: number of requests a client can send per second. + # - burst_count: number of requests a client can send before being throttled. + # + # Synapse currently uses the following configurations: + # - one for registration that ratelimits registration requests based on the + # client's IP address. + # - one for login that ratelimits login requests based on the client's IP + # address. + # - one for login that ratelimits login requests based on the account the + # client is attempting to log into. + # + # The defaults are as shown below. + # + #rc_registration: + # per_second: 0.17 + # burst_count: 3 + # + #rc_login: + # address: + # per_second: 0.17 + # burst_count: 3 + # account: + # per_second: 0.17 + # burst_count: 3 + # The federation window size in milliseconds # federation_rc_window_size: 1000 @@ -69,15 +102,4 @@ class RatelimitConfig(Config): # single server # federation_rc_concurrent: 3 - - # Number of registration requests a client can send per second. - # Defaults to 1/minute (0.17). - # - #rc_registration_requests_per_second: 0.17 - - # Number of registration requests a client can send before being - # throttled. - # Defaults to 3. - # - #rc_registration_request_burst_count: 3.0 """ diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 2abd9af94f..74f3790f25 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -35,6 +35,7 @@ from synapse.api.errors import ( StoreError, SynapseError, ) +from synapse.api.ratelimiting import Ratelimiter from synapse.module_api import ModuleApi from synapse.types import UserID from synapse.util import logcontext @@ -99,6 +100,10 @@ class AuthHandler(BaseHandler): login_types.append(t) self._supported_login_types = login_types + self._account_ratelimiter = Ratelimiter() + + self._clock = self.hs.get_clock() + @defer.inlineCallbacks def validate_user_via_ui_auth(self, requester, request_body, clientip): """ @@ -568,7 +573,12 @@ class AuthHandler(BaseHandler): Returns: defer.Deferred: (unicode) canonical_user_id, or None if zero or multiple matches + + Raises: + LimitExceededError if the ratelimiter's login requests count for this + user is too high too proceed. """ + self.ratelimit_login_per_account(user_id) res = yield self._find_user_id_and_pwd_hash(user_id) if res is not None: defer.returnValue(res[0]) @@ -634,6 +644,8 @@ class AuthHandler(BaseHandler): StoreError if there was a problem accessing the database SynapseError if there was a problem with the request LoginError if there was an authentication problem. + LimitExceededError if the ratelimiter's login requests count for this + user is too high too proceed. """ if username.startswith('@'): @@ -643,6 +655,8 @@ class AuthHandler(BaseHandler): username, self.hs.hostname ).to_string() + self.ratelimit_login_per_account(qualified_user_id) + login_type = login_submission.get("type") known_login_type = False @@ -735,6 +749,10 @@ class AuthHandler(BaseHandler): password (unicode): the provided password Returns: (unicode) the canonical_user_id, or None if unknown user / bad password + + Raises: + LimitExceededError if the ratelimiter's login requests count for this + user is too high too proceed. """ lookupres = yield self._find_user_id_and_pwd_hash(user_id) if not lookupres: @@ -763,6 +781,7 @@ class AuthHandler(BaseHandler): auth_api.validate_macaroon(macaroon, "login", True, user_id) except Exception: raise AuthError(403, "Invalid token", errcode=Codes.FORBIDDEN) + self.ratelimit_login_per_account(user_id) yield self.auth.check_auth_blocking(user_id) defer.returnValue(user_id) @@ -934,6 +953,23 @@ class AuthHandler(BaseHandler): else: return defer.succeed(False) + def ratelimit_login_per_account(self, user_id): + """Checks whether the process must be stopped because of ratelimiting. + + Args: + user_id (unicode): complete @user:id + + Raises: + LimitExceededError if the ratelimiter's login requests count for this + user is too high too proceed. + """ + self._account_ratelimiter.ratelimit( + user_id.lower(), time_now_s=self._clock.time(), + rate_hz=self.hs.config.rc_login_account.per_second, + burst_count=self.hs.config.rc_login_account.burst_count, + update=True, + ) + @attr.s class MacaroonGenerator(object): diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 03130edc54..0ec16b1d2e 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -629,8 +629,8 @@ class RegistrationHandler(BaseHandler): allowed, time_allowed = self.ratelimiter.can_do_action( address, time_now_s=time_now, - rate_hz=self.hs.config.rc_registration_requests_per_second, - burst_count=self.hs.config.rc_registration_request_burst_count, + rate_hz=self.hs.config.rc_registration.per_second, + burst_count=self.hs.config.rc_registration.burst_count, ) if not allowed: diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index 6121c5b6df..8d56effbb8 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -22,6 +22,7 @@ from twisted.internet import defer from twisted.web.client import PartialDownloadError from synapse.api.errors import Codes, LoginError, SynapseError +from synapse.api.ratelimiting import Ratelimiter from synapse.http.server import finish_request from synapse.http.servlet import ( RestServlet, @@ -97,6 +98,7 @@ class LoginRestServlet(ClientV1RestServlet): self.registration_handler = hs.get_registration_handler() self.handlers = hs.get_handlers() self._well_known_builder = WellKnownBuilder(hs) + self._address_ratelimiter = Ratelimiter() def on_GET(self, request): flows = [] @@ -129,6 +131,13 @@ class LoginRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_POST(self, request): + self._address_ratelimiter.ratelimit( + request.getClientIP(), time_now_s=self.hs.clock.time(), + rate_hz=self.hs.config.rc_login_address.per_second, + burst_count=self.hs.config.rc_login_address.burst_count, + update=True, + ) + login_submission = parse_json_object_from_request(request) try: if self.jwt_enabled and (login_submission["type"] == @@ -285,6 +294,7 @@ class LoginRestServlet(ClientV1RestServlet): raise LoginError(401, "Invalid JWT", errcode=Codes.UNAUTHORIZED) user_id = UserID(user, self.hs.hostname).to_string() + auth_handler = self.auth_handler registered_user_id = yield auth_handler.check_user_exists(user_id) if registered_user_id: diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 6f34029431..6d235262c8 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -210,8 +210,8 @@ class RegisterRestServlet(RestServlet): allowed, time_allowed = self.ratelimiter.can_do_action( client_addr, time_now_s=time_now, - rate_hz=self.hs.config.rc_registration_requests_per_second, - burst_count=self.hs.config.rc_registration_request_burst_count, + rate_hz=self.hs.config.rc_registration.per_second, + burst_count=self.hs.config.rc_registration.burst_count, update=False, ) diff --git a/tests/rest/client/v1/test_login.py b/tests/rest/client/v1/test_login.py new file mode 100644 index 0000000000..4035f76cca --- /dev/null +++ b/tests/rest/client/v1/test_login.py @@ -0,0 +1,118 @@ +import json + +from synapse.rest.client.v1 import admin, login + +from tests import unittest + +LOGIN_URL = b"/_matrix/client/r0/login" + + +class LoginRestServletTestCase(unittest.HomeserverTestCase): + + servlets = [ + admin.register_servlets, + login.register_servlets, + ] + + def make_homeserver(self, reactor, clock): + + self.hs = self.setup_test_homeserver() + self.hs.config.enable_registration = True + self.hs.config.registrations_require_3pid = [] + self.hs.config.auto_join_rooms = [] + self.hs.config.enable_registration_captcha = False + + return self.hs + + def test_POST_ratelimiting_per_address(self): + self.hs.config.rc_login_address.burst_count = 5 + self.hs.config.rc_login_address.per_second = 0.17 + + # Create different users so we're sure not to be bothered by the per-user + # ratelimiter. + for i in range(0, 6): + self.register_user("kermit" + str(i), "monkey") + + for i in range(0, 6): + params = { + "type": "m.login.password", + "identifier": { + "type": "m.id.user", + "user": "kermit" + str(i), + }, + "password": "monkey", + } + request_data = json.dumps(params) + request, channel = self.make_request(b"POST", LOGIN_URL, request_data) + self.render(request) + + if i == 5: + self.assertEquals(channel.result["code"], b"429", channel.result) + retry_after_ms = int(channel.json_body["retry_after_ms"]) + else: + self.assertEquals(channel.result["code"], b"200", channel.result) + + # Since we're ratelimiting at 1 request/min, retry_after_ms should be lower + # than 1min. + self.assertTrue(retry_after_ms < 6000) + + self.reactor.advance(retry_after_ms / 1000.) + + params = { + "type": "m.login.password", + "identifier": { + "type": "m.id.user", + "user": "kermit" + str(i), + }, + "password": "monkey", + } + request_data = json.dumps(params) + request, channel = self.make_request(b"POST", LOGIN_URL, params) + self.render(request) + + self.assertEquals(channel.result["code"], b"200", channel.result) + + def test_POST_ratelimiting_per_account(self): + self.hs.config.rc_login_account.burst_count = 5 + self.hs.config.rc_login_account.per_second = 0.17 + + self.register_user("kermit", "monkey") + + for i in range(0, 6): + params = { + "type": "m.login.password", + "identifier": { + "type": "m.id.user", + "user": "kermit", + }, + "password": "monkey", + } + request_data = json.dumps(params) + request, channel = self.make_request(b"POST", LOGIN_URL, request_data) + self.render(request) + + if i == 5: + self.assertEquals(channel.result["code"], b"429", channel.result) + retry_after_ms = int(channel.json_body["retry_after_ms"]) + else: + self.assertEquals(channel.result["code"], b"200", channel.result) + + # Since we're ratelimiting at 1 request/min, retry_after_ms should be lower + # than 1min. + self.assertTrue(retry_after_ms < 6000) + + self.reactor.advance(retry_after_ms / 1000.) + + params = { + "type": "m.login.password", + "identifier": { + "type": "m.id.user", + "user": "kermit", + }, + "password": "monkey", + } + request_data = json.dumps(params) + request, channel = self.make_request(b"POST", LOGIN_URL, params) + self.render(request) + + self.assertEquals(channel.result["code"], b"200", channel.result) diff --git a/tests/rest/client/v2_alpha/test_register.py b/tests/rest/client/v2_alpha/test_register.py index 3600434858..8fb525d3bf 100644 --- a/tests/rest/client/v2_alpha/test_register.py +++ b/tests/rest/client/v2_alpha/test_register.py @@ -132,7 +132,8 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): self.assertEquals(channel.json_body["error"], "Guest access is disabled") def test_POST_ratelimiting_guest(self): - self.hs.config.rc_registration_request_burst_count = 5 + self.hs.config.rc_registration.burst_count = 5 + self.hs.config.rc_registration.per_second = 0.17 for i in range(0, 6): url = self.url + b"?kind=guest" @@ -153,7 +154,8 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): self.assertEquals(channel.result["code"], b"200", channel.result) def test_POST_ratelimiting(self): - self.hs.config.rc_registration_request_burst_count = 5 + self.hs.config.rc_registration.burst_count = 5 + self.hs.config.rc_registration.per_second = 0.17 for i in range(0, 6): params = { diff --git a/tests/utils.py b/tests/utils.py index 03b5a05b22..a412736492 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -151,8 +151,12 @@ def default_config(name): config.admin_contact = None config.rc_messages_per_second = 10000 config.rc_message_burst_count = 10000 - config.rc_registration_request_burst_count = 3.0 - config.rc_registration_requests_per_second = 0.17 + config.rc_registration.per_second = 10000 + config.rc_registration.burst_count = 10000 + config.rc_login_address.per_second = 10000 + config.rc_login_address.burst_count = 10000 + config.rc_login_account.per_second = 10000 + config.rc_login_account.burst_count = 10000 config.saml2_enabled = False config.public_baseurl = None config.default_identity_server = None -- cgit 1.5.1 From 651ad8bc96d360500e7f5953d05ef418b51acc86 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 18 Mar 2019 12:57:20 +0000 Subject: Add ratelimiting on failed login attempts (#4865) --- changelog.d/4865.feature | 1 + docs/sample_config.yaml | 6 +++++ synapse/config/ratelimiting.py | 9 ++++++++ synapse/handlers/auth.py | 28 +++++++++++++++++++----- tests/rest/client/v1/test_login.py | 45 ++++++++++++++++++++++++++++++++++++++ tests/utils.py | 2 ++ 6 files changed, 86 insertions(+), 5 deletions(-) create mode 100644 changelog.d/4865.feature (limited to 'synapse/handlers') diff --git a/changelog.d/4865.feature b/changelog.d/4865.feature new file mode 100644 index 0000000000..61d4eb8d60 --- /dev/null +++ b/changelog.d/4865.feature @@ -0,0 +1 @@ +Add configurable rate limiting to the /login endpoint. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index b3df272c54..84e2cc97f9 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -392,6 +392,9 @@ rc_message_burst_count: 10.0 # address. # - one for login that ratelimits login requests based on the account the # client is attempting to log into. +# - one for login that ratelimits login requests based on the account the +# client is attempting to log into, based on the amount of failed login +# attempts for this account. # # The defaults are as shown below. # @@ -406,6 +409,9 @@ rc_message_burst_count: 10.0 # account: # per_second: 0.17 # burst_count: 3 +# failed_attempts: +# per_second: 0.17 +# burst_count: 3 # The federation window size in milliseconds # diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index 649f018356..7e6cc5d0ea 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -32,6 +32,9 @@ class RatelimitConfig(Config): rc_login_config = config.get("rc_login", {}) self.rc_login_address = RateLimitConfig(rc_login_config.get("address", {})) self.rc_login_account = RateLimitConfig(rc_login_config.get("account", {})) + self.rc_login_failed_attempts = RateLimitConfig( + rc_login_config.get("failed_attempts", {}), + ) self.federation_rc_window_size = config["federation_rc_window_size"] self.federation_rc_sleep_limit = config["federation_rc_sleep_limit"] @@ -64,6 +67,9 @@ class RatelimitConfig(Config): # address. # - one for login that ratelimits login requests based on the account the # client is attempting to log into. + # - one for login that ratelimits login requests based on the account the + # client is attempting to log into, based on the amount of failed login + # attempts for this account. # # The defaults are as shown below. # @@ -78,6 +84,9 @@ class RatelimitConfig(Config): # account: # per_second: 0.17 # burst_count: 3 + # failed_attempts: + # per_second: 0.17 + # burst_count: 3 # The federation window size in milliseconds # diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 74f3790f25..caad9ae2dd 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -101,6 +101,7 @@ class AuthHandler(BaseHandler): self._supported_login_types = login_types self._account_ratelimiter = Ratelimiter() + self._failed_attempts_ratelimiter = Ratelimiter() self._clock = self.hs.get_clock() @@ -729,9 +730,16 @@ class AuthHandler(BaseHandler): if not known_login_type: raise SynapseError(400, "Unknown login type %s" % login_type) - # unknown username or invalid password. We raise a 403 here, but note - # that if we're doing user-interactive login, it turns all LoginErrors - # into a 401 anyway. + # unknown username or invalid password. + self._failed_attempts_ratelimiter.ratelimit( + qualified_user_id.lower(), time_now_s=self._clock.time(), + rate_hz=self.hs.config.rc_login_failed_attempts.per_second, + burst_count=self.hs.config.rc_login_failed_attempts.burst_count, + update=True, + ) + + # We raise a 403 here, but note that if we're doing user-interactive + # login, it turns all LoginErrors into a 401 anyway. raise LoginError( 403, "Invalid password", errcode=Codes.FORBIDDEN @@ -956,13 +964,23 @@ class AuthHandler(BaseHandler): def ratelimit_login_per_account(self, user_id): """Checks whether the process must be stopped because of ratelimiting. + Checks against two ratelimiters: the generic one for login attempts per + account and the one specific to failed attempts. + Args: user_id (unicode): complete @user:id Raises: - LimitExceededError if the ratelimiter's login requests count for this - user is too high too proceed. + LimitExceededError if one of the ratelimiters' login requests count + for this user is too high too proceed. """ + self._failed_attempts_ratelimiter.ratelimit( + user_id.lower(), time_now_s=self._clock.time(), + rate_hz=self.hs.config.rc_login_failed_attempts.per_second, + burst_count=self.hs.config.rc_login_failed_attempts.burst_count, + update=False, + ) + self._account_ratelimiter.ratelimit( user_id.lower(), time_now_s=self._clock.time(), rate_hz=self.hs.config.rc_login_account.per_second, diff --git a/tests/rest/client/v1/test_login.py b/tests/rest/client/v1/test_login.py index 4035f76cca..86312f1096 100644 --- a/tests/rest/client/v1/test_login.py +++ b/tests/rest/client/v1/test_login.py @@ -116,3 +116,48 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase): self.render(request) self.assertEquals(channel.result["code"], b"200", channel.result) + + def test_POST_ratelimiting_per_account_failed_attempts(self): + self.hs.config.rc_login_failed_attempts.burst_count = 5 + self.hs.config.rc_login_failed_attempts.per_second = 0.17 + + self.register_user("kermit", "monkey") + + for i in range(0, 6): + params = { + "type": "m.login.password", + "identifier": { + "type": "m.id.user", + "user": "kermit", + }, + "password": "notamonkey", + } + request_data = json.dumps(params) + request, channel = self.make_request(b"POST", LOGIN_URL, request_data) + self.render(request) + + if i == 5: + self.assertEquals(channel.result["code"], b"429", channel.result) + retry_after_ms = int(channel.json_body["retry_after_ms"]) + else: + self.assertEquals(channel.result["code"], b"403", channel.result) + + # Since we're ratelimiting at 1 request/min, retry_after_ms should be lower + # than 1min. + self.assertTrue(retry_after_ms < 6000) + + self.reactor.advance(retry_after_ms / 1000.) + + params = { + "type": "m.login.password", + "identifier": { + "type": "m.id.user", + "user": "kermit", + }, + "password": "notamonkey", + } + request_data = json.dumps(params) + request, channel = self.make_request(b"POST", LOGIN_URL, params) + self.render(request) + + self.assertEquals(channel.result["code"], b"403", channel.result) diff --git a/tests/utils.py b/tests/utils.py index a412736492..b58b674aa4 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -157,6 +157,8 @@ def default_config(name): config.rc_login_address.burst_count = 10000 config.rc_login_account.per_second = 10000 config.rc_login_account.burst_count = 10000 + config.rc_login_failed_attempts.per_second = 10000 + config.rc_login_failed_attempts.burst_count = 10000 config.saml2_enabled = False config.public_baseurl = None config.default_identity_server = None -- cgit 1.5.1 From 282c97327f150a37d53f90ab6207bc1f98e70da3 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 19 Mar 2019 04:50:24 +1100 Subject: Migrate the user directory initial population to a background task (#4864) --- changelog.d/4864.feature | 1 + synapse/handlers/user_directory.py | 173 +--------- synapse/storage/background_updates.py | 8 +- .../storage/schema/delta/53/user_dir_populate.sql | 30 ++ synapse/storage/user_directory.py | 370 +++++++++++++++------ tests/handlers/test_user_directory.py | 109 ++++-- tests/storage/test_user_directory.py | 11 +- tests/unittest.py | 4 +- 8 files changed, 405 insertions(+), 301 deletions(-) create mode 100644 changelog.d/4864.feature create mode 100644 synapse/storage/schema/delta/53/user_dir_populate.sql (limited to 'synapse/handlers') diff --git a/changelog.d/4864.feature b/changelog.d/4864.feature new file mode 100644 index 0000000000..57927f2620 --- /dev/null +++ b/changelog.d/4864.feature @@ -0,0 +1 @@ +The user directory has been rewritten to make it faster, with less chance of falling behind on a large server. \ No newline at end of file diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index d92f8c529c..7dc0e236e7 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -38,18 +38,8 @@ class UserDirectoryHandler(object): world_readable or publically joinable room. We keep a database table up to date by streaming changes of the current state and recalculating whether users should be in the directory or not when necessary. - - For each user in the directory we also store a room_id which is public and that the - user is joined to. This allows us to ignore history_visibility and join_rules changes - for that user in all other public rooms, as we know they'll still be in at least - one public room. """ - INITIAL_ROOM_SLEEP_MS = 50 - INITIAL_ROOM_SLEEP_COUNT = 100 - INITIAL_ROOM_BATCH_SIZE = 100 - INITIAL_USER_SLEEP_MS = 10 - def __init__(self, hs): self.store = hs.get_datastore() self.state = hs.get_state_handler() @@ -59,17 +49,6 @@ class UserDirectoryHandler(object): self.is_mine_id = hs.is_mine_id self.update_user_directory = hs.config.update_user_directory self.search_all_users = hs.config.user_directory_search_all_users - - # If we're a worker, don't sleep when doing the initial room work, as it - # won't monopolise the master's CPU. - if hs.config.worker_app: - self.INITIAL_ROOM_SLEEP_MS = 0 - self.INITIAL_USER_SLEEP_MS = 0 - - # When start up for the first time we need to populate the user_directory. - # This is a set of user_id's we've inserted already - self.initially_handled_users = set() - # The current position in the current_state_delta stream self.pos = None @@ -132,7 +111,7 @@ class UserDirectoryHandler(object): # Support users are for diagnostics and should not appear in the user directory. if not is_support: yield self.store.update_profile_in_user_dir( - user_id, profile.display_name, profile.avatar_url, None + user_id, profile.display_name, profile.avatar_url ) @defer.inlineCallbacks @@ -149,10 +128,9 @@ class UserDirectoryHandler(object): if self.pos is None: self.pos = yield self.store.get_user_directory_stream_pos() - # If still None then we need to do the initial fill of directory + # If still None then the initial background update hasn't happened yet if self.pos is None: - yield self._do_initial_spam() - self.pos = yield self.store.get_user_directory_stream_pos() + defer.returnValue(None) # Loop round handling deltas until we're up to date while True: @@ -173,133 +151,6 @@ class UserDirectoryHandler(object): yield self.store.update_user_directory_stream_pos(self.pos) - @defer.inlineCallbacks - def _do_initial_spam(self): - """Populates the user_directory from the current state of the DB, used - when synapse first starts with user_directory support - """ - new_pos = yield self.store.get_max_stream_id_in_current_state_deltas() - - # Delete any existing entries just in case there are any - yield self.store.delete_all_from_user_dir() - - # We process by going through each existing room at a time. - room_ids = yield self.store.get_all_rooms() - - logger.info("Doing initial update of user directory. %d rooms", len(room_ids)) - num_processed_rooms = 0 - - for room_id in room_ids: - logger.info("Handling room %d/%d", num_processed_rooms + 1, len(room_ids)) - yield self._handle_initial_room(room_id) - num_processed_rooms += 1 - yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) - - logger.info("Processed all rooms.") - - if self.search_all_users: - num_processed_users = 0 - user_ids = yield self.store.get_all_local_users() - logger.info( - "Doing initial update of user directory. %d users", len(user_ids) - ) - for user_id in user_ids: - # We add profiles for all users even if they don't match the - # include pattern, just in case we want to change it in future - logger.info( - "Handling user %d/%d", num_processed_users + 1, len(user_ids) - ) - yield self._handle_local_user(user_id) - num_processed_users += 1 - yield self.clock.sleep(self.INITIAL_USER_SLEEP_MS / 1000.0) - - logger.info("Processed all users") - - self.initially_handled_users = None - - yield self.store.update_user_directory_stream_pos(new_pos) - - @defer.inlineCallbacks - def _handle_initial_room(self, room_id): - """ - Called when we initially fill out user_directory one room at a time - """ - is_in_room = yield self.store.is_host_joined(room_id, self.server_name) - if not is_in_room: - return - - is_public = yield self.store.is_room_world_readable_or_publicly_joinable( - room_id - ) - - users_with_profile = yield self.state.get_current_user_in_room(room_id) - user_ids = set(users_with_profile) - unhandled_users = user_ids - self.initially_handled_users - - yield self.store.add_profiles_to_user_dir( - {user_id: users_with_profile[user_id] for user_id in unhandled_users} - ) - - self.initially_handled_users |= unhandled_users - - # We now go and figure out the new users who share rooms with user entries - # We sleep aggressively here as otherwise it can starve resources. - # We also batch up inserts/updates, but try to avoid too many at once. - to_insert = set() - count = 0 - - if is_public: - for user_id in user_ids: - if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: - yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) - - if self.store.get_if_app_services_interested_in_user(user_id): - count += 1 - continue - - to_insert.add(user_id) - if len(to_insert) > self.INITIAL_ROOM_BATCH_SIZE: - yield self.store.add_users_in_public_rooms(room_id, to_insert) - to_insert.clear() - - if to_insert: - yield self.store.add_users_in_public_rooms(room_id, to_insert) - to_insert.clear() - else: - - for user_id in user_ids: - if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: - yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) - - if not self.is_mine_id(user_id): - count += 1 - continue - - if self.store.get_if_app_services_interested_in_user(user_id): - count += 1 - continue - - for other_user_id in user_ids: - if user_id == other_user_id: - continue - - if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: - yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) - count += 1 - - user_set = (user_id, other_user_id) - to_insert.add(user_set) - - if len(to_insert) > self.INITIAL_ROOM_BATCH_SIZE: - yield self.store.add_users_who_share_private_room( - room_id, not is_public, to_insert - ) - to_insert.clear() - - if to_insert: - yield self.store.add_users_who_share_private_room(room_id, to_insert) - to_insert.clear() - @defer.inlineCallbacks def _handle_deltas(self, deltas): """Called with the state deltas to process @@ -449,7 +300,9 @@ class UserDirectoryHandler(object): row = yield self.store.get_user_in_directory(user_id) if not row: - yield self.store.add_profiles_to_user_dir({user_id: profile}) + yield self.store.update_profile_in_user_dir( + user_id, profile.display_name, profile.avatar_url + ) @defer.inlineCallbacks def _handle_new_user(self, room_id, user_id, profile): @@ -461,9 +314,9 @@ class UserDirectoryHandler(object): """ logger.debug("Adding new user to dir, %r", user_id) - row = yield self.store.get_user_in_directory(user_id) - if not row: - yield self.store.add_profiles_to_user_dir({user_id: profile}) + yield self.store.update_profile_in_user_dir( + user_id, profile.display_name, profile.avatar_url + ) is_public = yield self.store.is_room_world_readable_or_publicly_joinable( room_id @@ -479,7 +332,9 @@ class UserDirectoryHandler(object): # First, if they're our user then we need to update for every user if self.is_mine_id(user_id): - is_appservice = self.store.get_if_app_services_interested_in_user(user_id) + is_appservice = self.store.get_if_app_services_interested_in_user( + user_id + ) # We don't care about appservice users. if not is_appservice: @@ -546,9 +401,7 @@ class UserDirectoryHandler(object): new_avatar = event.content.get("avatar_url") if prev_name != new_name or prev_avatar != new_avatar: - yield self.store.update_profile_in_user_dir( - user_id, new_name, new_avatar, room_id - ) + yield self.store.update_profile_in_user_dir(user_id, new_name, new_avatar) @defer.inlineCallbacks def _get_key_change(self, prev_event_id, event_id, key_name, public_value): diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 60cdc884e6..a2f8c23a65 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -52,7 +52,9 @@ class BackgroundUpdatePerformance(object): Returns: A duration in ms as a float """ - if self.total_item_count == 0: + if self.avg_duration_ms == 0: + return 0 + elif self.total_item_count == 0: return None else: # Use the exponential moving average so that we can adapt to @@ -64,7 +66,9 @@ class BackgroundUpdatePerformance(object): Returns: A duration in ms as a float """ - if self.total_item_count == 0: + if self.total_duration_ms == 0: + return 0 + elif self.total_item_count == 0: return None else: return float(self.total_item_count) / float(self.total_duration_ms) diff --git a/synapse/storage/schema/delta/53/user_dir_populate.sql b/synapse/storage/schema/delta/53/user_dir_populate.sql new file mode 100644 index 0000000000..955b8fdbd6 --- /dev/null +++ b/synapse/storage/schema/delta/53/user_dir_populate.sql @@ -0,0 +1,30 @@ +/* Copyright 2019 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Set up staging tables +INSERT INTO background_updates (update_name, progress_json) VALUES + ('populate_user_directory_createtables', '{}'); + +-- Run through each room and update the user directory according to who is in it +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_user_directory_process_rooms', '{}', 'populate_user_directory_createtables'); + +-- Insert all users, if search_all_users is on +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_user_directory_process_users', '{}', 'populate_user_directory_rooms'); + +-- Clean up staging tables +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_user_directory_cleanup', '{}', 'populate_user_directory_process_users'); diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 1c00b956e5..4ee653210f 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -16,12 +16,10 @@ import logging import re -from six import iteritems - from twisted.internet import defer from synapse.api.constants import EventTypes, JoinRules -from synapse.storage._base import SQLBaseStore +from synapse.storage.background_updates import BackgroundUpdateStore from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.storage.state import StateFilter from synapse.types import get_domain_from_id, get_localpart_from_id @@ -30,7 +28,276 @@ from synapse.util.caches.descriptors import cached logger = logging.getLogger(__name__) -class UserDirectoryStore(SQLBaseStore): +TEMP_TABLE = "_temp_populate_user_directory" + + +class UserDirectoryStore(BackgroundUpdateStore): + def __init__(self, db_conn, hs): + super(UserDirectoryStore, self).__init__(db_conn, hs) + + self.server_name = hs.hostname + + self.register_background_update_handler( + "populate_user_directory_createtables", + self._populate_user_directory_createtables, + ) + self.register_background_update_handler( + "populate_user_directory_process_rooms", + self._populate_user_directory_process_rooms, + ) + self.register_background_update_handler( + "populate_user_directory_process_users", + self._populate_user_directory_process_users, + ) + self.register_background_update_handler( + "populate_user_directory_cleanup", self._populate_user_directory_cleanup + ) + + @defer.inlineCallbacks + def _populate_user_directory_createtables(self, progress, batch_size): + + # Get all the rooms that we want to process. + def _make_staging_area(txn): + sql = ( + "CREATE TABLE IF NOT EXISTS " + + TEMP_TABLE + + "_rooms(room_id TEXT NOT NULL, events BIGINT NOT NULL)" + ) + txn.execute(sql) + + sql = ( + "CREATE TABLE IF NOT EXISTS " + + TEMP_TABLE + + "_position(position TEXT NOT NULL)" + ) + txn.execute(sql) + + # Get rooms we want to process from the database + sql = """ + SELECT room_id, count(*) FROM current_state_events + GROUP BY room_id + """ + txn.execute(sql) + rooms = [{"room_id": x[0], "events": x[1]} for x in txn.fetchall()] + self._simple_insert_many_txn(txn, TEMP_TABLE + "_rooms", rooms) + del rooms + + # If search all users is on, get all the users we want to add. + if self.hs.config.user_directory_search_all_users: + sql = ( + "CREATE TABLE IF NOT EXISTS " + + TEMP_TABLE + + "_users(user_id TEXT NOT NULL)" + ) + txn.execute(sql) + + txn.execute("SELECT name FROM users") + users = [{"user_id": x[0]} for x in txn.fetchall()] + + self._simple_insert_many_txn(txn, TEMP_TABLE + "_users", users) + + new_pos = yield self.get_max_stream_id_in_current_state_deltas() + yield self.runInteraction( + "populate_user_directory_temp_build", _make_staging_area + ) + yield self._simple_insert(TEMP_TABLE + "_position", {"position": new_pos}) + + yield self._end_background_update("populate_user_directory_createtables") + defer.returnValue(1) + + @defer.inlineCallbacks + def _populate_user_directory_cleanup(self, progress, batch_size): + """ + Update the user directory stream position, then clean up the old tables. + """ + position = yield self._simple_select_one_onecol( + TEMP_TABLE + "_position", None, "position" + ) + yield self.update_user_directory_stream_pos(position) + + def _delete_staging_area(txn): + txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_rooms") + txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_users") + txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_position") + + yield self.runInteraction( + "populate_user_directory_cleanup", _delete_staging_area + ) + + yield self._end_background_update("populate_user_directory_cleanup") + defer.returnValue(1) + + @defer.inlineCallbacks + def _populate_user_directory_process_rooms(self, progress, batch_size): + + state = self.hs.get_state_handler() + + # If we don't have progress filed, delete everything. + if not progress: + yield self.delete_all_from_user_dir() + + def _get_next_batch(txn): + sql = """ + SELECT room_id FROM %s + ORDER BY events DESC + LIMIT %s + """ % ( + TEMP_TABLE + "_rooms", + str(batch_size), + ) + txn.execute(sql) + rooms_to_work_on = txn.fetchall() + + if not rooms_to_work_on: + return None + + rooms_to_work_on = [x[0] for x in rooms_to_work_on] + + # Get how many are left to process, so we can give status on how + # far we are in processing + txn.execute("SELECT COUNT(*) FROM " + TEMP_TABLE + "_rooms") + progress["remaining"] = txn.fetchone()[0] + + return rooms_to_work_on + + rooms_to_work_on = yield self.runInteraction( + "populate_user_directory_temp_read", _get_next_batch + ) + + # No more rooms -- complete the transaction. + if not rooms_to_work_on: + yield self._end_background_update("populate_user_directory_process_rooms") + defer.returnValue(1) + + logger.info( + "Processing the next %d rooms of %d remaining" + % (len(rooms_to_work_on), progress["remaining"]) + ) + + for room_id in rooms_to_work_on: + is_in_room = yield self.is_host_joined(room_id, self.server_name) + + if is_in_room: + is_public = yield self.is_room_world_readable_or_publicly_joinable( + room_id + ) + + users_with_profile = yield state.get_current_user_in_room(room_id) + user_ids = set(users_with_profile) + + # Update each user in the user directory. + for user_id, profile in users_with_profile.items(): + yield self.update_profile_in_user_dir( + user_id, profile.display_name, profile.avatar_url + ) + + to_insert = set() + + if is_public: + for user_id in user_ids: + if self.get_if_app_services_interested_in_user(user_id): + continue + + to_insert.add(user_id) + + if to_insert: + yield self.add_users_in_public_rooms(room_id, to_insert) + to_insert.clear() + else: + for user_id in user_ids: + if not self.hs.is_mine_id(user_id): + continue + + if self.get_if_app_services_interested_in_user(user_id): + continue + + for other_user_id in user_ids: + if user_id == other_user_id: + continue + + user_set = (user_id, other_user_id) + to_insert.add(user_set) + + if to_insert: + yield self.add_users_who_share_private_room(room_id, to_insert) + to_insert.clear() + + # We've finished a room. Delete it from the table. + yield self._simple_delete_one(TEMP_TABLE + "_rooms", {"room_id": room_id}) + # Update the remaining counter. + progress["remaining"] -= 1 + yield self.runInteraction( + "populate_user_directory", + self._background_update_progress_txn, + "populate_user_directory_process_rooms", + progress, + ) + + defer.returnValue(len(rooms_to_work_on)) + + @defer.inlineCallbacks + def _populate_user_directory_process_users(self, progress, batch_size): + """ + If search_all_users is enabled, add all of the users to the user directory. + """ + if not self.hs.config.user_directory_search_all_users: + yield self._end_background_update("populate_user_directory_process_users") + defer.returnValue(1) + + def _get_next_batch(txn): + sql = "SELECT user_id FROM %s LIMIT %s" % ( + TEMP_TABLE + "_users", + str(batch_size), + ) + txn.execute(sql) + users_to_work_on = txn.fetchall() + + if not users_to_work_on: + return None + + users_to_work_on = [x[0] for x in users_to_work_on] + + # Get how many are left to process, so we can give status on how + # far we are in processing + sql = "SELECT COUNT(*) FROM " + TEMP_TABLE + "_users" + txn.execute(sql) + progress["remaining"] = txn.fetchone()[0] + + return users_to_work_on + + users_to_work_on = yield self.runInteraction( + "populate_user_directory_temp_read", _get_next_batch + ) + + # No more users -- complete the transaction. + if not users_to_work_on: + yield self._end_background_update("populate_user_directory_process_users") + defer.returnValue(1) + + logger.info( + "Processing the next %d users of %d remaining" + % (len(users_to_work_on), progress["remaining"]) + ) + + for user_id in users_to_work_on: + profile = yield self.get_profileinfo(get_localpart_from_id(user_id)) + yield self.update_profile_in_user_dir( + user_id, profile.display_name, profile.avatar_url + ) + + # We've finished processing a user. Delete it from the table. + yield self._simple_delete_one(TEMP_TABLE + "_users", {"user_id": user_id}) + # Update the remaining counter. + progress["remaining"] -= 1 + yield self.runInteraction( + "populate_user_directory", + self._background_update_progress_txn, + "populate_user_directory_process_users", + progress, + ) + + defer.returnValue(len(users_to_work_on)) + @defer.inlineCallbacks def is_room_world_readable_or_publicly_joinable(self, room_id): """Check if the room is either world_readable or publically joinable @@ -62,89 +329,16 @@ class UserDirectoryStore(SQLBaseStore): defer.returnValue(False) - def add_profiles_to_user_dir(self, users_with_profile): - """Add profiles to the user directory - - Args: - users_with_profile (dict): Users to add to directory in the form of - mapping of user_id -> ProfileInfo + def update_profile_in_user_dir(self, user_id, display_name, avatar_url): + """ + Update or add a user's profile in the user directory. """ - if isinstance(self.database_engine, PostgresEngine): - # We weight the loclpart most highly, then display name and finally - # server name - sql = """ - INSERT INTO user_directory_search(user_id, vector) - VALUES (?, - setweight(to_tsvector('english', ?), 'A') - || setweight(to_tsvector('english', ?), 'D') - || setweight(to_tsvector('english', COALESCE(?, '')), 'B') - ) - """ - args = ( - ( - user_id, - get_localpart_from_id(user_id), - get_domain_from_id(user_id), - profile.display_name, - ) - for user_id, profile in iteritems(users_with_profile) - ) - elif isinstance(self.database_engine, Sqlite3Engine): - sql = """ - INSERT INTO user_directory_search(user_id, value) - VALUES (?,?) - """ - args = tuple( - ( - user_id, - "%s %s" % (user_id, p.display_name) if p.display_name else user_id, - ) - for user_id, p in iteritems(users_with_profile) - ) - else: - # This should be unreachable. - raise Exception("Unrecognized database engine") - - def _add_profiles_to_user_dir_txn(txn): - txn.executemany(sql, args) - self._simple_insert_many_txn( - txn, - table="user_directory", - values=[ - { - "user_id": user_id, - "room_id": None, - "display_name": profile.display_name, - "avatar_url": profile.avatar_url, - } - for user_id, profile in iteritems(users_with_profile) - ], - ) - for user_id in users_with_profile: - txn.call_after(self.get_user_in_directory.invalidate, (user_id,)) - - return self.runInteraction( - "add_profiles_to_user_dir", _add_profiles_to_user_dir_txn - ) - - @defer.inlineCallbacks - def update_user_in_user_dir(self, user_id, room_id): - yield self._simple_update_one( - table="user_directory", - keyvalues={"user_id": user_id}, - updatevalues={"room_id": room_id}, - desc="update_user_in_user_dir", - ) - self.get_user_in_directory.invalidate((user_id,)) - - def update_profile_in_user_dir(self, user_id, display_name, avatar_url, room_id): def _update_profile_in_user_dir_txn(txn): new_entry = self._simple_upsert_txn( txn, table="user_directory", keyvalues={"user_id": user_id}, - insertion_values={"room_id": room_id}, values={"display_name": display_name, "avatar_url": avatar_url}, lock=False, # We're only inserter ) @@ -281,18 +475,6 @@ class UserDirectoryStore(SQLBaseStore): defer.returnValue(user_ids) - @defer.inlineCallbacks - def get_all_rooms(self): - """Get all room_ids we've ever known about, in ascending order of "size" - """ - sql = """ - SELECT room_id FROM current_state_events - GROUP BY room_id - ORDER BY count(*) ASC - """ - rows = yield self._execute("get_all_rooms", None, sql) - defer.returnValue([room_id for room_id, in rows]) - @defer.inlineCallbacks def get_all_local_users(self): """Get all local users @@ -553,8 +735,8 @@ class UserDirectoryStore(SQLBaseStore): """ if self.hs.config.user_directory_search_all_users: - join_args = () - where_clause = "1=1" + join_args = (user_id,) + where_clause = "user_id != ?" else: join_args = (user_id,) where_clause = """ diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index 114807efc1..aefe11ac28 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -163,9 +163,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): def get_users_in_public_rooms(self): r = self.get_success( self.store._simple_select_list( - "users_in_public_rooms", - None, - ("user_id", "room_id"), + "users_in_public_rooms", None, ("user_id", "room_id") ) ) retval = [] @@ -182,6 +180,53 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): ) ) + def _add_background_updates(self): + """ + Add the background updates we need to run. + """ + # Ugh, have to reset this flag + self.store._all_done = False + + self.get_success( + self.store._simple_insert( + "background_updates", + { + "update_name": "populate_user_directory_createtables", + "progress_json": "{}", + }, + ) + ) + self.get_success( + self.store._simple_insert( + "background_updates", + { + "update_name": "populate_user_directory_process_rooms", + "progress_json": "{}", + "depends_on": "populate_user_directory_createtables", + }, + ) + ) + self.get_success( + self.store._simple_insert( + "background_updates", + { + "update_name": "populate_user_directory_process_users", + "progress_json": "{}", + "depends_on": "populate_user_directory_process_rooms", + }, + ) + ) + self.get_success( + self.store._simple_insert( + "background_updates", + { + "update_name": "populate_user_directory_cleanup", + "progress_json": "{}", + "depends_on": "populate_user_directory_process_users", + }, + ) + ) + def test_initial(self): """ The user directory's initial handler correctly updates the search tables. @@ -211,26 +256,17 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.assertEqual(shares_private, []) self.assertEqual(public_users, []) - # Reset the handled users caches - self.handler.initially_handled_users = set() + # Do the initial population of the user directory via the background update + self._add_background_updates() - # Do the initial population - d = self.handler._do_initial_spam() - - # This takes a while, so pump it a bunch of times to get through the - # sleep delays - for i in range(10): - self.pump(1) - - self.get_success(d) + while not self.get_success(self.store.has_completed_background_updates()): + self.get_success(self.store.do_next_background_update(100), by=0.1) shares_private = self.get_users_who_share_private_rooms() public_users = self.get_users_in_public_rooms() # User 1 and User 2 are in the same public room - self.assertEqual( - set(public_users), set([(u1, room), (u2, room)]) - ) + self.assertEqual(set(public_users), set([(u1, room), (u2, room)])) # User 1 and User 3 share private rooms self.assertEqual( @@ -238,7 +274,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): set([(u1, u3, private_room), (u3, u1, private_room)]), ) - def test_search_all_users(self): + def test_initial_share_all_users(self): """ Search all users = True means that a user does not have to share a private room with the searching user or be in a public room to be search @@ -248,33 +284,36 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.hs.config.user_directory_search_all_users = True u1 = self.register_user("user1", "pass") - u1_token = self.login(u1, "pass") - u2 = self.register_user("user2", "pass") - u2_token = self.login(u2, "pass") + self.register_user("user2", "pass") u3 = self.register_user("user3", "pass") - # User 1 and User 2 join a room. User 3 never does. - room = self.helper.create_room_as(u1, is_public=True, tok=u1_token) - self.helper.invite(room, src=u1, targ=u2, tok=u1_token) - self.helper.join(room, user=u2, tok=u2_token) - + # Wipe the user dir self.get_success(self.store.update_user_directory_stream_pos(None)) self.get_success(self.store.delete_all_from_user_dir()) - # Reset the handled users caches - self.handler.initially_handled_users = set() + # Do the initial population of the user directory via the background update + self._add_background_updates() - # Do the initial population - d = self.handler._do_initial_spam() + while not self.get_success(self.store.has_completed_background_updates()): + self.get_success(self.store.do_next_background_update(100), by=0.1) - # This takes a while, so pump it a bunch of times to get through the - # sleep delays - for i in range(10): - self.pump(1) + shares_private = self.get_users_who_share_private_rooms() + public_users = self.get_users_in_public_rooms() - self.get_success(d) + # No users share rooms + self.assertEqual(public_users, []) + self.assertEqual(self._compress_shared(shares_private), set([])) # Despite not sharing a room, search_all_users means we get a search # result. s = self.get_success(self.handler.search_users(u1, u3, 10)) self.assertEqual(len(s["results"]), 1) + + # We can find the other two users + s = self.get_success(self.handler.search_users(u1, "user", 10)) + self.assertEqual(len(s["results"]), 2) + + # Registering a user and then searching for them works. + u4 = self.register_user("user4", "pass") + s = self.get_success(self.handler.search_users(u1, u4, 10)) + self.assertEqual(len(s["results"]), 1) diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py index 512d76e7a3..fd3361404f 100644 --- a/tests/storage/test_user_directory.py +++ b/tests/storage/test_user_directory.py @@ -16,7 +16,6 @@ from twisted.internet import defer from synapse.storage import UserDirectoryStore -from synapse.storage.roommember import ProfileInfo from tests import unittest from tests.utils import setup_test_homeserver @@ -34,13 +33,9 @@ class UserDirectoryStoreTestCase(unittest.TestCase): # alice and bob are both in !room_id. bobby is not but shares # a homeserver with alice. - yield self.store.add_profiles_to_user_dir( - { - ALICE: ProfileInfo(None, "alice"), - BOB: ProfileInfo(None, "bob"), - BOBBY: ProfileInfo(None, "bobby"), - }, - ) + yield self.store.update_profile_in_user_dir(ALICE, "alice", None) + yield self.store.update_profile_in_user_dir(BOB, "bob", None) + yield self.store.update_profile_in_user_dir(BOBBY, "bobby", None) yield self.store.add_users_in_public_rooms( "!room:id", (ALICE, BOB) ) diff --git a/tests/unittest.py b/tests/unittest.py index ef31321bc8..7772a47078 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -330,10 +330,10 @@ class HomeserverTestCase(TestCase): """ self.reactor.pump([by] * 100) - def get_success(self, d): + def get_success(self, d, by=0.0): if not isinstance(d, Deferred): return d - self.pump() + self.pump(by=by) return self.successResultOf(d) def register_user(self, username, password, admin=False): -- cgit 1.5.1 From 88f0675967d629ed14ae6ea6d4815bd0b5e0a44e Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Tue, 19 Mar 2019 11:38:59 +0000 Subject: fix test_auto_create_auto_join_where_no_consent (#4886) --- changelog.d/4886.bugfix | 1 + changelog.d/4886.misc | 1 + synapse/handlers/message.py | 13 ++++++++++--- synapse/handlers/register.py | 5 +++++ tests/handlers/test_register.py | 24 ++++++++++++++++++++++-- 5 files changed, 39 insertions(+), 5 deletions(-) create mode 100644 changelog.d/4886.bugfix create mode 100644 changelog.d/4886.misc (limited to 'synapse/handlers') diff --git a/changelog.d/4886.bugfix b/changelog.d/4886.bugfix new file mode 100644 index 0000000000..b17aa92485 --- /dev/null +++ b/changelog.d/4886.bugfix @@ -0,0 +1 @@ +fix test_auto_create_auto_join_where_no_consent. diff --git a/changelog.d/4886.misc b/changelog.d/4886.misc new file mode 100644 index 0000000000..b17aa92485 --- /dev/null +++ b/changelog.d/4886.misc @@ -0,0 +1 @@ +fix test_auto_create_auto_join_where_no_consent. diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index c762b58902..55787563c0 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -243,7 +243,14 @@ class EventCreationHandler(object): self.spam_checker = hs.get_spam_checker() - if self.config.block_events_without_consent_error is not None: + self._block_events_without_consent_error = ( + self.config.block_events_without_consent_error + ) + + # we need to construct a ConsentURIBuilder here, as it checks that the necessary + # config options, but *only* if we have a configuration for which we are + # going to need it. + if self._block_events_without_consent_error: self._consent_uri_builder = ConsentURIBuilder(self.config) @defer.inlineCallbacks @@ -378,7 +385,7 @@ class EventCreationHandler(object): Raises: ConsentNotGivenError: if the user has not given consent yet """ - if self.config.block_events_without_consent_error is None: + if self._block_events_without_consent_error is None: return # exempt AS users from needing consent @@ -405,7 +412,7 @@ class EventCreationHandler(object): consent_uri = self._consent_uri_builder.build_user_consent_uri( requester.user.localpart, ) - msg = self.config.block_events_without_consent_error % { + msg = self._block_events_without_consent_error % { 'consent_uri': consent_uri, } raise ConsentNotGivenError( diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 0ec16b1d2e..68f73d3793 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -23,6 +23,7 @@ from synapse.api.constants import LoginType from synapse.api.errors import ( AuthError, Codes, + ConsentNotGivenError, InvalidCaptchaError, LimitExceededError, RegistrationError, @@ -311,6 +312,10 @@ class RegistrationHandler(BaseHandler): ) else: yield self._join_user_to_room(fake_requester, r) + except ConsentNotGivenError as e: + # Technically not necessary to pull out this error though + # moving away from bare excepts is a good thing to do. + logger.error("Failed to join new user to %r: %r", r, e) except Exception as e: logger.error("Failed to join new user to %r: %r", r, e) diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py index c9c1506273..010e65829e 100644 --- a/tests/handlers/test_register.py +++ b/tests/handlers/test_register.py @@ -187,12 +187,32 @@ class RegistrationTestCase(unittest.TestCase): @defer.inlineCallbacks def test_auto_create_auto_join_where_no_consent(self): - self.hs.config.user_consent_at_registration = True - self.hs.config.block_events_without_consent_error = "Error" + """Test to ensure that the first user is not auto-joined to a room if + they have not given general consent. + """ + + # Given:- + # * a user must give consent, + # * they have not given that consent + # * The server is configured to auto-join to a room + # (and autocreate if necessary) + + event_creation_handler = self.hs.get_event_creation_handler() + # (Messing with the internals of event_creation_handler is fragile + # but can't see a better way to do this. One option could be to subclass + # the test with custom config.) + event_creation_handler._block_events_without_consent_error = ("Error") + event_creation_handler._consent_uri_builder = Mock() room_alias_str = "#room:test" self.hs.config.auto_join_rooms = [room_alias_str] + + # When:- + # * the user is registered and post consent actions are called res = yield self.handler.register(localpart='jeff') yield self.handler.post_consent_actions(res[0]) + + # Then:- + # * Ensure that they have not been joined to the room rooms = yield self.store.get_rooms_for_user(res[0]) self.assertEqual(len(rooms), 0) -- cgit 1.5.1 From 213c98c00a473bac7363e1a728828e0f056550b8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 19 Mar 2019 16:50:51 +0000 Subject: Add option to disable search room lists This disables both local and remote room list searching. --- docs/sample_config.yaml | 5 +++++ synapse/config/room_directory.py | 9 +++++++++ synapse/handlers/room_list.py | 13 +++++++++++++ 3 files changed, 27 insertions(+) (limited to 'synapse/handlers') diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index f9886a900d..f7b1825d61 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -1036,6 +1036,11 @@ password_config: +# Wether the public room list can be searched. When disabled blocks +# searching local and remote room list for local and remote users. +# +#enable_room_list_search: true + # The `alias_creation` option controls who's allowed to create aliases # on this server. # diff --git a/synapse/config/room_directory.py b/synapse/config/room_directory.py index 9b897abe3c..a25a41d16d 100644 --- a/synapse/config/room_directory.py +++ b/synapse/config/room_directory.py @@ -20,6 +20,10 @@ from ._base import Config, ConfigError class RoomDirectoryConfig(Config): def read_config(self, config): + self.enable_room_list_search = config.get( + "enable_room_list_search", True, + ) + alias_creation_rules = config.get("alias_creation_rules") if alias_creation_rules is not None: @@ -54,6 +58,11 @@ class RoomDirectoryConfig(Config): def default_config(self, config_dir_path, server_name, **kwargs): return """ + # Wether the public room list can be searched. When disabled blocks + # searching local and remote room list for local and remote users. + # + #enable_room_list_search: true + # The `alias_creation` option controls who's allowed to create aliases # on this server. # diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index afa508d729..ba50c8aa95 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -44,6 +44,7 @@ EMPTY_THIRD_PARTY_ID = ThirdPartyInstanceID(None, None) class RoomListHandler(BaseHandler): def __init__(self, hs): super(RoomListHandler, self).__init__(hs) + self.config = hs.config self.response_cache = ResponseCache(hs, "room_list") self.remote_response_cache = ResponseCache(hs, "remote_room_list", timeout_ms=30 * 1000) @@ -70,6 +71,12 @@ class RoomListHandler(BaseHandler): "Getting public room list: limit=%r, since=%r, search=%r, network=%r", limit, since_token, bool(search_filter), network_tuple, ) + if not self.config.enable_room_list_search: + return defer.succeed({ + "chunk": [], + "total_room_count_estimate": 0, + }) + if search_filter: # We explicitly don't bother caching searches or requests for # appservice specific lists. @@ -441,6 +448,12 @@ class RoomListHandler(BaseHandler): def get_remote_public_room_list(self, server_name, limit=None, since_token=None, search_filter=None, include_all_networks=False, third_party_instance_id=None,): + if not self.config.enable_room_list_search: + defer.returnValue({ + "chunk": [], + "total_room_count_estimate": 0, + }) + if search_filter: # We currently don't support searching across federation, so we have # to do it manually without pagination -- cgit 1.5.1 From 7529038e66a81d36a71c654f26165a4215d918b3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Mar 2019 14:25:28 +0000 Subject: Return before we log --- synapse/handlers/room_list.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index ba50c8aa95..dc54634107 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -67,16 +67,17 @@ class RoomListHandler(BaseHandler): appservice and network id to use an appservice specific one. Setting to None returns all public rooms across all lists. """ - logger.info( - "Getting public room list: limit=%r, since=%r, search=%r, network=%r", - limit, since_token, bool(search_filter), network_tuple, - ) if not self.config.enable_room_list_search: return defer.succeed({ "chunk": [], "total_room_count_estimate": 0, }) + logger.info( + "Getting public room list: limit=%r, since=%r, search=%r, network=%r", + limit, since_token, bool(search_filter), network_tuple, + ) + if search_filter: # We explicitly don't bother caching searches or requests for # appservice specific lists. -- cgit 1.5.1 From 2c90422146b169cd43df12ab98e4e02ae53243c7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Mar 2019 14:25:58 +0000 Subject: Pull out config option --- synapse/handlers/room_list.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index dc54634107..d6c9d56007 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -44,7 +44,7 @@ EMPTY_THIRD_PARTY_ID = ThirdPartyInstanceID(None, None) class RoomListHandler(BaseHandler): def __init__(self, hs): super(RoomListHandler, self).__init__(hs) - self.config = hs.config + self.enable_room_list_search = hs.config.enable_room_list_search self.response_cache = ResponseCache(hs, "room_list") self.remote_response_cache = ResponseCache(hs, "remote_room_list", timeout_ms=30 * 1000) @@ -67,7 +67,7 @@ class RoomListHandler(BaseHandler): appservice and network id to use an appservice specific one. Setting to None returns all public rooms across all lists. """ - if not self.config.enable_room_list_search: + if not self.enable_room_list_search: return defer.succeed({ "chunk": [], "total_room_count_estimate": 0, @@ -449,7 +449,7 @@ class RoomListHandler(BaseHandler): def get_remote_public_room_list(self, server_name, limit=None, since_token=None, search_filter=None, include_all_networks=False, third_party_instance_id=None,): - if not self.config.enable_room_list_search: + if not self.enable_room_list_search: defer.returnValue({ "chunk": [], "total_room_count_estimate": 0, -- cgit 1.5.1 From cc197a61a1b494e5f8a7fbbc299161845f2ab8af Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Mar 2019 14:30:36 +0000 Subject: Disable publishing to room list when its disabled --- synapse/handlers/directory.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'synapse/handlers') diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index 8b113307d2..fe128d9c88 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -44,6 +44,7 @@ class DirectoryHandler(BaseHandler): self.appservice_handler = hs.get_application_service_handler() self.event_creation_handler = hs.get_event_creation_handler() self.config = hs.config + self.enable_room_list_search = hs.config.enable_room_list_search self.federation = hs.get_federation_client() hs.get_federation_registry().register_query_handler( @@ -411,6 +412,13 @@ class DirectoryHandler(BaseHandler): if visibility not in ["public", "private"]: raise SynapseError(400, "Invalid visibility setting") + if visibility == "public" and not self.enable_room_list_search: + # The room list has been disabled. + raise AuthError( + 403, + "This user is not permitted to publish rooms to the room list" + ) + room = yield self.store.get_room(room_id) if room is None: raise SynapseError(400, "Unknown room") -- cgit 1.5.1 From a902d131804890ee6cc4137a669be92ceb2253c4 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 20 Mar 2019 16:02:25 +0000 Subject: Batch up outgoing read-receipts to reduce federation traffic. (#4890) Rate-limit outgoing read-receipts as per #4730. --- changelog.d/4890.feature | 1 + docs/sample_config.yaml | 8 ++ synapse/config/ratelimiting.py | 12 ++ synapse/federation/sender/__init__.py | 115 +++++++++++++++--- synapse/federation/sender/per_destination_queue.py | 64 ++++++++++- synapse/handlers/receipts.py | 2 +- tests/federation/test_federation_sender.py | 128 +++++++++++++++++++++ 7 files changed, 308 insertions(+), 22 deletions(-) create mode 100644 changelog.d/4890.feature create mode 100644 tests/federation/test_federation_sender.py (limited to 'synapse/handlers') diff --git a/changelog.d/4890.feature b/changelog.d/4890.feature new file mode 100644 index 0000000000..8d74262250 --- /dev/null +++ b/changelog.d/4890.feature @@ -0,0 +1 @@ +Batch up outgoing read-receipts to reduce federation traffic. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index f9886a900d..d72b90a37b 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -438,6 +438,14 @@ log_config: "CONFDIR/SERVERNAME.log.config" # #federation_rc_concurrent: 3 +# Target outgoing federation transaction frequency for sending read-receipts, +# per-room. +# +# If we end up trying to send out more read-receipts, they will get buffered up +# into fewer transactions. +# +#federation_rr_transactions_per_room_per_second: 50 + # Directory where uploaded images and attachments are stored. diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index 898a19dd8c..5a68399e63 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -42,6 +42,10 @@ class RatelimitConfig(Config): self.federation_rc_reject_limit = config.get("federation_rc_reject_limit", 50) self.federation_rc_concurrent = config.get("federation_rc_concurrent", 3) + self.federation_rr_transactions_per_room_per_second = config.get( + "federation_rr_transactions_per_room_per_second", 50, + ) + def default_config(self, **kwargs): return """\ ## Ratelimiting ## @@ -111,4 +115,12 @@ class RatelimitConfig(Config): # single server # #federation_rc_concurrent: 3 + + # Target outgoing federation transaction frequency for sending read-receipts, + # per-room. + # + # If we end up trying to send out more read-receipts, they will get buffered up + # into fewer transactions. + # + #federation_rr_transactions_per_room_per_second: 50 """ diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index 1bcc353d18..1dc041752b 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -104,7 +104,26 @@ class FederationSender(object): self._processing_pending_presence = False + # map from room_id to a set of PerDestinationQueues which we believe are + # awaiting a call to flush_read_receipts_for_room. The presence of an entry + # here for a given room means that we are rate-limiting RR flushes to that room, + # and that there is a pending call to _flush_rrs_for_room in the system. + self._queues_awaiting_rr_flush_by_room = { + } # type: dict[str, set[PerDestinationQueue]] + + self._rr_txn_interval_per_room_ms = ( + 1000.0 / hs.get_config().federation_rr_transactions_per_room_per_second + ) + def _get_per_destination_queue(self, destination): + """Get or create a PerDestinationQueue for the given destination + + Args: + destination (str): server_name of remote server + + Returns: + PerDestinationQueue + """ queue = self._per_destination_queues.get(destination) if not queue: queue = PerDestinationQueue(self.hs, self._transaction_manager, destination) @@ -250,33 +269,91 @@ class FederationSender(object): Args: receipt (synapse.types.ReadReceipt): receipt to be sent """ + + # Some background on the rate-limiting going on here. + # + # It turns out that if we attempt to send out RRs as soon as we get them from + # a client, then we end up trying to do several hundred Hz of federation + # transactions. (The number of transactions scales as O(N^2) on the size of a + # room, since in a large room we have both more RRs coming in, and more servers + # to send them to.) + # + # This leads to a lot of CPU load, and we end up getting behind. The solution + # currently adopted is as follows: + # + # The first receipt in a given room is sent out immediately, at time T0. Any + # further receipts are, in theory, batched up for N seconds, where N is calculated + # based on the number of servers in the room to achieve a transaction frequency + # of around 50Hz. So, for example, if there were 100 servers in the room, then + # N would be 100 / 50Hz = 2 seconds. + # + # Then, after T+N, we flush out any receipts that have accumulated, and restart + # the timer to flush out more receipts at T+2N, etc. If no receipts accumulate, + # we stop the cycle and go back to the start. + # + # However, in practice, it is often possible to flush out receipts earlier: in + # particular, if we are sending a transaction to a given server anyway (for + # example, because we have a PDU or a RR in another room to send), then we may + # as well send out all of the pending RRs for that server. So it may be that + # by the time we get to T+N, we don't actually have any RRs left to send out. + # Nevertheless we continue to buffer up RRs for the room in question until we + # reach the point that no RRs arrive between timer ticks. + # + # For even more background, see https://github.com/matrix-org/synapse/issues/4730. + + room_id = receipt.room_id + # Work out which remote servers should be poked and poke them. - domains = yield self.state.get_current_hosts_in_room(receipt.room_id) + domains = yield self.state.get_current_hosts_in_room(room_id) domains = [d for d in domains if d != self.server_name] if not domains: return - logger.debug("Sending receipt to: %r", domains) + queues_pending_flush = self._queues_awaiting_rr_flush_by_room.get( + room_id + ) - content = { - receipt.room_id: { - receipt.receipt_type: { - receipt.user_id: { - "event_ids": receipt.event_ids, - "data": receipt.data, - }, - }, - }, - } - key = (receipt.room_id, receipt.receipt_type, receipt.user_id) + # if there is no flush yet scheduled, we will send out these receipts with + # immediate flushes, and schedule the next flush for this room. + if queues_pending_flush is not None: + logger.debug("Queuing receipt for: %r", domains) + else: + logger.debug("Sending receipt to: %r", domains) + self._schedule_rr_flush_for_room(room_id, len(domains)) for domain in domains: - self.build_and_send_edu( - destination=domain, - edu_type="m.receipt", - content=content, - key=key, - ) + queue = self._get_per_destination_queue(domain) + queue.queue_read_receipt(receipt) + + # if there is already a RR flush pending for this room, then make sure this + # destination is registered for the flush + if queues_pending_flush is not None: + queues_pending_flush.add(queue) + else: + queue.flush_read_receipts_for_room(room_id) + + def _schedule_rr_flush_for_room(self, room_id, n_domains): + # that is going to cause approximately len(domains) transactions, so now back + # off for that multiplied by RR_TXN_INTERVAL_PER_ROOM + backoff_ms = self._rr_txn_interval_per_room_ms * n_domains + + logger.debug("Scheduling RR flush in %s in %d ms", room_id, backoff_ms) + self.clock.call_later(backoff_ms, self._flush_rrs_for_room, room_id) + self._queues_awaiting_rr_flush_by_room[room_id] = set() + + def _flush_rrs_for_room(self, room_id): + queues = self._queues_awaiting_rr_flush_by_room.pop(room_id) + logger.debug("Flushing RRs in %s to %s", room_id, queues) + + if not queues: + # no more RRs arrived for this room; we are done. + return + + # schedule the next flush + self._schedule_rr_flush_for_room(room_id, len(queues)) + + for queue in queues: + queue.flush_read_receipts_for_room(room_id) @logcontext.preserve_fn # the caller should not yield on this @defer.inlineCallbacks diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index 385039add4..be99211003 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -80,6 +80,10 @@ class PerDestinationQueue(object): # destination self._pending_presence = {} # type: dict[str, UserPresenceState] + # room_id -> receipt_type -> user_id -> receipt_dict + self._pending_rrs = {} + self._rrs_pending_flush = False + # stream_id of last successfully sent to-device message. # NB: may be a long or an int. self._last_device_stream_id = 0 @@ -87,6 +91,9 @@ class PerDestinationQueue(object): # stream_id of last successfully sent device list update. self._last_device_list_stream_id = 0 + def __str__(self): + return "PerDestinationQueue[%s]" % self._destination + def pending_pdu_count(self): return len(self._pending_pdus) @@ -118,6 +125,30 @@ class PerDestinationQueue(object): }) self.attempt_new_transaction() + def queue_read_receipt(self, receipt): + """Add a RR to the list to be sent. Doesn't start the transmission loop yet + (see flush_read_receipts_for_room) + + Args: + receipt (synapse.api.receipt_info.ReceiptInfo): receipt to be queued + """ + self._pending_rrs.setdefault( + receipt.room_id, {}, + ).setdefault( + receipt.receipt_type, {} + )[receipt.user_id] = { + "event_ids": receipt.event_ids, + "data": receipt.data, + } + + def flush_read_receipts_for_room(self, room_id): + # if we don't have any read-receipts for this room, it may be that we've already + # sent them out, so we don't need to flush. + if room_id not in self._pending_rrs: + return + self._rrs_pending_flush = True + self.attempt_new_transaction() + def send_keyed_edu(self, edu, key): self._pending_edus_keyed[(edu.edu_type, key)] = edu self.attempt_new_transaction() @@ -183,10 +214,12 @@ class PerDestinationQueue(object): # We can only include at most 50 PDUs per transactions pending_pdus, self._pending_pdus = pending_pdus[:50], pending_pdus[50:] - pending_edus = self._pending_edus + pending_edus = [] + + pending_edus.extend(self._get_rr_edus(force_flush=False)) # We can only include at most 100 EDUs per transactions - pending_edus, self._pending_edus = pending_edus[:100], pending_edus[100:] + pending_edus.extend(self._pop_pending_edus(100 - len(pending_edus))) pending_edus.extend( self._pending_edus_keyed.values() @@ -224,6 +257,11 @@ class PerDestinationQueue(object): self._last_device_stream_id = device_stream_id return + # if we've decided to send a transaction anyway, and we have room, we + # may as well send any pending RRs + if len(pending_edus) < 100: + pending_edus.extend(self._get_rr_edus(force_flush=True)) + # END CRITICAL SECTION success = yield self._transaction_manager.send_new_transaction( @@ -285,6 +323,28 @@ class PerDestinationQueue(object): # We want to be *very* sure we clear this after we stop processing self.transmission_loop_running = False + def _get_rr_edus(self, force_flush): + if not self._pending_rrs: + return + if not force_flush and not self._rrs_pending_flush: + # not yet time for this lot + return + + edu = Edu( + origin=self._server_name, + destination=self._destination, + edu_type="m.receipt", + content=self._pending_rrs, + ) + self._pending_rrs = {} + self._rrs_pending_flush = False + yield edu + + def _pop_pending_edus(self, limit): + pending_edus = self._pending_edus + pending_edus, self._pending_edus = pending_edus[:limit], pending_edus[limit:] + return pending_edus + @defer.inlineCallbacks def _get_new_device_messages(self): last_device_stream_id = self._last_device_stream_id diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index dd783ae134..274d2946ad 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -118,7 +118,7 @@ class ReceiptsHandler(BaseHandler): if not is_new: return - self.federation.send_read_receipt(receipt) + yield self.federation.send_read_receipt(receipt) @defer.inlineCallbacks def get_receipts_for_room(self, room_id, to_key): diff --git a/tests/federation/test_federation_sender.py b/tests/federation/test_federation_sender.py new file mode 100644 index 0000000000..28e7e27416 --- /dev/null +++ b/tests/federation/test_federation_sender.py @@ -0,0 +1,128 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from mock import Mock + +from twisted.internet import defer + +from synapse.types import ReadReceipt + +from tests.unittest import HomeserverTestCase + + +class FederationSenderTestCases(HomeserverTestCase): + def make_homeserver(self, reactor, clock): + return super(FederationSenderTestCases, self).setup_test_homeserver( + state_handler=Mock(spec=["get_current_hosts_in_room"]), + federation_transport_client=Mock(spec=["send_transaction"]), + ) + + def test_send_receipts(self): + mock_state_handler = self.hs.get_state_handler() + mock_state_handler.get_current_hosts_in_room.return_value = ["test", "host2"] + + mock_send_transaction = self.hs.get_federation_transport_client().send_transaction + mock_send_transaction.return_value = defer.succeed({}) + + sender = self.hs.get_federation_sender() + receipt = ReadReceipt("room_id", "m.read", "user_id", ["event_id"], {"ts": 1234}) + self.successResultOf(sender.send_read_receipt(receipt)) + + self.pump() + + # expect a call to send_transaction + mock_send_transaction.assert_called_once() + json_cb = mock_send_transaction.call_args[0][1] + data = json_cb() + self.assertEqual(data['edus'], [ + { + 'edu_type': 'm.receipt', + 'content': { + 'room_id': { + 'm.read': { + 'user_id': { + 'event_ids': ['event_id'], + 'data': {'ts': 1234}, + }, + }, + }, + }, + }, + ]) + + def test_send_receipts_with_backoff(self): + """Send two receipts in quick succession; the second should be flushed, but + only after 20ms""" + mock_state_handler = self.hs.get_state_handler() + mock_state_handler.get_current_hosts_in_room.return_value = ["test", "host2"] + + mock_send_transaction = self.hs.get_federation_transport_client().send_transaction + mock_send_transaction.return_value = defer.succeed({}) + + sender = self.hs.get_federation_sender() + receipt = ReadReceipt("room_id", "m.read", "user_id", ["event_id"], {"ts": 1234}) + self.successResultOf(sender.send_read_receipt(receipt)) + + self.pump() + + # expect a call to send_transaction + mock_send_transaction.assert_called_once() + json_cb = mock_send_transaction.call_args[0][1] + data = json_cb() + self.assertEqual(data['edus'], [ + { + 'edu_type': 'm.receipt', + 'content': { + 'room_id': { + 'm.read': { + 'user_id': { + 'event_ids': ['event_id'], + 'data': {'ts': 1234}, + }, + }, + }, + }, + }, + ]) + mock_send_transaction.reset_mock() + + # send the second RR + receipt = ReadReceipt("room_id", "m.read", "user_id", ["other_id"], {"ts": 1234}) + self.successResultOf(sender.send_read_receipt(receipt)) + self.pump() + mock_send_transaction.assert_not_called() + + self.reactor.advance(19) + mock_send_transaction.assert_not_called() + + self.reactor.advance(10) + mock_send_transaction.assert_called_once() + json_cb = mock_send_transaction.call_args[0][1] + data = json_cb() + self.assertEqual(data['edus'], [ + { + 'edu_type': 'm.receipt', + 'content': { + 'room_id': { + 'm.read': { + 'user_id': { + 'event_ids': ['other_id'], + 'data': {'ts': 1234}, + }, + }, + }, + }, + }, + ]) -- cgit 1.5.1 From 74c46d81fa7c3e4f1cfc3688d9ce3f46d35ee5a5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Mar 2019 16:50:23 +0000 Subject: Only require consent for events with an associated request There are a number of instances where a server or admin may puppet a user to join/leave rooms, which we don't want to fail if the user has not consented to the privacy policy. We fix this by adding a check to test if the requester has an associated access_token, which is used as a proxy to answer the question of whether the action is being done on behalf of a real request from the user. --- synapse/handlers/message.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 55787563c0..ac9d9c1a83 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -316,8 +316,12 @@ class EventCreationHandler(object): target, e ) + # Check if the user has accepted the privacy policy. We only do this if + # the requester has an associated access_token_id, which indicates that + # this action came from a user request rather than an automatice server + # or admin action. is_exempt = yield self._is_exempt_from_privacy_policy(builder, requester) - if not is_exempt: + if requester.access_token_id and not is_exempt: yield self.assert_accepted_privacy_policy(requester) if token_id is not None: -- cgit 1.5.1 From 7d47cc1305e1504af78ff13c49b43b81b1ac5791 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Mar 2019 17:08:36 +0000 Subject: Move requester check into assert_accepted_privacy_policy --- synapse/handlers/message.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index ac9d9c1a83..345a3e0ecd 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -316,12 +316,8 @@ class EventCreationHandler(object): target, e ) - # Check if the user has accepted the privacy policy. We only do this if - # the requester has an associated access_token_id, which indicates that - # this action came from a user request rather than an automatice server - # or admin action. is_exempt = yield self._is_exempt_from_privacy_policy(builder, requester) - if requester.access_token_id and not is_exempt: + if not is_exempt: yield self.assert_accepted_privacy_policy(requester) if token_id is not None: @@ -396,6 +392,13 @@ class EventCreationHandler(object): if requester.app_service is not None: return + # Check if the user has accepted the privacy policy. We only do this if + # the requester has an associated access_token_id, which indicates that + # this action came from a user request rather than an automatice server + # or admin action. + if requester.access_token_id is None: + return + user_id = requester.user.to_string() # exempt the system notices user -- cgit 1.5.1 From aa959a6c0705067cd01d1fd0ba42f51f320ed51b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Mar 2019 17:39:29 +0000 Subject: Use flags --- synapse/handlers/_base.py | 1 + synapse/handlers/deactivate_account.py | 1 + synapse/handlers/message.py | 18 +++++------------- synapse/handlers/room_member.py | 6 ++++++ synapse/rest/client/v1/admin.py | 6 ++++-- 5 files changed, 17 insertions(+), 15 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index d8d86d6ff3..ac09d03ba9 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -165,6 +165,7 @@ class BaseHandler(object): member_event.room_id, "leave", ratelimit=False, + require_consent=False, ) except Exception as e: logger.exception("Error kicking guest user: %s" % (e,)) diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index 75fe50c42c..97d3f31d98 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -164,6 +164,7 @@ class DeactivateAccountHandler(BaseHandler): room_id, "leave", ratelimit=False, + require_consent=False, ) except Exception: logger.exception( diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 345a3e0ecd..587fbfbe86 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -255,7 +255,7 @@ class EventCreationHandler(object): @defer.inlineCallbacks def create_event(self, requester, event_dict, token_id=None, txn_id=None, - prev_events_and_hashes=None): + prev_events_and_hashes=None, require_consent=True): """ Given a dict from a client, create a new event. @@ -276,6 +276,9 @@ class EventCreationHandler(object): where *hashes* is a map from algorithm to hash. If None, they will be requested from the database. + + require_consent (bool): Whether to check if the requester has + consented to privacy policy. Raises: ResourceLimitError if server is blocked to some resource being exceeded @@ -317,7 +320,7 @@ class EventCreationHandler(object): ) is_exempt = yield self._is_exempt_from_privacy_policy(builder, requester) - if not is_exempt: + if require_consent and not is_exempt: yield self.assert_accepted_privacy_policy(requester) if token_id is not None: @@ -388,17 +391,6 @@ class EventCreationHandler(object): if self._block_events_without_consent_error is None: return - # exempt AS users from needing consent - if requester.app_service is not None: - return - - # Check if the user has accepted the privacy policy. We only do this if - # the requester has an associated access_token_id, which indicates that - # this action came from a user request rather than an automatice server - # or admin action. - if requester.access_token_id is None: - return - user_id = requester.user.to_string() # exempt the system notices user diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index aead9e4608..71ce5b54e5 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -160,6 +160,7 @@ class RoomMemberHandler(object): txn_id=None, ratelimit=True, content=None, + require_consent=True, ): user_id = target.to_string() @@ -185,6 +186,7 @@ class RoomMemberHandler(object): token_id=requester.access_token_id, txn_id=txn_id, prev_events_and_hashes=prev_events_and_hashes, + require_consent=require_consent, ) # Check if this event matches the previous membership event for the user. @@ -305,6 +307,7 @@ class RoomMemberHandler(object): third_party_signed=None, ratelimit=True, content=None, + require_consent=True, ): key = (room_id,) @@ -319,6 +322,7 @@ class RoomMemberHandler(object): third_party_signed=third_party_signed, ratelimit=ratelimit, content=content, + require_consent=require_consent, ) defer.returnValue(result) @@ -335,6 +339,7 @@ class RoomMemberHandler(object): third_party_signed=None, ratelimit=True, content=None, + require_consent=True, ): content_specified = bool(content) if content is None: @@ -516,6 +521,7 @@ class RoomMemberHandler(object): ratelimit=ratelimit, prev_events_and_hashes=prev_events_and_hashes, content=content, + require_consent=require_consent, ) defer.returnValue(res) diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 56ad65515a..e788769639 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -516,7 +516,8 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): room_id=room_id, action=Membership.LEAVE, content={}, - ratelimit=False + ratelimit=False, + require_consent=False, ) yield self.room_member_handler.forget(target_requester.user, room_id) @@ -527,7 +528,8 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): room_id=new_room_id, action=Membership.JOIN, content={}, - ratelimit=False + ratelimit=False, + require_consent=False, ) kicked_users.append(user_id) -- cgit 1.5.1 From cd62981a6a083945547100c8d0f30380ea17c6e3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Mar 2019 17:51:27 +0000 Subject: Revert spurious delete --- synapse/handlers/message.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'synapse/handlers') diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 587fbfbe86..9b41c7b205 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -391,6 +391,10 @@ class EventCreationHandler(object): if self._block_events_without_consent_error is None: return + # exempt AS users from needing consent + if requester.app_service is not None: + return + user_id = requester.user.to_string() # exempt the system notices user -- cgit 1.5.1