From 71b625d80806886794c5e72f7ff11432e99b736c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Feb 2019 16:54:35 +0000 Subject: Stop backpaginating when events not visible --- synapse/handlers/federation.py | 31 +++++++++++++++++++++++++++++++ synapse/storage/event_federation.py | 22 ++++++++++++++++++++++ synapse/visibility.py | 30 +++++++++++++++++++++++++----- 3 files changed, 78 insertions(+), 5 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 083f2e0ac3..7b3834a915 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -830,6 +830,37 @@ class FederationHandler(BaseHandler): logger.debug("Not backfilling as no extremeties found.") return + # We only want to paginate if we can actually see the events we'll get, + # as otherwise we'll just spend a lot of resources to get redacted + # events. + # + # We do this by filtering all the extremities and seeing if any remain. + # Given we don't have the extremity events themselves, we need to + # actually check the events that references them. + # + # TODO: Filter the list of extremities if we do do a backfill + # TODO: Correctly handle the case where we are allowed to see the + # forward event but not the extremity, e.g. in the case of initial + # join of the server. + + forward_events = yield self.store.get_forward_events( + list(extremities), + ) + + extremities_events = yield self.store.get_events( + forward_events, + check_redacted=False, + get_prev_content=False, + ) + + filtered_extremities = yield filter_events_for_server( + self.store, self.server_name, list(extremities_events.values()), + redact=False, + ) + + if not filtered_extremities: + defer.returnValue(False) + # Check if we reached a point where we should start backfilling. 
sorted_extremeties_tuple = sorted( extremities.items(), diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 38809ed0fc..830b171caa 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -442,6 +442,28 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, event_results.reverse() return event_results + @defer.inlineCallbacks + def get_forward_events(self, event_ids): + """Fetch all events that have the given events as a prev event + + Args: + event_ids (iterable[str]) + + Returns: + Deferred[list[str]] + """ + rows = yield self._simple_select_many_batch( + table="event_edges", + column="prev_event_id", + iterable=event_ids, + retcols=("event_id",), + desc="get_forward_events" + ) + + defer.returnValue([ + row["event_id"] for row in rows + ]) + class EventFederationStore(EventFederationWorkerStore): """ Responsible for storing and serving up the various graphs associated diff --git a/synapse/visibility.py b/synapse/visibility.py index 0281a7c919..f6dcc96630 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -216,7 +216,20 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, @defer.inlineCallbacks -def filter_events_for_server(store, server_name, events): +def filter_events_for_server(store, server_name, events, redact=True): + """Filter a list of events based on whether given server is allowed to + see them. + + Args: + store (DataStore) + server_name (str) + events (iterable[FrozenEvent]) + redact (bool): Whether to return a redacted version of the event, or + to filter them out entirely. + + Returns + Deferred[list[FrozenEvent]] + """ # Whatever else we do, we need to check for senders which have requested # erasure of their data. 
erased_senders = yield store.are_users_erased( @@ -231,7 +244,10 @@ def filter_events_for_server(store, server_name, events): "Sender of %s has been erased, redacting", event.event_id, ) - return prune_event(event) + if redact: + return prune_event(event) + else: + return None # state will be None if we decided we didn't need to filter by # room membership. @@ -265,7 +281,10 @@ def filter_events_for_server(store, server_name, events): return event else: # server has no users in the room: redact - return prune_event(event) + if redact: + return prune_event(event) + else: + return None return event @@ -361,7 +380,8 @@ def filter_events_for_server(store, server_name, events): for e_id, key_to_eid in iteritems(event_to_state_ids) } - defer.returnValue([ + to_return = ( redact_disallowed(e, event_to_state[e.event_id]) for e in events - ]) + ) + defer.returnValue([e for e in to_return if e is not None]) -- cgit 1.5.1 From 313987187ee04dce5e70db17c1ab9377f283be7e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 26 Feb 2019 15:04:34 +0000 Subject: Fix tightloop over connecting to replication server If the client failed to process incoming commands during the initial set up of the replication connection it would immediately disconnect and reconnect, resulting in a tight loop. This can happen, for example, when subscribing to a stream that has a row that is too long in the backlog. The fix here is to not consider the connection successfully set up until the client has successfully subscribed and caught up with the streams. This ensures that the retry logic timers aren't reset until then, meaning that if an error does happen during start up the client will continue backing off before retrying again. 
--- docs/tcp_replication.rst | 4 +++- synapse/replication/tcp/client.py | 38 ++++++++++++++++++++++++++++++++++--- synapse/replication/tcp/commands.py | 5 ++++- 3 files changed, 42 insertions(+), 5 deletions(-) (limited to 'synapse') diff --git a/docs/tcp_replication.rst b/docs/tcp_replication.rst index 73436cea62..75e723484c 100644 --- a/docs/tcp_replication.rst +++ b/docs/tcp_replication.rst @@ -188,7 +188,9 @@ RDATA (S) A single update in a stream POSITION (S) - The position of the stream has been updated + The position of the stream has been updated. Sent to the client after all + missing updates for a stream have been sent to the client and they're now + up to date. ERROR (S, C) There was an error diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 586dddb40b..914cd24b55 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -54,7 +54,6 @@ class ReplicationClientFactory(ReconnectingClientFactory): def buildProtocol(self, addr): logger.info("Connected to replication: %r", addr) - self.resetDelay() return ClientReplicationStreamProtocol( self.client_name, self.server_name, self._clock, self.handler ) @@ -90,15 +89,23 @@ class ReplicationClientHandler(object): # Used for tests. self.awaiting_syncs = {} + # Set of stream names that have been subscribe to, but haven't yet + # caught up with. This is used to track when the client has been fully + # connected to the remote. + self.streams_connecting = None + + # The factory used to create connections. + self.factory = None + def start_replication(self, hs): """Helper method to start a replication connection to the remote server using TCP. 
""" client_name = hs.config.worker_name - factory = ReplicationClientFactory(hs, client_name, self) + self.factory = ReplicationClientFactory(hs, client_name, self) host = hs.config.worker_replication_host port = hs.config.worker_replication_port - hs.get_reactor().connectTCP(host, port, factory) + hs.get_reactor().connectTCP(host, port, self.factory) def on_rdata(self, stream_name, token, rows): """Called when we get new replication data. By default this just pokes @@ -115,6 +122,12 @@ class ReplicationClientHandler(object): Can be overriden in subclasses to handle more. """ + # When we get a `POSITION` command it means we've finished getting + # missing updates for the given stream, and are now up to date. + self.streams_connecting.discard(stream_name) + if not self.streams_connecting: + self.finished_connecting() + return self.store.process_replication_rows(stream_name, token, []) def on_sync(self, data): @@ -140,6 +153,10 @@ class ReplicationClientHandler(object): args["account_data"] = user_account_data elif room_account_data: args["account_data"] = room_account_data + + # Record which streams we're in the process of subscribing to + self.streams_connecting = set(args.keys()) + return args def get_currently_syncing_users(self): @@ -204,3 +221,18 @@ class ReplicationClientHandler(object): for cmd in self.pending_commands: connection.send_command(cmd) self.pending_commands = [] + + # This will happen if we don't actually subscribe to any streams + if not self.streams_connecting: + self.finished_connecting() + + def finished_connecting(self): + """Called when we have successfully subscribed and caught up to all + streams we're interested in. + """ + logger.info("Finished connecting to server") + + # We don't reset the delay any earlier as otherwise if there is a + # problem during start up we'll end up tight looping connecting to the + # server. 
+ self.factory.resetDelay() diff --git a/synapse/replication/tcp/commands.py b/synapse/replication/tcp/commands.py index 327556f6a1..2098c32a77 100644 --- a/synapse/replication/tcp/commands.py +++ b/synapse/replication/tcp/commands.py @@ -127,8 +127,11 @@ class RdataCommand(Command): class PositionCommand(Command): - """Sent by the client to tell the client the stream postition without + """Sent by the server to tell the client the stream postition without needing to send an RDATA. + + Sent to the client after all missing updates for a stream have been sent + to the client and they're now up to date. """ NAME = "POSITION" -- cgit 1.5.1 From 25814921f1900e92b50830d6616762b174e82773 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 26 Feb 2019 15:12:29 +0000 Subject: Increase the max delay between retry attempts Otherwise if you have many workers they can easily take out master with their connection attempts --- synapse/replication/tcp/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 914cd24b55..51f90655d0 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -39,7 +39,7 @@ class ReplicationClientFactory(ReconnectingClientFactory): Accepts a handler that will be called when new data is available or data is required. 
""" - maxDelay = 5 # Try at least once every N seconds + maxDelay = 30 # Try at least once every N seconds def __init__(self, hs, client_name, handler): self.client_name = client_name -- cgit 1.5.1 From 6870fc496ff3da5075fec74e40515c03c929915f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 27 Feb 2019 10:22:52 +0000 Subject: Move connecting logic into ClientReplicationStreamProtocol --- synapse/replication/tcp/client.py | 18 ------------------ synapse/replication/tcp/protocol.py | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 18 deletions(-) (limited to 'synapse') diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 51f90655d0..e558f90e1a 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -89,11 +89,6 @@ class ReplicationClientHandler(object): # Used for tests. self.awaiting_syncs = {} - # Set of stream names that have been subscribe to, but haven't yet - # caught up with. This is used to track when the client has been fully - # connected to the remote. - self.streams_connecting = None - # The factory used to create connections. self.factory = None @@ -122,12 +117,6 @@ class ReplicationClientHandler(object): Can be overriden in subclasses to handle more. """ - # When we get a `POSITION` command it means we've finished getting - # missing updates for the given stream, and are now up to date. 
- self.streams_connecting.discard(stream_name) - if not self.streams_connecting: - self.finished_connecting() - return self.store.process_replication_rows(stream_name, token, []) def on_sync(self, data): @@ -154,9 +143,6 @@ class ReplicationClientHandler(object): elif room_account_data: args["account_data"] = room_account_data - # Record which streams we're in the process of subscribing to - self.streams_connecting = set(args.keys()) - return args def get_currently_syncing_users(self): @@ -222,10 +208,6 @@ class ReplicationClientHandler(object): connection.send_command(cmd) self.pending_commands = [] - # This will happen if we don't actually subscribe to any streams - if not self.streams_connecting: - self.finished_connecting() - def finished_connecting(self): """Called when we have successfully subscribed and caught up to all streams we're interested in. diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index 0b3fe6cbf5..6123c995b9 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -511,6 +511,11 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): self.server_name = server_name self.handler = handler + # Set of stream names that have been subscribe to, but haven't yet + # caught up with. This is used to track when the client has been fully + # connected to the remote. + self.streams_connecting = set() + # Map of stream to batched updates. See RdataCommand for info on how # batching works. 
self.pending_batches = {} @@ -533,6 +538,10 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): # We've now finished connecting to so inform the client handler self.handler.update_connection(self) + # This will happen if we don't actually subscribe to any streams + if not self.streams_connecting: + self.handler.finished_connecting() + def on_SERVER(self, cmd): if cmd.data != self.server_name: logger.error("[%s] Connected to wrong remote: %r", self.id(), cmd.data) @@ -562,6 +571,12 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): return self.handler.on_rdata(stream_name, cmd.token, rows) def on_POSITION(self, cmd): + # When we get a `POSITION` command it means we've finished getting + # missing updates for the given stream, and are now up to date. + self.streams_connecting.discard(cmd.stream_name) + if not self.streams_connecting: + self.handler.finished_connecting() + return self.handler.on_position(cmd.stream_name, cmd.token) def on_SYNC(self, cmd): @@ -578,6 +593,8 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): self.id(), stream_name, token ) + self.streams_connecting.add(stream_name) + self.send_command(ReplicateCommand(stream_name, token)) def on_connection_closed(self): -- cgit 1.5.1 From b183fef9ac8075aaab892d1042596e0bba824167 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 27 Feb 2019 13:06:10 +0000 Subject: Update comments --- synapse/handlers/federation.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 7b3834a915..de839ca527 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -836,12 +836,22 @@ class FederationHandler(BaseHandler): # # We do this by filtering all the extremities and seeing if any remain. 
# Given we don't have the extremity events themselves, we need to - # actually check the events that references them. + # actually check the events that reference them. + # + # *Note*: the spec wants us to keep backfilling until we reach the start + # of the room in case we are allowed to see some of the history. However + # in practice that causes more issues than its worth, as a) its + # relatively rare for there to be any visible history and b) even when + # there is its often sufficiently long ago that clients would stop + # attempting to paginate before backfill reached the visible history. + # + # TODO: If we do do a backfill the we should filter the extremities to + # only include those that point to visible portions of history. # - # TODO: Filter the list of extremities if we do do a backfill # TODO: Correctly handle the case where we are allowed to see the # forward event but not the extremity, e.g. in the case of initial - # join of the server. + # join of the server where we are allowed to see the join event but + # not anything before it. forward_events = yield self.store.get_forward_events( list(extremities), -- cgit 1.5.1 From 4cff9376f7b07d3c60b1f882cde5848149862dd9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 27 Feb 2019 13:43:53 +0000 Subject: Move server key queries to federation reader --- docs/workers.rst | 1 + synapse/app/federation_reader.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/docs/workers.rst b/docs/workers.rst index 3ba5879f76..7552455a4e 100644 --- a/docs/workers.rst +++ b/docs/workers.rst @@ -182,6 +182,7 @@ endpoints matching the following regular expressions:: ^/_matrix/federation/v1/event_auth/ ^/_matrix/federation/v1/exchange_third_party_invite/ ^/_matrix/federation/v1/send/ + ^/_matrix/key/v2/query The above endpoints should all be routed to the federation_reader worker by the reverse-proxy configuration. 
diff --git a/synapse/app/federation_reader.py b/synapse/app/federation_reader.py index b116c17669..7da79dc827 100644 --- a/synapse/app/federation_reader.py +++ b/synapse/app/federation_reader.py @@ -21,7 +21,7 @@ from twisted.web.resource import NoResource import synapse from synapse import events -from synapse.api.urls import FEDERATION_PREFIX +from synapse.api.urls import FEDERATION_PREFIX, SERVER_KEY_V2_PREFIX from synapse.app import _base from synapse.config._base import ConfigError from synapse.config.homeserver import HomeServerConfig @@ -44,6 +44,7 @@ from synapse.replication.slave.storage.registration import SlavedRegistrationSto from synapse.replication.slave.storage.room import RoomStore from synapse.replication.slave.storage.transactions import SlavedTransactionStore from synapse.replication.tcp.client import ReplicationClientHandler +from synapse.rest.key.v2 import KeyApiV2Resource from synapse.server import HomeServer from synapse.storage.engines import create_engine from synapse.util.httpresourcetree import create_resource_tree @@ -99,6 +100,9 @@ class FederationReaderServer(HomeServer): ), }) + if name in ["keys", "federation"]: + resources[SERVER_KEY_V2_PREFIX] = KeyApiV2Resource(self) + root_resource = create_resource_tree(resources, NoResource()) _base.listen_tcp( -- cgit 1.5.1 From 1e315017d3c0dcde20e781fb3c87bc0e54c53cd1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 27 Feb 2019 13:53:46 +0000 Subject: When presence is enabled don't send over replication --- synapse/federation/federation_server.py | 3 +++ synapse/replication/slave/storage/presence.py | 7 +++++-- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 569eb277a9..81f3b4b1ff 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -886,6 +886,9 @@ class 
ReplicationFederationHandlerRegistry(FederationHandlerRegistry): def on_edu(self, edu_type, origin, content): """Overrides FederationHandlerRegistry """ + if not self.config.use_presence and edu_type == "m.presence": + return + handler = self.edu_handlers.get(edu_type) if handler: return super(ReplicationFederationHandlerRegistry, self).on_edu( diff --git a/synapse/replication/slave/storage/presence.py b/synapse/replication/slave/storage/presence.py index 92447b00d4..9e530defe0 100644 --- a/synapse/replication/slave/storage/presence.py +++ b/synapse/replication/slave/storage/presence.py @@ -54,8 +54,11 @@ class SlavedPresenceStore(BaseSlavedStore): def stream_positions(self): result = super(SlavedPresenceStore, self).stream_positions() - position = self._presence_id_gen.get_current_token() - result["presence"] = position + + if self.hs.config.use_presence: + position = self._presence_id_gen.get_current_token() + result["presence"] = position + return result def process_replication_rows(self, stream_name, token, rows): -- cgit 1.5.1 From 54f9ce11a7100e7207bb068a84dfaba886995c2c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 27 Feb 2019 14:26:08 +0000 Subject: Move /account/3pid to client_reader --- docs/workers.rst | 1 + synapse/app/client_reader.py | 2 ++ synapse/storage/registration.py | 66 ++++++++++++++++++++--------------------- 3 files changed, 36 insertions(+), 33 deletions(-) (limited to 'synapse') diff --git a/docs/workers.rst b/docs/workers.rst index 3ba5879f76..0170fba898 100644 --- a/docs/workers.rst +++ b/docs/workers.rst @@ -223,6 +223,7 @@ following regular expressions:: ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/members$ ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state$ ^/_matrix/client/(api/v1|r0|unstable)/login$ + ^/_matrix/client/(api/v1|r0|unstable)/account/3pid$ Additionally, the following REST endpoints can be handled, but all requests must be routed to the same instance:: diff --git a/synapse/app/client_reader.py 
b/synapse/app/client_reader.py index 043b48f8f3..5070094cad 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -48,6 +48,7 @@ from synapse.rest.client.v1.room import ( RoomMemberListRestServlet, RoomStateRestServlet, ) +from synapse.rest.client.v2_alpha.account import ThreepidRestServlet from synapse.rest.client.v2_alpha.register import RegisterRestServlet from synapse.server import HomeServer from synapse.storage.engines import create_engine @@ -96,6 +97,7 @@ class ClientReaderServer(HomeServer): RoomEventContextServlet(self).register(resource) RegisterRestServlet(self).register(resource) LoginRestServlet(self).register(resource) + ThreepidRestServlet(self).register(resource) resources.update({ "/_matrix/client/r0": resource, diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 9b9572890b..9b6c28892c 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -295,6 +295,39 @@ class RegistrationWorkerStore(SQLBaseStore): return ret['user_id'] return None + @defer.inlineCallbacks + def user_add_threepid(self, user_id, medium, address, validated_at, added_at): + yield self._simple_upsert("user_threepids", { + "medium": medium, + "address": address, + }, { + "user_id": user_id, + "validated_at": validated_at, + "added_at": added_at, + }) + + @defer.inlineCallbacks + def user_get_threepids(self, user_id): + ret = yield self._simple_select_list( + "user_threepids", { + "user_id": user_id + }, + ['medium', 'address', 'validated_at', 'added_at'], + 'user_get_threepids' + ) + defer.returnValue(ret) + + def user_delete_threepid(self, user_id, medium, address): + return self._simple_delete( + "user_threepids", + keyvalues={ + "user_id": user_id, + "medium": medium, + "address": address, + }, + desc="user_delete_threepids", + ) + class RegistrationStore(RegistrationWorkerStore, background_updates.BackgroundUpdateStore): @@ -632,39 +665,6 @@ class RegistrationStore(RegistrationWorkerStore, 
defer.returnValue(res if res else False) - @defer.inlineCallbacks - def user_add_threepid(self, user_id, medium, address, validated_at, added_at): - yield self._simple_upsert("user_threepids", { - "medium": medium, - "address": address, - }, { - "user_id": user_id, - "validated_at": validated_at, - "added_at": added_at, - }) - - @defer.inlineCallbacks - def user_get_threepids(self, user_id): - ret = yield self._simple_select_list( - "user_threepids", { - "user_id": user_id - }, - ['medium', 'address', 'validated_at', 'added_at'], - 'user_get_threepids' - ) - defer.returnValue(ret) - - def user_delete_threepid(self, user_id, medium, address): - return self._simple_delete( - "user_threepids", - keyvalues={ - "user_id": user_id, - "medium": medium, - "address": address, - }, - desc="user_delete_threepids", - ) - @defer.inlineCallbacks def save_or_get_3pid_guest_access_token( self, medium, address, access_token, inviter_user_id -- cgit 1.5.1 From f2a753ea3833a0f9ab8f1d6ca5e7d0282adb109d Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Wed, 27 Feb 2019 13:03:14 -0800 Subject: Move from TravisCI to BuildKite (#4752) --- .buildkite/.env | 13 +++ .buildkite/docker-compose.py27.pg94.yaml | 21 +++++ .buildkite/docker-compose.py27.pg95.yaml | 21 +++++ .buildkite/docker-compose.py35.pg94.yaml | 21 +++++ .buildkite/docker-compose.py35.pg95.yaml | 21 +++++ .buildkite/docker-compose.py37.pg11.yaml | 21 +++++ .buildkite/docker-compose.py37.pg95.yaml | 21 +++++ .buildkite/pipeline.yml | 149 +++++++++++++++++++++++++++++++ .travis.yml | 97 -------------------- MANIFEST.in | 1 + changelog.d/4752.misc | 1 + synapse/server.pyi | 2 +- tests/utils.py | 44 ++++++--- 13 files changed, 325 insertions(+), 108 deletions(-) create mode 100644 .buildkite/.env create mode 100644 .buildkite/docker-compose.py27.pg94.yaml create mode 100644 .buildkite/docker-compose.py27.pg95.yaml create mode 100644 .buildkite/docker-compose.py35.pg94.yaml create mode 100644 .buildkite/docker-compose.py35.pg95.yaml 
create mode 100644 .buildkite/docker-compose.py37.pg11.yaml create mode 100644 .buildkite/docker-compose.py37.pg95.yaml create mode 100644 .buildkite/pipeline.yml delete mode 100644 .travis.yml create mode 100644 changelog.d/4752.misc (limited to 'synapse') diff --git a/.buildkite/.env b/.buildkite/.env new file mode 100644 index 0000000000..85b102d07f --- /dev/null +++ b/.buildkite/.env @@ -0,0 +1,13 @@ +CI +BUILDKITE +BUILDKITE_BUILD_NUMBER +BUILDKITE_BRANCH +BUILDKITE_BUILD_NUMBER +BUILDKITE_JOB_ID +BUILDKITE_BUILD_URL +BUILDKITE_PROJECT_SLUG +BUILDKITE_COMMIT +BUILDKITE_PULL_REQUEST +BUILDKITE_TAG +CODECOV_TOKEN +TRIAL_FLAGS diff --git a/.buildkite/docker-compose.py27.pg94.yaml b/.buildkite/docker-compose.py27.pg94.yaml new file mode 100644 index 0000000000..2d4b9eadd9 --- /dev/null +++ b/.buildkite/docker-compose.py27.pg94.yaml @@ -0,0 +1,21 @@ +version: '3.1' + +services: + + postgres: + image: postgres:9.4 + environment: + POSTGRES_PASSWORD: postgres + + testenv: + image: python:2.7 + depends_on: + - postgres + env_file: .env + environment: + SYNAPSE_POSTGRES_HOST: postgres + SYNAPSE_POSTGRES_USER: postgres + SYNAPSE_POSTGRES_PASSWORD: postgres + working_dir: /app + volumes: + - ..:/app diff --git a/.buildkite/docker-compose.py27.pg95.yaml b/.buildkite/docker-compose.py27.pg95.yaml new file mode 100644 index 0000000000..c6a41f1da0 --- /dev/null +++ b/.buildkite/docker-compose.py27.pg95.yaml @@ -0,0 +1,21 @@ +version: '3.1' + +services: + + postgres: + image: postgres:9.5 + environment: + POSTGRES_PASSWORD: postgres + + testenv: + image: python:2.7 + depends_on: + - postgres + env_file: .env + environment: + SYNAPSE_POSTGRES_HOST: postgres + SYNAPSE_POSTGRES_USER: postgres + SYNAPSE_POSTGRES_PASSWORD: postgres + working_dir: /app + volumes: + - ..:/app diff --git a/.buildkite/docker-compose.py35.pg94.yaml b/.buildkite/docker-compose.py35.pg94.yaml new file mode 100644 index 0000000000..978aedd115 --- /dev/null +++ b/.buildkite/docker-compose.py35.pg94.yaml @@ 
-0,0 +1,21 @@ +version: '3.1' + +services: + + postgres: + image: postgres:9.4 + environment: + POSTGRES_PASSWORD: postgres + + testenv: + image: python:3.5 + depends_on: + - postgres + env_file: .env + environment: + SYNAPSE_POSTGRES_HOST: postgres + SYNAPSE_POSTGRES_USER: postgres + SYNAPSE_POSTGRES_PASSWORD: postgres + working_dir: /app + volumes: + - ..:/app diff --git a/.buildkite/docker-compose.py35.pg95.yaml b/.buildkite/docker-compose.py35.pg95.yaml new file mode 100644 index 0000000000..2f14387fbc --- /dev/null +++ b/.buildkite/docker-compose.py35.pg95.yaml @@ -0,0 +1,21 @@ +version: '3.1' + +services: + + postgres: + image: postgres:9.5 + environment: + POSTGRES_PASSWORD: postgres + + testenv: + image: python:3.5 + depends_on: + - postgres + env_file: .env + environment: + SYNAPSE_POSTGRES_HOST: postgres + SYNAPSE_POSTGRES_USER: postgres + SYNAPSE_POSTGRES_PASSWORD: postgres + working_dir: /app + volumes: + - ..:/app diff --git a/.buildkite/docker-compose.py37.pg11.yaml b/.buildkite/docker-compose.py37.pg11.yaml new file mode 100644 index 0000000000..f3eec05ceb --- /dev/null +++ b/.buildkite/docker-compose.py37.pg11.yaml @@ -0,0 +1,21 @@ +version: '3.1' + +services: + + postgres: + image: postgres:11 + environment: + POSTGRES_PASSWORD: postgres + + testenv: + image: python:3.7 + depends_on: + - postgres + env_file: .env + environment: + SYNAPSE_POSTGRES_HOST: postgres + SYNAPSE_POSTGRES_USER: postgres + SYNAPSE_POSTGRES_PASSWORD: postgres + working_dir: /app + volumes: + - ..:/app diff --git a/.buildkite/docker-compose.py37.pg95.yaml b/.buildkite/docker-compose.py37.pg95.yaml new file mode 100644 index 0000000000..2a41db8eba --- /dev/null +++ b/.buildkite/docker-compose.py37.pg95.yaml @@ -0,0 +1,21 @@ +version: '3.1' + +services: + + postgres: + image: postgres:9.5 + environment: + POSTGRES_PASSWORD: postgres + + testenv: + image: python:3.7 + depends_on: + - postgres + env_file: .env + environment: + SYNAPSE_POSTGRES_HOST: postgres + 
SYNAPSE_POSTGRES_USER: postgres + SYNAPSE_POSTGRES_PASSWORD: postgres + working_dir: /app + volumes: + - ..:/app diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml new file mode 100644 index 0000000000..24f22c85b4 --- /dev/null +++ b/.buildkite/pipeline.yml @@ -0,0 +1,149 @@ +env: + CODECOV_TOKEN: "2dd7eb9b-0eda-45fe-a47c-9b5ac040045f" + +steps: + - command: + - "python -m pip install tox" + - "tox -e pep8" + label: "\U0001F9F9 PEP-8" + plugins: + - docker#v3.0.1: + image: "python:3.6" + + - command: + - "python -m pip install tox" + - "tox -e packaging" + label: "\U0001F9F9 packaging" + plugins: + - docker#v3.0.1: + image: "python:3.6" + + - command: + - "python -m pip install tox" + - "tox -e check_isort" + label: "\U0001F9F9 isort" + plugins: + - docker#v3.0.1: + image: "python:3.6" + + - command: + - "python -m pip install tox" + - "scripts-dev/check-newsfragment" + label: ":newspaper: Newsfile" + branches: "!master !develop !release-*" + plugins: + - docker#v3.0.1: + image: "python:3.6" + propagate-environment: true + + - wait + + - command: + - "python -m pip install tox" + - "tox -e py27,codecov" + label: ":python: 2.7 / SQLite" + env: + TRIAL_FLAGS: "-j 2" + plugins: + - docker#v3.0.1: + image: "python:2.7" + propagate-environment: true + + - command: + - "python -m pip install tox" + - "tox -e py35,codecov" + label: ":python: 3.5 / SQLite" + env: + TRIAL_FLAGS: "-j 2" + plugins: + - docker#v3.0.1: + image: "python:3.5" + propagate-environment: true + + - command: + - "python -m pip install tox" + - "tox -e py36,codecov" + label: ":python: 3.6 / SQLite" + env: + TRIAL_FLAGS: "-j 2" + plugins: + - docker#v3.0.1: + image: "python:3.6" + propagate-environment: true + + - command: + - "python -m pip install tox" + - "tox -e py37,codecov" + label: ":python: 3.7 / SQLite" + env: + TRIAL_FLAGS: "-j 2" + plugins: + - docker#v3.0.1: + image: "python:3.7" + propagate-environment: true + + - label: ":python: 2.7 / :postgres: 9.4" + env: + TRIAL_FLAGS: "-j 
4" + command: + - "bash -c 'python -m pip install tox && python -m tox -e py27-postgres,codecov'" + plugins: + - docker-compose#v2.1.0: + run: testenv + config: + - .buildkite/docker-compose.py27.pg94.yaml + + - label: ":python: 2.7 / :postgres: 9.5" + env: + TRIAL_FLAGS: "-j 4" + command: + - "bash -c 'python -m pip install tox && python -m tox -e py27-postgres,codecov'" + plugins: + - docker-compose#v2.1.0: + run: testenv + config: + - .buildkite/docker-compose.py27.pg95.yaml + + - label: ":python: 3.5 / :postgres: 9.4" + env: + TRIAL_FLAGS: "-j 4" + command: + - "bash -c 'python -m pip install tox && python -m tox -e py35-postgres,codecov'" + plugins: + - docker-compose#v2.1.0: + run: testenv + config: + - .buildkite/docker-compose.py35.pg94.yaml + + - label: ":python: 3.5 / :postgres: 9.5" + env: + TRIAL_FLAGS: "-j 4" + command: + - "bash -c 'python -m pip install tox && python -m tox -e py35-postgres,codecov'" + plugins: + - docker-compose#v2.1.0: + run: testenv + config: + - .buildkite/docker-compose.py35.pg95.yaml + + - label: ":python: 3.7 / :postgres: 9.5" + env: + TRIAL_FLAGS: "-j 4" + command: + - "bash -c 'python -m pip install tox && python -m tox -e py37-postgres,codecov'" + plugins: + - docker-compose#v2.1.0: + run: testenv + config: + - .buildkite/docker-compose.py37.pg95.yaml + + - label: ":python: 3.7 / :postgres: 11" + env: + TRIAL_FLAGS: "-j 4" + command: + - "bash -c 'python -m pip install tox && python -m tox -e py37-postgres,codecov'" + plugins: + - docker-compose#v2.1.0: + run: testenv + config: + - .buildkite/docker-compose.py37.pg11.yaml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 0d0fa7082a..0000000000 --- a/.travis.yml +++ /dev/null @@ -1,97 +0,0 @@ -dist: xenial -language: python - -cache: - directories: - # we only bother to cache the wheels; parts of the http cache get - # invalidated every build (because they get served with a max-age of 600 - # seconds), which means that we end up re-uploading the whole 
cache for - # every build, which is time-consuming In any case, it's not obvious that - # downloading the cache from S3 would be much faster than downloading the - # originals from pypi. - # - - $HOME/.cache/pip/wheels - -# don't clone the whole repo history, one commit will do -git: - depth: 1 - -# only build branches we care about (PRs are built seperately) -branches: - only: - - master - - develop - - /^release-v/ - - rav/pg95 - -# When running the tox environments that call Twisted Trial, we can pass the -j -# flag to run the tests concurrently. We set this to 2 for CPU bound tests -# (SQLite) and 4 for I/O bound tests (PostgreSQL). -matrix: - fast_finish: true - include: - - name: "pep8" - python: 3.6 - env: TOX_ENV="pep8,check_isort,packaging" - - - name: "py2.7 / sqlite" - python: 2.7 - env: TOX_ENV=py27,codecov TRIAL_FLAGS="-j 2" - - - name: "py2.7 / sqlite / olddeps" - python: 2.7 - env: TOX_ENV=py27-old TRIAL_FLAGS="-j 2" - - - name: "py2.7 / postgres9.5" - python: 2.7 - addons: - postgresql: "9.5" - env: TOX_ENV=py27-postgres,codecov TRIAL_FLAGS="-j 4" - services: - - postgresql - - - name: "py3.5 / sqlite" - python: 3.5 - env: TOX_ENV=py35,codecov TRIAL_FLAGS="-j 2" - - - name: "py3.7 / sqlite" - python: 3.7 - env: TOX_ENV=py37,codecov TRIAL_FLAGS="-j 2" - - - name: "py3.7 / postgres9.4" - python: 3.7 - addons: - postgresql: "9.4" - env: TOX_ENV=py37-postgres TRIAL_FLAGS="-j 4" - services: - - postgresql - - - name: "py3.7 / postgres9.5" - python: 3.7 - addons: - postgresql: "9.5" - env: TOX_ENV=py37-postgres,codecov TRIAL_FLAGS="-j 4" - services: - - postgresql - - - # we only need to check for the newsfragment if it's a PR build - if: type = pull_request - name: "check-newsfragment" - python: 3.6 - script: scripts-dev/check-newsfragment - -install: - # this just logs the postgres version we will be testing against (if any) - - psql -At -U postgres -c 'select version();' || true - - - pip install tox - - # if we don't have python3.6 in this 
environment, travis unhelpfully gives us - # a `python3.6` on our path which does nothing but spit out a warning. Tox - # tries to run it (even if we're not running a py36 env), so the build logs - # then have warnings which look like errors. To reduce the noise, remove the - # non-functional python3.6. - - ( ! command -v python3.6 || python3.6 --version ) &>/dev/null || rm -f $(command -v python3.6) - -script: - - tox -e $TOX_ENV diff --git a/MANIFEST.in b/MANIFEST.in index eb2de60f72..0500dd6b87 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -39,6 +39,7 @@ prune .circleci prune .coveragerc prune debian prune .codecov.yml +prune .buildkite exclude jenkins* recursive-exclude jenkins *.sh diff --git a/changelog.d/4752.misc b/changelog.d/4752.misc new file mode 100644 index 0000000000..fb1e76edce --- /dev/null +++ b/changelog.d/4752.misc @@ -0,0 +1 @@ +Change from TravisCI to Buildkite for CI. diff --git a/synapse/server.pyi b/synapse/server.pyi index 06cd083a74..fb8df56cd5 100644 --- a/synapse/server.pyi +++ b/synapse/server.pyi @@ -7,9 +7,9 @@ import synapse.handlers.auth import synapse.handlers.deactivate_account import synapse.handlers.device import synapse.handlers.e2e_keys +import synapse.handlers.message import synapse.handlers.room import synapse.handlers.room_member -import synapse.handlers.message import synapse.handlers.set_password import synapse.rest.media.v1.media_repository import synapse.server_notices.server_notices_manager diff --git a/tests/utils.py b/tests/utils.py index e8ab312528..cf49833a43 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -45,7 +45,9 @@ from synapse.util.ratelimitutils import FederationRateLimiter # set this to True to run the tests against postgres instead of sqlite. 
USE_POSTGRES_FOR_TESTS = os.environ.get("SYNAPSE_POSTGRES", False) LEAVE_DB = os.environ.get("SYNAPSE_LEAVE_DB", False) -POSTGRES_USER = os.environ.get("SYNAPSE_POSTGRES_USER", "postgres") +POSTGRES_USER = os.environ.get("SYNAPSE_POSTGRES_USER", None) +POSTGRES_HOST = os.environ.get("SYNAPSE_POSTGRES_HOST", None) +POSTGRES_PASSWORD = os.environ.get("SYNAPSE_POSTGRES_PASSWORD", None) POSTGRES_BASE_DB = "_synapse_unit_tests_base_%s" % (os.getpid(),) @@ -58,6 +60,8 @@ def setupdb(): "args": { "database": POSTGRES_BASE_DB, "user": POSTGRES_USER, + "host": POSTGRES_HOST, + "password": POSTGRES_PASSWORD, "cp_min": 1, "cp_max": 5, }, @@ -66,7 +70,9 @@ def setupdb(): config.password_providers = [] config.database_config = pgconfig db_engine = create_engine(pgconfig) - db_conn = db_engine.module.connect(user=POSTGRES_USER) + db_conn = db_engine.module.connect( + user=POSTGRES_USER, host=POSTGRES_HOST, password=POSTGRES_PASSWORD + ) db_conn.autocommit = True cur = db_conn.cursor() cur.execute("DROP DATABASE IF EXISTS %s;" % (POSTGRES_BASE_DB,)) @@ -76,7 +82,10 @@ def setupdb(): # Set up in the db db_conn = db_engine.module.connect( - database=POSTGRES_BASE_DB, user=POSTGRES_USER + database=POSTGRES_BASE_DB, + user=POSTGRES_USER, + host=POSTGRES_HOST, + password=POSTGRES_PASSWORD, ) cur = db_conn.cursor() _get_or_create_schema_state(cur, db_engine) @@ -86,7 +95,9 @@ def setupdb(): db_conn.close() def _cleanup(): - db_conn = db_engine.module.connect(user=POSTGRES_USER) + db_conn = db_engine.module.connect( + user=POSTGRES_USER, host=POSTGRES_HOST, password=POSTGRES_PASSWORD + ) db_conn.autocommit = True cur = db_conn.cursor() cur.execute("DROP DATABASE IF EXISTS %s;" % (POSTGRES_BASE_DB,)) @@ -206,7 +217,14 @@ def setup_test_homeserver( config.database_config = { "name": "psycopg2", - "args": {"database": test_db, "cp_min": 1, "cp_max": 5}, + "args": { + "database": test_db, + "host": POSTGRES_HOST, + "password": POSTGRES_PASSWORD, + "user": POSTGRES_USER, + "cp_min": 1, + 
"cp_max": 5, + }, } else: config.database_config = { @@ -220,7 +238,10 @@ def setup_test_homeserver( # the template database we generate in setupdb() if datastore is None and isinstance(db_engine, PostgresEngine): db_conn = db_engine.module.connect( - database=POSTGRES_BASE_DB, user=POSTGRES_USER + database=POSTGRES_BASE_DB, + user=POSTGRES_USER, + host=POSTGRES_HOST, + password=POSTGRES_PASSWORD, ) db_conn.autocommit = True cur = db_conn.cursor() @@ -270,7 +291,10 @@ def setup_test_homeserver( # Drop the test database db_conn = db_engine.module.connect( - database=POSTGRES_BASE_DB, user=POSTGRES_USER + database=POSTGRES_BASE_DB, + user=POSTGRES_USER, + host=POSTGRES_HOST, + password=POSTGRES_PASSWORD, ) db_conn.autocommit = True cur = db_conn.cursor() @@ -492,7 +516,7 @@ class MockClock(object): return t def looping_call(self, function, interval): - self.loopers.append([function, interval / 1000., self.now]) + self.loopers.append([function, interval / 1000.0, self.now]) def cancel_call_later(self, timer, ignore_errs=False): if timer[2]: @@ -528,7 +552,7 @@ class MockClock(object): looped[2] = self.now def advance_time_msec(self, ms): - self.advance_time(ms / 1000.) + self.advance_time(ms / 1000.0) def time_bound_deferred(self, d, *args, **kwargs): # We don't bother timing things out for now. @@ -637,7 +661,7 @@ def create_room(hs, room_id, creator_id): "sender": creator_id, "room_id": room_id, "content": {}, - } + }, ) event, context = yield event_creation_handler.create_new_client_event(builder) -- cgit 1.5.1 From 68f47d6744ac2b4c6ac8b59b8c52a537a5072b4c Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 27 Feb 2019 22:29:10 +0000 Subject: Fix parsing of Content-Disposition headers (#4763) * Fix parsing of Content-Disposition headers TIL: filenames in content-dispostion headers can contain semicolons, and aren't %-encoded. 
* fix python2 incompatibility * Fix docstrings --- changelog.d/4763.bugfix | 1 + synapse/rest/media/v1/_base.py | 85 ++++++++++++++++++++++++++++++---------- tests/rest/media/v1/test_base.py | 45 +++++++++++++++++++++ 3 files changed, 111 insertions(+), 20 deletions(-) create mode 100644 changelog.d/4763.bugfix create mode 100644 tests/rest/media/v1/test_base.py (limited to 'synapse') diff --git a/changelog.d/4763.bugfix b/changelog.d/4763.bugfix new file mode 100644 index 0000000000..213ea44b70 --- /dev/null +++ b/changelog.d/4763.bugfix @@ -0,0 +1 @@ +Fix parsing of Content-Disposition headers on remote media requests and URL previews. diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index d16a30acd8..fece1ef0b8 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2019 New Vector Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -213,8 +214,7 @@ def get_filename_from_headers(headers): Content-Disposition HTTP header. Args: - headers (twisted.web.http_headers.Headers): The HTTP - request headers. + headers (dict[bytes, list[bytes]]): The HTTP request headers. Returns: A Unicode string of the filename, or None. @@ -225,23 +225,12 @@ def get_filename_from_headers(headers): if not content_disposition[0]: return - # dict of unicode: bytes, corresponding to the key value sections of the - # Content-Disposition header. 
- params = {} - parts = content_disposition[0].split(b";") - for i in parts: - # Split into key-value pairs, if able - # We don't care about things like `inline`, so throw it out - if b"=" not in i: - continue - - key, value = i.strip().split(b"=") - params[key.decode('ascii')] = value + _, params = _parse_header(content_disposition[0]) upload_name = None # First check if there is a valid UTF-8 filename - upload_name_utf8 = params.get("filename*", None) + upload_name_utf8 = params.get(b"filename*", None) if upload_name_utf8: if upload_name_utf8.lower().startswith(b"utf-8''"): upload_name_utf8 = upload_name_utf8[7:] @@ -267,12 +256,68 @@ def get_filename_from_headers(headers): # If there isn't check for an ascii name. if not upload_name: - upload_name_ascii = params.get("filename", None) + upload_name_ascii = params.get(b"filename", None) if upload_name_ascii and is_ascii(upload_name_ascii): - # Make sure there's no %-quoted bytes. If there is, reject it as - # non-valid ASCII. - if b"%" not in upload_name_ascii: - upload_name = upload_name_ascii.decode('ascii') + upload_name = upload_name_ascii.decode('ascii') # This may be None here, indicating we did not find a matching name. return upload_name + + +def _parse_header(line): + """Parse a Content-type like header. + + Cargo-culted from `cgi`, but works on bytes rather than strings. 
+ + Args: + line (bytes): header to be parsed + + Returns: + Tuple[bytes, dict[bytes, bytes]]: + the main content-type, followed by the parameter dictionary + """ + parts = _parseparam(b';' + line) + key = next(parts) + pdict = {} + for p in parts: + i = p.find(b'=') + if i >= 0: + name = p[:i].strip().lower() + value = p[i + 1:].strip() + + # strip double-quotes + if len(value) >= 2 and value[0:1] == value[-1:] == b'"': + value = value[1:-1] + value = value.replace(b'\\\\', b'\\').replace(b'\\"', b'"') + pdict[name] = value + + return key, pdict + + +def _parseparam(s): + """Generator which splits the input on ;, respecting double-quoted sequences + + Cargo-culted from `cgi`, but works on bytes rather than strings. + + Args: + s (bytes): header to be parsed + + Returns: + Iterable[bytes]: the split input + """ + while s[:1] == b';': + s = s[1:] + + # look for the next ; + end = s.find(b';') + + # if there is an odd number of " marks between here and the next ;, skip to the + # next ; instead + while end > 0 and (s.count(b'"', 0, end) - s.count(b'\\"', 0, end)) % 2: + end = s.find(b';', end + 1) + + if end < 0: + end = len(s) + f = s[:end] + yield f.strip() + s = s[end:] diff --git a/tests/rest/media/v1/test_base.py b/tests/rest/media/v1/test_base.py new file mode 100644 index 0000000000..af8f74eb42 --- /dev/null +++ b/tests/rest/media/v1/test_base.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.rest.media.v1._base import get_filename_from_headers + +from tests import unittest + + +class GetFileNameFromHeadersTests(unittest.TestCase): + # input -> expected result + TEST_CASES = { + b"inline; filename=abc.txt": u"abc.txt", + b'inline; filename="azerty"': u"azerty", + b'inline; filename="aze%20rty"': u"aze%20rty", + b'inline; filename="aze\"rty"': u'aze"rty', + b'inline; filename="azer;ty"': u"azer;ty", + + b"inline; filename*=utf-8''foo%C2%A3bar": u"foo£bar", + } + + def tests(self): + for hdr, expected in self.TEST_CASES.items(): + res = get_filename_from_headers( + { + b'Content-Disposition': [hdr], + }, + ) + self.assertEqual( + res, expected, + "expected output for %s to be %s but was %s" % ( + hdr, expected, res, + ) + ) -- cgit 1.5.1 From b131cc77dfd53c2f66cb6e2399a6b10024cf24cd Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Wed, 27 Feb 2019 14:35:47 -0800 Subject: Make 'event_id' a required parameter in federated state requests (#4741) * make 'event_id' a required parameter in federated state requests As per the spec: https://matrix.org/docs/spec/server_server/r0.1.1.html#id40 Signed-off-by: Joseph Weston * add changelog entry for bugfix Signed-off-by: Joseph Weston * Update server.py --- changelog.d/4740.bugfix | 1 + synapse/federation/transport/server.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 changelog.d/4740.bugfix (limited to 'synapse') diff --git a/changelog.d/4740.bugfix b/changelog.d/4740.bugfix new file mode 100644 index 0000000000..f82bb4227a --- /dev/null +++ b/changelog.d/4740.bugfix @@ -0,0 +1 @@ +'event_id' is now a required parameter in federated state requests, as per the matrix spec. 
diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 5ba94be2ec..ebb81be377 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -393,7 +393,7 @@ class FederationStateServlet(BaseFederationServlet): return self.handler.on_context_state_request( origin, context, - parse_string_from_args(query, "event_id", None), + parse_string_from_args(query, "event_id", None, required=True), ) @@ -404,7 +404,7 @@ class FederationStateIdsServlet(BaseFederationServlet): return self.handler.on_state_ids_request( origin, room_id, - parse_string_from_args(query, "event_id", None), + parse_string_from_args(query, "event_id", None, required=True), ) -- cgit 1.5.1 From ac61b45a75304e90c05a5dba153900cfc0adb206 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 28 Feb 2019 16:24:01 +0000 Subject: Minor docstring fixes for MatrixFederationAgent (#4765) --- changelog.d/4765.misc | 1 + synapse/http/federation/matrix_federation_agent.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 changelog.d/4765.misc (limited to 'synapse') diff --git a/changelog.d/4765.misc b/changelog.d/4765.misc new file mode 100644 index 0000000000..c273fd0cc4 --- /dev/null +++ b/changelog.d/4765.misc @@ -0,0 +1 @@ +Minor docstring fixes for MatrixFederationAgent. \ No newline at end of file diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index 384d8a37a2..1334c630cc 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -68,9 +68,13 @@ class MatrixFederationAgent(object): TLS policy to use for fetching .well-known files. None to use a default (browser-like) implementation. - srv_resolver (SrvResolver|None): + _srv_resolver (SrvResolver|None): SRVResolver impl to use for looking up SRV records. 
None to use a default implementation. + + _well_known_cache (TTLCache|None): + TTLCache impl for storing cached well-known lookups. None to use a default + implementation. """ def __init__( -- cgit 1.5.1 From 1e8388b311c54d754d6afbe639ed2825c1c1f285 Mon Sep 17 00:00:00 2001 From: Joseph Weston Date: Fri, 1 Mar 2019 04:05:47 +0100 Subject: Add 'server_version' endpoint to admin API This is required because the 'Server' HTTP header is not always passed through proxies. --- synapse/rest/client/v1/admin.py | 23 +++++++++++++++++++++++ tests/rest/client/v1/test_admin.py | 36 +++++++++++++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 82433a2aa9..0201cf1186 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -17,12 +17,14 @@ import hashlib import hmac import logging +import platform from six import text_type from six.moves import http_client from twisted.internet import defer +import synapse from synapse.api.constants import Membership, UserTypes from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError from synapse.http.servlet import ( @@ -32,6 +34,7 @@ from synapse.http.servlet import ( parse_string, ) from synapse.types import UserID, create_requester +from synapse.util.versionstring import get_version_string from .base import ClientV1RestServlet, client_path_patterns @@ -66,6 +69,25 @@ class UsersRestServlet(ClientV1RestServlet): defer.returnValue((200, ret)) +class VersionServlet(ClientV1RestServlet): + PATTERNS = client_path_patterns("/admin/server_version") + + @defer.inlineCallbacks + def on_GET(self, request): + requester = yield self.auth.get_user_by_req(request) + is_admin = yield self.auth.is_server_admin(requester.user) + + if not is_admin: + raise AuthError(403, "You are not a server admin") + + ret = { + 'server_version': get_version_string(synapse), + 'python_version': 
platform.python_version(), + } + + defer.returnValue((200, ret)) + + class UserRegisterServlet(ClientV1RestServlet): """ Attributes: @@ -763,3 +785,4 @@ def register_servlets(hs, http_server): QuarantineMediaInRoom(hs).register(http_server) ListMediaInRoom(hs).register(http_server) UserRegisterServlet(hs).register(http_server) + VersionServlet(hs).register(http_server) diff --git a/tests/rest/client/v1/test_admin.py b/tests/rest/client/v1/test_admin.py index c926836206..ea03b7e523 100644 --- a/tests/rest/client/v1/test_admin.py +++ b/tests/rest/client/v1/test_admin.py @@ -20,11 +20,45 @@ import json from mock import Mock from synapse.api.constants import UserTypes -from synapse.rest.client.v1 import admin +from synapse.rest.client.v1 import admin, login from tests import unittest +class VersionTestCase(unittest.HomeserverTestCase): + + servlets = [ + admin.register_servlets, + login.register_servlets, + ] + + url = '/_matrix/client/r0/admin/server_version' + + def test_version_string(self): + self.register_user("admin", "pass", admin=True) + self.admin_token = self.login("admin", "pass") + + request, channel = self.make_request("GET", self.url, + access_token=self.admin_token) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), + msg=channel.result["body"]) + self.assertEqual({'server_version', 'python_version'}, + set(channel.json_body.keys())) + + def test_inaccessible_to_non_admins(self): + self.register_user("unprivileged-user", "pass", admin=False) + user_token = self.login("unprivileged-user", "pass") + + request, channel = self.make_request("GET", self.url, + access_token=user_token) + self.render(request) + + self.assertEqual(403, int(channel.result['code']), + msg=channel.result['body']) + + class UserRegisterTestCase(unittest.HomeserverTestCase): servlets = [admin.register_servlets] -- cgit 1.5.1 From 30649529398a2a57c52b6878a5753a5bf650cf25 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff 
<1389908+richvdh@users.noreply.github.com> Date: Fri, 1 Mar 2019 16:47:12 +0000 Subject: Fix incorrect log about not persisting duplicate state event. (#4776) We were logging this when it was not true. --- changelog.d/4776.bugfix | 1 + synapse/handlers/message.py | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) create mode 100644 changelog.d/4776.bugfix (limited to 'synapse') diff --git a/changelog.d/4776.bugfix b/changelog.d/4776.bugfix new file mode 100644 index 0000000000..ce3e6ce33c --- /dev/null +++ b/changelog.d/4776.bugfix @@ -0,0 +1 @@ +Fix incorrect log about not persisting duplicate state event. diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 3981fe69ce..c762b58902 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -436,10 +436,11 @@ class EventCreationHandler(object): if event.is_state(): prev_state = yield self.deduplicate_state_event(event, context) - logger.info( - "Not bothering to persist duplicate state event %s", event.event_id, - ) if prev_state is not None: + logger.info( + "Not bothering to persist state event %s duplicated by %s", + event.event_id, prev_state.event_id, + ) defer.returnValue(prev_state) yield self.handle_new_client_event( -- cgit 1.5.1 From 4dfbae18fe10c22a54421f211ad4a46a11777c16 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Fri, 1 Mar 2019 15:02:02 -0700 Subject: Use static locations for Riot icons See https://github.com/vector-im/riot-web/issues/9009 --- synapse/res/templates/notif.html | 6 +++--- synapse/res/templates/notif_mail.html | 2 +- synapse/res/templates/room.html | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'synapse') diff --git a/synapse/res/templates/notif.html b/synapse/res/templates/notif.html index 88b921ca9c..1a6c70b562 100644 --- a/synapse/res/templates/notif.html +++ b/synapse/res/templates/notif.html @@ -6,11 +6,11 @@ {% else %} {% if message.sender_hash % 3 == 0 %} - + {% elif message.sender_hash % 3 == 1 
%} - + {% else %} - + {% endif %} {% endif %} {% endif %} diff --git a/synapse/res/templates/notif_mail.html b/synapse/res/templates/notif_mail.html index fcdb3109fe..019506e5fb 100644 --- a/synapse/res/templates/notif_mail.html +++ b/synapse/res/templates/notif_mail.html @@ -19,7 +19,7 @@ {% if app_name == "Riot" %} - [Riot] + [Riot] {% elif app_name == "Vector" %} [Vector] {% else %} diff --git a/synapse/res/templates/room.html b/synapse/res/templates/room.html index 723c222d25..b8525fef88 100644 --- a/synapse/res/templates/room.html +++ b/synapse/res/templates/room.html @@ -5,11 +5,11 @@ {% else %} {% if room.hash % 3 == 0 %} - + {% elif room.hash % 3 == 1 %} - + {% else %} - + {% endif %} {% endif %} -- cgit 1.5.1 From 8b63fe4c261a4451024dba7506b5872686c22282 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 4 Mar 2019 11:56:03 +0000 Subject: s/get_forward_events/get_successor_events/ --- synapse/handlers/federation.py | 2 +- synapse/storage/event_federation.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 0425380e55..32d7ba6cf5 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -881,7 +881,7 @@ class FederationHandler(BaseHandler): # join of the server where we are allowed to see the join event but # not anything before it. 
- forward_events = yield self.store.get_forward_events( + forward_events = yield self.store.get_successor_events( list(extremities), ) diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 830b171caa..a8d90456e3 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -443,7 +443,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, return event_results @defer.inlineCallbacks - def get_forward_events(self, event_ids): + def get_successor_events(self, event_ids): """Fetch all events that have the given events as a prev event Args: @@ -457,7 +457,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, column="prev_event_id", iterable=event_ids, retcols=("event_id",), - desc="get_forward_events" + desc="get_successor_events" ) defer.returnValue([ -- cgit 1.5.1 From 856c83f5f85d2890ab7de26578464328241ec3ba Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Mon, 4 Mar 2019 12:57:44 +0000 Subject: Avoid rebuilding Edu objects in worker mode (#4770) In worker mode, on the federation sender, when we receive an edu for sending over the replication socket, it is parsed into an Edu object. There is no point extracting the contents of it so that we can then immediately build another Edu. --- changelog.d/4770.misc | 1 + synapse/federation/send_queue.py | 14 +++++++------- synapse/federation/transaction_queue.py | 31 ++++++++++++++++++++++++------- synapse/handlers/presence.py | 6 +++--- synapse/handlers/receipts.py | 2 +- synapse/handlers/typing.py | 2 +- 6 files changed, 37 insertions(+), 19 deletions(-) create mode 100644 changelog.d/4770.misc (limited to 'synapse') diff --git a/changelog.d/4770.misc b/changelog.d/4770.misc new file mode 100644 index 0000000000..144d819958 --- /dev/null +++ b/changelog.d/4770.misc @@ -0,0 +1 @@ +Optimise EDU transmission for the federation_sender worker. 
diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py index 6f5995735a..b7d0b25781 100644 --- a/synapse/federation/send_queue.py +++ b/synapse/federation/send_queue.py @@ -159,8 +159,12 @@ class FederationRemoteSendQueue(object): # stream. pass - def send_edu(self, destination, edu_type, content, key=None): + def build_and_send_edu(self, destination, edu_type, content, key=None): """As per TransactionQueue""" + if destination == self.server_name: + logger.info("Not sending EDU to ourselves") + return + pos = self._next_pos() edu = Edu( @@ -465,15 +469,11 @@ def process_rows_for_federation(transaction_queue, rows): for destination, edu_map in iteritems(buff.keyed_edus): for key, edu in edu_map.items(): - transaction_queue.send_edu( - edu.destination, edu.edu_type, edu.content, key=key, - ) + transaction_queue.send_edu(edu, key) for destination, edu_list in iteritems(buff.edus): for edu in edu_list: - transaction_queue.send_edu( - edu.destination, edu.edu_type, edu.content, key=None, - ) + transaction_queue.send_edu(edu, None) for destination in buff.device_destinations: transaction_queue.send_device_messages(destination) diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 30941f5ad6..e5e42c647d 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -361,7 +361,19 @@ class TransactionQueue(object): self._attempt_new_transaction(destination) - def send_edu(self, destination, edu_type, content, key=None): + def build_and_send_edu(self, destination, edu_type, content, key=None): + """Construct an Edu object, and queue it for sending + + Args: + destination (str): name of server to send to + edu_type (str): type of EDU to send + content (dict): content of EDU + key (Any|None): clobbering key for this edu + """ + if destination == self.server_name: + logger.info("Not sending EDU to ourselves") + return + edu = Edu( origin=self.server_name, 
destination=destination, @@ -369,18 +381,23 @@ class TransactionQueue(object): content=content, ) - if destination == self.server_name: - logger.info("Not sending EDU to ourselves") - return + self.send_edu(edu, key) + + def send_edu(self, edu, key): + """Queue an EDU for sending + Args: + edu (Edu): edu to send + key (Any|None): clobbering key for this edu + """ if key: self.pending_edus_keyed_by_dest.setdefault( - destination, {} + edu.destination, {} )[(edu.edu_type, key)] = edu else: - self.pending_edus_by_dest.setdefault(destination, []).append(edu) + self.pending_edus_by_dest.setdefault(edu.destination, []).append(edu) - self._attempt_new_transaction(destination) + self._attempt_new_transaction(edu.destination) def send_device_messages(self, destination): if destination == self.server_name: diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index ba3856674d..37e87fc054 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -816,7 +816,7 @@ class PresenceHandler(object): if self.is_mine(observed_user): yield self.invite_presence(observed_user, observer_user) else: - yield self.federation.send_edu( + yield self.federation.build_and_send_edu( destination=observed_user.domain, edu_type="m.presence_invite", content={ @@ -836,7 +836,7 @@ class PresenceHandler(object): if self.is_mine(observer_user): yield self.accept_presence(observed_user, observer_user) else: - self.federation.send_edu( + self.federation.build_and_send_edu( destination=observer_user.domain, edu_type="m.presence_accept", content={ @@ -848,7 +848,7 @@ class PresenceHandler(object): state_dict = yield self.get_state(observed_user, as_event=False) state_dict = format_user_presence_state(state_dict, self.clock.time_msec()) - self.federation.send_edu( + self.federation.build_and_send_edu( destination=observer_user.domain, edu_type="m.presence", content={ diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 696469732c..8b2d03a756 
100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -148,7 +148,7 @@ class ReceiptsHandler(BaseHandler): logger.debug("Sending receipt to: %r", remotedomains) for domain in remotedomains: - self.federation.send_edu( + self.federation.build_and_send_edu( destination=domain, edu_type="m.receipt", content={ diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index a61bbf9392..39df960c31 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -231,7 +231,7 @@ class TypingHandler(object): for domain in set(get_domain_from_id(u) for u in users): if domain != self.server_name: logger.debug("sending typing update to %s", domain) - self.federation.send_edu( + self.federation.build_and_send_edu( destination=domain, edu_type="m.typing", content={ -- cgit 1.5.1 From 5f0c449dd50fa84ff741e09f34cad5330c6e4745 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 4 Mar 2019 13:56:49 +0000 Subject: Prevent replication wedging --- synapse/replication/tcp/protocol.py | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) (limited to 'synapse') diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index 49ae5b3355..a6df04d851 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -451,7 +451,7 @@ class ServerReplicationStreamProtocol(BaseReplicationStreamProtocol): @defer.inlineCallbacks def subscribe_to_stream(self, stream_name, token): - """Subscribe the remote to a streams. + """Subscribe the remote to a stream. This invloves checking if they've missed anything and sending those updates down if they have. 
During that time new updates for the stream @@ -478,10 +478,30 @@ class ServerReplicationStreamProtocol(BaseReplicationStreamProtocol): # Now we can send any updates that came in while we were subscribing pending_rdata = self.pending_rdata.pop(stream_name, []) + batch_updates = [] for token, update in pending_rdata: - # Only send updates newer than the current token - if token > current_token: - self.send_command(RdataCommand(stream_name, token, update)) + # If the token is null, it is part of a batch update. Batches + # are multiple updates that share a single token. To denote + # this, the token is set to None for all tokens in the batch + # except for the last. If we find a None token, we keep looking + # through tokens until we find one that is not None and then + # process all previous updates in the batch as if they had the + # final token. + if not token or len(batch_updates) > 0: + batch_updates.append(update) + if token and not token > current_token: + # This batch is older than current_token, dismiss + batch_updates = [] + continue + if token: + # Send all updates that are part of this batch with the + # found token + for update in batch_updates: + self.send_command(RdataCommand(stream_name, token, update)) + else: + # Only send updates newer than the current token + if token > current_token: + self.send_command(RdataCommand(stream_name, token, update)) # They're now fully subscribed self.replication_streams.add(stream_name) -- cgit 1.5.1 From 9f7cdf3da16e4e6c29229dcc80d9cf060cd64584 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 4 Mar 2019 14:36:52 +0000 Subject: Clearer branching, fix missing list clear --- synapse/replication/tcp/protocol.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'synapse') diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index a6df04d851..53615b7ee3 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ 
-488,16 +488,23 @@ class ServerReplicationStreamProtocol(BaseReplicationStreamProtocol): # process all previous updates in the batch as if they had the # final token. if not token or len(batch_updates) > 0: - batch_updates.append(update) - if token and not token > current_token: + if token is None: + # Store this update as part of the batch + batch_updates.append(update) + elif current_token <= current_token: # This batch is older than current_token, dismiss batch_updates = [] - continue - if token: + else: + # Append final update of this batch before sending + batch_updates.append(update) + # Send all updates that are part of this batch with the # found token for update in batch_updates: self.send_command(RdataCommand(stream_name, token, update)) + + # Clear saved batch updates + batch_updates = [] else: # Only send updates newer than the current token if token > current_token: -- cgit 1.5.1 From d1523aed6bebf00a4643d72eea611b029db65f08 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 4 Mar 2019 14:34:34 +0000 Subject: Only check history visibility when filtering When filtering events to send to server we check more than just history visibility. However when deciding whether to backfill or not we only care about the history visibility. --- synapse/handlers/federation.py | 4 ++- synapse/visibility.py | 77 +++++++++++++++++++++++------------------- 2 files changed, 46 insertions(+), 35 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 32d7ba6cf5..bf2989aefd 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -891,9 +891,11 @@ class FederationHandler(BaseHandler): get_prev_content=False, ) + # We set `check_history_visibility_only` as we might otherwise get false + # positives from users having been erased. 
filtered_extremities = yield filter_events_for_server( self.store, self.server_name, list(extremities_events.values()), - redact=False, + redact=False, check_history_visibility_only=True, ) if not filtered_extremities: diff --git a/synapse/visibility.py b/synapse/visibility.py index f6dcc96630..8b9c7180b6 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -216,7 +216,8 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, @defer.inlineCallbacks -def filter_events_for_server(store, server_name, events, redact=True): +def filter_events_for_server(store, server_name, events, redact=True, + check_history_visibility_only=False): """Filter a list of events based on whether given server is allowed to see them. @@ -226,34 +227,25 @@ def filter_events_for_server(store, server_name, events, redact=True): events (iterable[FrozenEvent]) redact (bool): Whether to return a redacted version of the event, or to filter them out entirely. + check_history_visibility_only (bool): Whether to only check the + history visibility, rather than things like if the sender has been + erased. This is used e.g. during pagination to decide whether to + backfill or not. Returns Deferred[list[FrozenEvent]] """ - # Whatever else we do, we need to check for senders which have requested - # erasure of their data. - erased_senders = yield store.are_users_erased( - (e.sender for e in events), - ) - def redact_disallowed(event, state): - # if the sender has been gdpr17ed, always return a redacted - # copy of the event. + def is_sender_erased(event, erased_senders): if erased_senders[event.sender]: logger.info( "Sender of %s has been erased, redacting", event.event_id, ) - if redact: - return prune_event(event) - else: - return None - - # state will be None if we decided we didn't need to filter by - # room membership. 
- if not state: - return event + return True + return False + def check_event_is_visible(event, state): history = state.get((EventTypes.RoomHistoryVisibility, ''), None) if history: visibility = history.content.get("history_visibility", "shared") @@ -275,18 +267,15 @@ def filter_events_for_server(store, server_name, events, redact=True): memtype = ev.membership if memtype == Membership.JOIN: - return event + return True elif memtype == Membership.INVITE: if visibility == "invited": - return event + return True else: # server has no users in the room: redact - if redact: - return prune_event(event) - else: - return None + return False - return event + return True # Next lets check to see if all the events have a history visibility # of "shared" or "world_readable". If thats the case then we don't @@ -315,16 +304,31 @@ def filter_events_for_server(store, server_name, events, redact=True): for e in itervalues(event_map) ) + if not check_history_visibility_only: + erased_senders = yield store.are_users_erased( + (e.sender for e in events), + ) + else: + # We don't want to check whether users are erased, which is equivalent + # to no users having been erased. + erased_senders = {} + if all_open: # all the history_visibility state affecting these events is open, so # we don't need to filter by membership state. We *do* need to check # for user erasure, though. if erased_senders: - events = [ - redact_disallowed(e, None) - for e in events - ] + to_return = [] + for e in events: + if not is_sender_erased(e, erased_senders): + to_return.append(e) + elif redact: + to_return.append(prune_event(e)) + + defer.returnValue(to_return) + # If there are no erased users then we can just return the given list + # of events without having to copy it. 
defer.returnValue(events) # Ok, so we're dealing with events that have non-trivial visibility @@ -380,8 +384,13 @@ def filter_events_for_server(store, server_name, events, redact=True): for e_id, key_to_eid in iteritems(event_to_state_ids) } - to_return = ( - redact_disallowed(e, event_to_state[e.event_id]) - for e in events - ) - defer.returnValue([e for e in to_return if e is not None]) + to_return = [] + for e in events: + erased = is_sender_erased(e, erased_senders) + visible = check_event_is_visible(e, event_to_state[e.event_id]) + if visible and not erased: + to_return.append(e) + elif redact: + to_return.append(prune_event(e)) + + defer.returnValue(to_return) -- cgit 1.5.1 From fe7bd23a85988c5251fe17e78589b69f92f21dd7 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 4 Mar 2019 15:08:15 +0000 Subject: Clean up logic and add comments --- synapse/replication/tcp/protocol.py | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) (limited to 'synapse') diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index 53615b7ee3..dac4fbeef7 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -487,15 +487,19 @@ class ServerReplicationStreamProtocol(BaseReplicationStreamProtocol): # through tokens until we find one that is not None and then # process all previous updates in the batch as if they had the # final token. 
- if not token or len(batch_updates) > 0: - if token is None: - # Store this update as part of the batch - batch_updates.append(update) - elif current_token <= current_token: - # This batch is older than current_token, dismiss + if token is None: + # Store this update as part of a batch + batch_updates.append(update) + continue + + if len(batch_updates) > 0: + # There is an ongoing batch and this is the end + if current_token <= current_token: + # This batch is older than current_token, dismiss it batch_updates = [] else: - # Append final update of this batch before sending + # This is the end of the batch. Append final update of + # this batch before sending batch_updates.append(update) # Send all updates that are part of this batch with the @@ -505,10 +509,13 @@ class ServerReplicationStreamProtocol(BaseReplicationStreamProtocol): # Clear saved batch updates batch_updates = [] - else: - # Only send updates newer than the current token - if token > current_token: - self.send_command(RdataCommand(stream_name, token, update)) + continue + + # This is an update that's not part of a batch. + # + # Only send updates newer than the current token + if token > current_token: + self.send_command(RdataCommand(stream_name, token, update)) # They're now fully subscribed self.replication_streams.add(stream_name) -- cgit 1.5.1 From 336de1d45b7ac4ea2b489564b44dbdcdfbaded32 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 4 Mar 2019 15:25:12 +0000 Subject: Remove unnecessary dollar signs A dollar sign is already appended to the end of each PATH, so there's no need to add one in the PATH declaration as well. 
--- synapse/federation/transport/server.py | 44 +++++++++++++++++----------------- 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'synapse') diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index ebb81be377..96d680a5ad 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -759,7 +759,7 @@ class FederationVersionServlet(BaseFederationServlet): class FederationGroupsProfileServlet(BaseFederationServlet): """Get/set the basic profile of a group on behalf of a user """ - PATH = "/groups/(?P[^/]*)/profile$" + PATH = "/groups/(?P[^/]*)/profile" @defer.inlineCallbacks def on_GET(self, origin, content, query, group_id): @@ -787,7 +787,7 @@ class FederationGroupsProfileServlet(BaseFederationServlet): class FederationGroupsSummaryServlet(BaseFederationServlet): - PATH = "/groups/(?P[^/]*)/summary$" + PATH = "/groups/(?P[^/]*)/summary" @defer.inlineCallbacks def on_GET(self, origin, content, query, group_id): @@ -805,7 +805,7 @@ class FederationGroupsSummaryServlet(BaseFederationServlet): class FederationGroupsRoomsServlet(BaseFederationServlet): """Get the rooms in a group on behalf of a user """ - PATH = "/groups/(?P[^/]*)/rooms$" + PATH = "/groups/(?P[^/]*)/rooms" @defer.inlineCallbacks def on_GET(self, origin, content, query, group_id): @@ -823,7 +823,7 @@ class FederationGroupsRoomsServlet(BaseFederationServlet): class FederationGroupsAddRoomsServlet(BaseFederationServlet): """Add/remove room from group """ - PATH = "/groups/(?P[^/]*)/room/(?P[^/]*)$" + PATH = "/groups/(?P[^/]*)/room/(?P[^/]*)" @defer.inlineCallbacks def on_POST(self, origin, content, query, group_id, room_id): @@ -855,7 +855,7 @@ class FederationGroupsAddRoomsConfigServlet(BaseFederationServlet): """ PATH = ( "/groups/(?P[^/]*)/room/(?P[^/]*)" - "/config/(?P[^/]*)$" + "/config/(?P[^/]*)" ) @defer.inlineCallbacks @@ -874,7 +874,7 @@ class FederationGroupsAddRoomsConfigServlet(BaseFederationServlet): 
class FederationGroupsUsersServlet(BaseFederationServlet): """Get the users in a group on behalf of a user """ - PATH = "/groups/(?P[^/]*)/users$" + PATH = "/groups/(?P[^/]*)/users" @defer.inlineCallbacks def on_GET(self, origin, content, query, group_id): @@ -892,7 +892,7 @@ class FederationGroupsUsersServlet(BaseFederationServlet): class FederationGroupsInvitedUsersServlet(BaseFederationServlet): """Get the users that have been invited to a group """ - PATH = "/groups/(?P[^/]*)/invited_users$" + PATH = "/groups/(?P[^/]*)/invited_users" @defer.inlineCallbacks def on_GET(self, origin, content, query, group_id): @@ -910,7 +910,7 @@ class FederationGroupsInvitedUsersServlet(BaseFederationServlet): class FederationGroupsInviteServlet(BaseFederationServlet): """Ask a group server to invite someone to the group """ - PATH = "/groups/(?P[^/]*)/users/(?P[^/]*)/invite$" + PATH = "/groups/(?P[^/]*)/users/(?P[^/]*)/invite" @defer.inlineCallbacks def on_POST(self, origin, content, query, group_id, user_id): @@ -928,7 +928,7 @@ class FederationGroupsInviteServlet(BaseFederationServlet): class FederationGroupsAcceptInviteServlet(BaseFederationServlet): """Accept an invitation from the group server """ - PATH = "/groups/(?P[^/]*)/users/(?P[^/]*)/accept_invite$" + PATH = "/groups/(?P[^/]*)/users/(?P[^/]*)/accept_invite" @defer.inlineCallbacks def on_POST(self, origin, content, query, group_id, user_id): @@ -945,7 +945,7 @@ class FederationGroupsAcceptInviteServlet(BaseFederationServlet): class FederationGroupsJoinServlet(BaseFederationServlet): """Attempt to join a group """ - PATH = "/groups/(?P[^/]*)/users/(?P[^/]*)/join$" + PATH = "/groups/(?P[^/]*)/users/(?P[^/]*)/join" @defer.inlineCallbacks def on_POST(self, origin, content, query, group_id, user_id): @@ -962,7 +962,7 @@ class FederationGroupsJoinServlet(BaseFederationServlet): class FederationGroupsRemoveUserServlet(BaseFederationServlet): """Leave or kick a user from the group """ - PATH = 
"/groups/(?P[^/]*)/users/(?P[^/]*)/remove$" + PATH = "/groups/(?P[^/]*)/users/(?P[^/]*)/remove" @defer.inlineCallbacks def on_POST(self, origin, content, query, group_id, user_id): @@ -980,7 +980,7 @@ class FederationGroupsRemoveUserServlet(BaseFederationServlet): class FederationGroupsLocalInviteServlet(BaseFederationServlet): """A group server has invited a local user """ - PATH = "/groups/local/(?P[^/]*)/users/(?P[^/]*)/invite$" + PATH = "/groups/local/(?P[^/]*)/users/(?P[^/]*)/invite" @defer.inlineCallbacks def on_POST(self, origin, content, query, group_id, user_id): @@ -997,7 +997,7 @@ class FederationGroupsLocalInviteServlet(BaseFederationServlet): class FederationGroupsRemoveLocalUserServlet(BaseFederationServlet): """A group server has removed a local user """ - PATH = "/groups/local/(?P[^/]*)/users/(?P[^/]*)/remove$" + PATH = "/groups/local/(?P[^/]*)/users/(?P[^/]*)/remove" @defer.inlineCallbacks def on_POST(self, origin, content, query, group_id, user_id): @@ -1014,7 +1014,7 @@ class FederationGroupsRemoveLocalUserServlet(BaseFederationServlet): class FederationGroupsRenewAttestaionServlet(BaseFederationServlet): """A group or user's server renews their attestation """ - PATH = "/groups/(?P[^/]*)/renew_attestation/(?P[^/]*)$" + PATH = "/groups/(?P[^/]*)/renew_attestation/(?P[^/]*)" @defer.inlineCallbacks def on_POST(self, origin, content, query, group_id, user_id): @@ -1037,7 +1037,7 @@ class FederationGroupsSummaryRoomsServlet(BaseFederationServlet): PATH = ( "/groups/(?P[^/]*)/summary" "(/categories/(?P[^/]+))?" 
- "/rooms/(?P[^/]*)$" + "/rooms/(?P[^/]*)" ) @defer.inlineCallbacks @@ -1080,7 +1080,7 @@ class FederationGroupsCategoriesServlet(BaseFederationServlet): """Get all categories for a group """ PATH = ( - "/groups/(?P[^/]*)/categories/$" + "/groups/(?P[^/]*)/categories/" ) @defer.inlineCallbacks @@ -1100,7 +1100,7 @@ class FederationGroupsCategoryServlet(BaseFederationServlet): """Add/remove/get a category in a group """ PATH = ( - "/groups/(?P[^/]*)/categories/(?P[^/]+)$" + "/groups/(?P[^/]*)/categories/(?P[^/]+)" ) @defer.inlineCallbacks @@ -1150,7 +1150,7 @@ class FederationGroupsRolesServlet(BaseFederationServlet): """Get roles in a group """ PATH = ( - "/groups/(?P[^/]*)/roles/$" + "/groups/(?P[^/]*)/roles/" ) @defer.inlineCallbacks @@ -1170,7 +1170,7 @@ class FederationGroupsRoleServlet(BaseFederationServlet): """Add/remove/get a role in a group """ PATH = ( - "/groups/(?P[^/]*)/roles/(?P[^/]+)$" + "/groups/(?P[^/]*)/roles/(?P[^/]+)" ) @defer.inlineCallbacks @@ -1226,7 +1226,7 @@ class FederationGroupsSummaryUsersServlet(BaseFederationServlet): PATH = ( "/groups/(?P[^/]*)/summary" "(/roles/(?P[^/]+))?" 
- "/users/(?P[^/]*)$" + "/users/(?P[^/]*)" ) @defer.inlineCallbacks @@ -1269,7 +1269,7 @@ class FederationGroupsBulkPublicisedServlet(BaseFederationServlet): """Get roles in a group """ PATH = ( - "/get_groups_publicised$" + "/get_groups_publicised" ) @defer.inlineCallbacks @@ -1284,7 +1284,7 @@ class FederationGroupsBulkPublicisedServlet(BaseFederationServlet): class FederationGroupsSettingJoinPolicyServlet(BaseFederationServlet): """Sets whether a group is joinable without an invite or knock """ - PATH = "/groups/(?P[^/]*)/settings/m.join_policy$" + PATH = "/groups/(?P[^/]*)/settings/m.join_policy" @defer.inlineCallbacks def on_PUT(self, origin, content, query, group_id): -- cgit 1.5.1 From 0d2d046709270797f46a65672a5702b194dabef9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 4 Mar 2019 16:04:04 +0000 Subject: Fix missing null guard --- synapse/visibility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/visibility.py b/synapse/visibility.py index 8b9c7180b6..e9dc73c25e 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -237,7 +237,7 @@ def filter_events_for_server(store, server_name, events, redact=True, """ def is_sender_erased(event, erased_senders): - if erased_senders[event.sender]: + if erased_senders and erased_senders[event.sender]: logger.info( "Sender of %s has been erased, redacting", event.event_id, -- cgit 1.5.1 From 8e28bc5eeecbc2c9130c05e8c8237a546fb4d3ea Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Mon, 4 Mar 2019 17:14:58 +0000 Subject: Include a default configuration file in the 'docs' directory. 
(#4791) --- .buildkite/pipeline.yml | 8 + changelog.d/4791.feature | 1 + docs/.sample_config_header.yaml | 7 + docs/sample_config.yaml | 1041 ++++++++++++++++++++++++++++++++++++ scripts-dev/generate_sample_config | 18 + scripts/generate_config | 11 + synapse/config/_base.py | 27 +- synapse/config/database.py | 3 +- synapse/config/logger.py | 4 +- synapse/config/server.py | 4 + tox.ini | 3 + 11 files changed, 1114 insertions(+), 13 deletions(-) create mode 100644 changelog.d/4791.feature create mode 100644 docs/.sample_config_header.yaml create mode 100644 docs/sample_config.yaml create mode 100755 scripts-dev/generate_sample_config (limited to 'synapse') diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 24f22c85b4..369a1ffed1 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -38,6 +38,14 @@ steps: - wait + - command: + - "python -m pip install tox" + - "tox -e check-sampleconfig" + label: "\U0001F9F9 check-sample-config" + plugins: + - docker#v3.0.1: + image: "python:3.6" + - command: - "python -m pip install tox" - "tox -e py27,codecov" diff --git a/changelog.d/4791.feature b/changelog.d/4791.feature new file mode 100644 index 0000000000..1e5fd32463 --- /dev/null +++ b/changelog.d/4791.feature @@ -0,0 +1 @@ +Include a default configuration file in the 'docs' directory. diff --git a/docs/.sample_config_header.yaml b/docs/.sample_config_header.yaml new file mode 100644 index 0000000000..576fc98609 --- /dev/null +++ b/docs/.sample_config_header.yaml @@ -0,0 +1,7 @@ +# This file is a reference to the configuration options which can be set in +# homeserver.yaml. +# +# Note that it is not quite ready to be used as-is. If you are starting from +# scratch, it is easier to generate the config files following the instructions +# in INSTALL.md. 
+ diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml new file mode 100644 index 0000000000..7cf58d2182 --- /dev/null +++ b/docs/sample_config.yaml @@ -0,0 +1,1041 @@ +# This file is a reference to the configuration options which can be set in +# homeserver.yaml. +# +# Note that it is not quite ready to be used as-is. If you are starting from +# scratch, it is easier to generate the config files following the instructions +# in INSTALL.md. + +## Server ## + +# The domain name of the server, with optional explicit port. +# This is used by remote servers to connect to this server, +# e.g. matrix.org, localhost:8080, etc. +# This is also the last part of your UserID. +# +server_name: "SERVERNAME" + +# When running as a daemon, the file to store the pid in +# +pid_file: DATADIR/homeserver.pid + +# CPU affinity mask. Setting this restricts the CPUs on which the +# process will be scheduled. It is represented as a bitmask, with the +# lowest order bit corresponding to the first logical CPU and the +# highest order bit corresponding to the last logical CPU. Not all CPUs +# may exist on a given system but a mask may specify more CPUs than are +# present. +# +# For example: +# 0x00000001 is processor #0, +# 0x00000003 is processors #0 and #1, +# 0xFFFFFFFF is all processors (#0 through #31). +# +# Pinning a Python process to a single CPU is desirable, because Python +# is inherently single-threaded due to the GIL, and can suffer a +# 30-40% slowdown due to cache blow-out and thread context switching +# if the scheduler happens to schedule the underlying threads across +# different cores. See +# https://www.mirantis.com/blog/improve-performance-python-programs-restricting-single-cpu/. +# +# This setting requires the affinity package to be installed! +# +#cpu_affinity: 0xFFFFFFFF + +# The path to the web client which will be served at /_matrix/client/ +# if 'webclient' is configured under the 'listeners' configuration. 
+# +#web_client_location: "/path/to/web/root" + +# The public-facing base URL that clients use to access this HS +# (not including _matrix/...). This is the same URL a user would +# enter into the 'custom HS URL' field on their client. If you +# use synapse with a reverse proxy, this should be the URL to reach +# synapse via the proxy. +# +#public_baseurl: https://example.com/ + +# Set the soft limit on the number of file descriptors synapse can use. +# Zero is used to indicate synapse should set the soft limit to the +# hard limit. +# +soft_file_limit: 0 + +# Set to false to disable presence tracking on this homeserver. +# +use_presence: true + +# The GC threshold parameters to pass to `gc.set_threshold`, if defined +# +#gc_thresholds: [700, 10, 10] + +# Set the limit on the returned events in the timeline in the get +# and sync operations. The default value is -1, meaning no upper limit. +# +#filter_timeline_limit: 5000 + +# Whether room invites to users on this server should be blocked +# (except those sent by local server admins). The default is False. +# +#block_non_admin_invites: True + +# Room searching +# +# If disabled, new messages will not be indexed for searching and users +# will receive errors when searching for messages. Defaults to enabled. +# +#enable_search: false + +# Restrict federation to the following whitelist of domains. +# N.B. we recommend also firewalling your federation listener to limit +# inbound federation traffic as early as possible, rather than relying +# purely on this application-layer restriction. If not specified, the +# default is to whitelist everything. +# +#federation_domain_whitelist: +# - lon.example.com +# - nyc.example.com +# - syd.example.com + +# List of ports that Synapse should listen on, their purpose and their +# configuration. +# +# Options for each listener include: +# +# port: the TCP port to bind to +# +# bind_addresses: a list of local addresses to listen on. The default is +# 'all local interfaces'. 
+# +# type: the type of listener. Normally 'http', but other valid options are: +# 'manhole' (see docs/manhole.md), +# 'metrics' (see docs/metrics-howto.rst), +# 'replication' (see docs/workers.rst). +# +# tls: set to true to enable TLS for this listener. Will use the TLS +# key/cert specified in tls_private_key_path / tls_certificate_path. +# +# x_forwarded: Only valid for an 'http' listener. Set to true to use the +# X-Forwarded-For header as the client IP. Useful when Synapse is +# behind a reverse-proxy. +# +# resources: Only valid for an 'http' listener. A list of resources to host +# on this port. Options for each resource are: +# +# names: a list of names of HTTP resources. See below for a list of +# valid resource names. +# +# compress: set to true to enable HTTP compression for this resource. +# +# additional_resources: Only valid for an 'http' listener. A map of +# additional endpoints which should be loaded via dynamic modules. +# +# Valid resource names are: +# +# client: the client-server API (/_matrix/client). Also implies 'media' and +# 'static'. +# +# consent: user consent forms (/_matrix/consent). See +# docs/consent_tracking.md. +# +# federation: the server-server API (/_matrix/federation). Also implies +# 'media', 'keys', 'openid' +# +# keys: the key discovery API (/_matrix/keys). +# +# media: the media API (/_matrix/media). +# +# metrics: the metrics interface. See docs/metrics-howto.rst. +# +# openid: OpenID authentication. +# +# replication: the HTTP replication API (/_synapse/replication). See +# docs/workers.rst. +# +# static: static resources under synapse/static (/_matrix/static). (Mostly +# useful for 'fallback authentication'.) +# +# webclient: A web client. Requires web_client_location to be set. +# +listeners: + # TLS-enabled listener: for when matrix traffic is sent directly to synapse. + # + # Disabled by default. To enable it, uncomment the following. 
(Note that you + # will also need to give Synapse a TLS key and certificate: see the TLS section + # below.) + # + #- port: 8448 + # type: http + # tls: true + # resources: + # - names: [client, federation] + + # Unsecure HTTP listener: for when matrix traffic passes through a reverse proxy + # that unwraps TLS. + # + # If you plan to use a reverse proxy, please see + # https://github.com/matrix-org/synapse/blob/master/docs/reverse_proxy.rst. + # + - port: 8008 + tls: false + bind_addresses: ['::1', '127.0.0.1'] + type: http + x_forwarded: true + + resources: + - names: [client, federation] + compress: false + + # example additional_resources: + # + #additional_resources: + # "/_matrix/my/custom/endpoint": + # module: my_module.CustomRequestHandler + # config: {} + + # Turn on the twisted ssh manhole service on localhost on the given + # port. + # + #- port: 9000 + # bind_addresses: ['::1', '127.0.0.1'] + # type: manhole + + +## Homeserver blocking ## + +# How to reach the server admin, used in ResourceLimitError +# +#admin_contact: 'mailto:admin@server.com' + +# Global blocking +# +#hs_disabled: False +#hs_disabled_message: 'Human readable reason for why the HS is blocked' +#hs_disabled_limit_type: 'error code(str), to help clients decode reason' + +# Monthly Active User Blocking +# +#limit_usage_by_mau: False +#max_mau_value: 50 +#mau_trial_days: 2 + +# If enabled, the metrics for the number of monthly active users will +# be populated, however no one will be limited. If limit_usage_by_mau +# is true, this is implied to be true. +# +#mau_stats_only: False + +# Sometimes the server admin will want to ensure certain accounts are +# never blocked by mau checking. These accounts are specified here. +# +#mau_limit_reserved_threepids: +# - medium: 'email' +# address: 'reserved_user@example.com' + + +## TLS ## + +# PEM-encoded X509 certificate for TLS. 
+# This certificate, as of Synapse 1.0, will need to be a valid and verifiable +# certificate, signed by a recognised Certificate Authority. +# +# See 'ACME support' below to enable auto-provisioning this certificate via +# Let's Encrypt. +# +#tls_certificate_path: "CONFDIR/SERVERNAME.tls.crt" + +# PEM-encoded private key for TLS +# +#tls_private_key_path: "CONFDIR/SERVERNAME.tls.key" + +# ACME support: This will configure Synapse to request a valid TLS certificate +# for your configured `server_name` via Let's Encrypt. +# +# Note that provisioning a certificate in this way requires port 80 to be +# routed to Synapse so that it can complete the http-01 ACME challenge. +# By default, if you enable ACME support, Synapse will attempt to listen on +# port 80 for incoming http-01 challenges - however, this will likely fail +# with 'Permission denied' or a similar error. +# +# There are a couple of potential solutions to this: +# +# * If you already have an Apache, Nginx, or similar listening on port 80, +# you can configure Synapse to use an alternate port, and have your web +# server forward the requests. For example, assuming you set 'port: 8009' +# below, on Apache, you would write: +# +# ProxyPass /.well-known/acme-challenge http://localhost:8009/.well-known/acme-challenge +# +# * Alternatively, you can use something like `authbind` to give Synapse +# permission to listen on port 80. +# +acme: + # ACME support is disabled by default. Uncomment the following line + # (and tls_certificate_path and tls_private_key_path above) to enable it. + # + #enabled: true + + # Endpoint to use to request certificates. If you only want to test, + # use Let's Encrypt's staging url: + # https://acme-staging.api.letsencrypt.org/directory + # + #url: https://acme-v01.api.letsencrypt.org/directory + + # Port number to listen on for the HTTP-01 challenge. Change this if + # you are forwarding connections through Apache/Nginx/etc. 
+ # + #port: 80 + + # Local addresses to listen on for incoming connections. + # Again, you may want to change this if you are forwarding connections + # through Apache/Nginx/etc. + # + #bind_addresses: ['::', '0.0.0.0'] + + # How many days remaining on a certificate before it is renewed. + # + #reprovision_threshold: 30 + + # The domain that the certificate should be for. Normally this + # should be the same as your Matrix domain (i.e., 'server_name'), but, + # by putting a file at 'https:///.well-known/matrix/server', + # you can delegate incoming traffic to another server. If you do that, + # you should give the target of the delegation here. + # + # For example: if your 'server_name' is 'example.com', but + # 'https://example.com/.well-known/matrix/server' delegates to + # 'matrix.example.com', you should put 'matrix.example.com' here. + # + # If not set, defaults to your 'server_name'. + # + #domain: matrix.example.com + +# List of allowed TLS fingerprints for this server to publish along +# with the signing keys for this server. Other matrix servers that +# make HTTPS requests to this server will check that the TLS +# certificates returned by this server match one of the fingerprints. +# +# Synapse automatically adds the fingerprint of its own certificate +# to the list. So if federation traffic is handled directly by synapse +# then no modification to the list is required. +# +# If synapse is run behind a load balancer that handles the TLS then it +# will be necessary to add the fingerprints of the certificates used by +# the loadbalancers to this list if they are different to the one +# synapse is using. +# +# Homeservers are permitted to cache the list of TLS fingerprints +# returned in the key responses up to the "valid_until_ts" returned in +# key. It may be necessary to publish the fingerprints of a new +# certificate and wait until the "valid_until_ts" of the previous key +# responses have passed before deploying it. 
+# +# You can calculate a fingerprint from a given TLS listener via: +# openssl s_client -connect $host:$port < /dev/null 2> /dev/null | +# openssl x509 -outform DER | openssl sha256 -binary | base64 | tr -d '=' +# or by checking matrix.org/federationtester/api/report?server_name=$host +# +#tls_fingerprints: [{"sha256": ""}] + + + +## Database ## + +database: + # The database engine name + name: "sqlite3" + # Arguments to pass to the engine + args: + # Path to the database + database: "DATADIR/homeserver.db" + +# Number of events to cache in memory. +event_cache_size: "10K" + + +## Logging ## + +# A yaml python logging config file +# +log_config: "CONFDIR/SERVERNAME.log.config" + + +## Ratelimiting ## + +# Number of messages a client can send per second +# +rc_messages_per_second: 0.2 + +# Number of messages a client can send before being throttled +# +rc_message_burst_count: 10.0 + +# The federation window size in milliseconds +# +federation_rc_window_size: 1000 + +# The number of federation requests from a single server in a window +# before the server will delay processing the request. +# +federation_rc_sleep_limit: 10 + +# The duration in milliseconds to delay processing events from +# remote servers by if they go over the sleep limit. +# +federation_rc_sleep_delay: 500 + +# The maximum number of concurrent federation requests allowed +# from a single server +# +federation_rc_reject_limit: 50 + +# The number of federation requests to concurrently process from a +# single server +# +federation_rc_concurrent: 3 + + + +# Directory where uploaded images and attachments are stored. +# +media_store_path: "DATADIR/media_store" + +# Media storage providers allow media to be stored in different +# locations. +# +#media_storage_providers: +# - module: file_system +# # Whether to write new local files. 
+# store_local: false +# # Whether to write new remote media +# store_remote: false +# # Whether to block upload requests waiting for write to this +# # provider to complete +# store_synchronous: false +# config: +# directory: /mnt/some/other/directory + +# Directory where in-progress uploads are stored. +# +uploads_path: "DATADIR/uploads" + +# The largest allowed upload size in bytes +# +max_upload_size: "10M" + +# Maximum number of pixels that will be thumbnailed +# +max_image_pixels: "32M" + +# Whether to generate new thumbnails on the fly to precisely match +# the resolution requested by the client. If true then whenever +# a new resolution is requested by the client the server will +# generate a new thumbnail. If false the server will pick a thumbnail +# from a precalculated list. +# +dynamic_thumbnails: false + +# List of thumbnails to precalculate when an image is uploaded. +# +thumbnail_sizes: +- width: 32 + height: 32 + method: crop +- width: 96 + height: 96 + method: crop +- width: 320 + height: 240 + method: scale +- width: 640 + height: 480 + method: scale +- width: 800 + height: 600 + method: scale + +# Is the preview URL API enabled? If enabled, you *must* specify +# an explicit url_preview_ip_range_blacklist of IPs that the spider is +# denied from accessing. +# +url_preview_enabled: False + +# List of IP address CIDR ranges that the URL preview spider is denied +# from accessing. There are no defaults: you must explicitly +# specify a list for URL previewing to work. You should specify any +# internal services in your network that you do not want synapse to try +# to connect to, otherwise anyone in any Matrix room could cause your +# synapse to issue arbitrary GET requests to your internal services, +# causing serious security issues. 
+# +#url_preview_ip_range_blacklist: +# - '127.0.0.0/8' +# - '10.0.0.0/8' +# - '172.16.0.0/12' +# - '192.168.0.0/16' +# - '100.64.0.0/10' +# - '169.254.0.0/16' +# - '::1/128' +# - 'fe80::/64' +# - 'fc00::/7' +# +# List of IP address CIDR ranges that the URL preview spider is allowed +# to access even if they are specified in url_preview_ip_range_blacklist. +# This is useful for specifying exceptions to wide-ranging blacklisted +# target IP ranges - e.g. for enabling URL previews for a specific private +# website only visible in your network. +# +#url_preview_ip_range_whitelist: +# - '192.168.1.1' + +# Optional list of URL matches that the URL preview spider is +# denied from accessing. You should use url_preview_ip_range_blacklist +# in preference to this, otherwise someone could define a public DNS +# entry that points to a private IP address and circumvent the blacklist. +# This is more useful if you know there is an entire shape of URL that +# you know that will never want synapse to try to spider. +# +# Each list entry is a dictionary of url component attributes as returned +# by urlparse.urlsplit as applied to the absolute form of the URL. See +# https://docs.python.org/2/library/urlparse.html#urlparse.urlsplit +# The values of the dictionary are treated as an filename match pattern +# applied to that component of URLs, unless they start with a ^ in which +# case they are treated as a regular expression match. If all the +# specified component matches for a given list item succeed, the URL is +# blacklisted. 
+# +#url_preview_url_blacklist: +# # blacklist any URL with a username in its URI +# - username: '*' +# +# # blacklist all *.google.com URLs +# - netloc: 'google.com' +# - netloc: '*.google.com' +# +# # blacklist all plain HTTP URLs +# - scheme: 'http' +# +# # blacklist http(s)://www.acme.com/foo +# - netloc: 'www.acme.com' +# path: '/foo' +# +# # blacklist any URL with a literal IPv4 address +# - netloc: '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$' + +# The largest allowed URL preview spidering size in bytes +max_spider_size: "10M" + + + +## Captcha ## +# See docs/CAPTCHA_SETUP for full details of configuring this. + +# This Home Server's ReCAPTCHA public key. +# +recaptcha_public_key: "YOUR_PUBLIC_KEY" + +# This Home Server's ReCAPTCHA private key. +# +recaptcha_private_key: "YOUR_PRIVATE_KEY" + +# Enables ReCaptcha checks when registering, preventing signup +# unless a captcha is answered. Requires a valid ReCaptcha +# public/private key. +# +enable_registration_captcha: False + +# A secret key used to bypass the captcha test entirely. +#captcha_bypass_secret: "YOUR_SECRET_HERE" + +# The API endpoint to use for verifying m.login.recaptcha responses. +recaptcha_siteverify_api: "https://www.recaptcha.net/recaptcha/api/siteverify" + + +## TURN ## + +# The public URIs of the TURN server to give to clients +# +#turn_uris: [] + +# The shared secret used to compute passwords for the TURN server +# +#turn_shared_secret: "YOUR_SHARED_SECRET" + +# The Username and password if the TURN server needs them and +# does not use a token +# +#turn_username: "TURNSERVER_USERNAME" +#turn_password: "TURNSERVER_PASSWORD" + +# How long generated TURN credentials last +# +turn_user_lifetime: "1h" + +# Whether guests should be allowed to use the TURN server. +# This defaults to True, otherwise VoIP will be unreliable for guests. +# However, it does introduce a slight security risk as it allows users to +# connect to arbitrary endpoints without having first signed up for a +# valid account (e.g. 
by passing a CAPTCHA). +# +turn_allow_guests: True + + +## Registration ## + +# Enable registration for new users. +enable_registration: False + +# The user must provide all of the below types of 3PID when registering. +# +#registrations_require_3pid: +# - email +# - msisdn + +# Explicitly disable asking for MSISDNs from the registration +# flow (overrides registrations_require_3pid if MSISDNs are set as required) +# +#disable_msisdn_registration: True + +# Mandate that users are only allowed to associate certain formats of +# 3PIDs with accounts on this server. +# +#allowed_local_3pids: +# - medium: email +# pattern: '.*@matrix\.org' +# - medium: email +# pattern: '.*@vector\.im' +# - medium: msisdn +# pattern: '\+44' + +# If set, allows registration by anyone who also has the shared +# secret, even if registration is otherwise disabled. +# +# registration_shared_secret: + +# Set the number of bcrypt rounds used to generate password hash. +# Larger numbers increase the work factor needed to generate the hash. +# The default number is 12 (which equates to 2^12 rounds). +# N.B. that increasing this will exponentially increase the time required +# to register or login - e.g. 24 => 2^24 rounds which will take >20 mins. +# +bcrypt_rounds: 12 + +# Allows users to register as guests without a password/email/etc, and +# participate in rooms hosted on this server which have been made +# accessible to anonymous users. +# +allow_guest_access: False + +# The identity server which we suggest that clients should use when users log +# in on this server. +# +# (By default, no suggestion is made, so it is left up to the client. +# This setting is ignored unless public_baseurl is also set.) +# +#default_identity_server: https://matrix.org + +# The list of identity servers trusted to verify third party +# identifiers by this server. +# +# Also defines the ID server which will be called when an account is +# deactivated (one will be picked arbitrarily). 
+# +trusted_third_party_id_servers: + - matrix.org + - vector.im + +# Users who register on this homeserver will automatically be joined +# to these rooms +# +#auto_join_rooms: +# - "#example:example.com" + +# Where auto_join_rooms are specified, setting this flag ensures that +# the rooms exist by creating them when the first user on the +# homeserver registers. +# Setting to false means that if the rooms are not manually created, +# users cannot be auto-joined since they do not exist. +# +autocreate_auto_join_rooms: true + + +## Metrics ## + +# Enable collection and rendering of performance metrics +# +enable_metrics: False + +# Enable sentry integration +# NOTE: While attempts are made to ensure that the logs don't contain +# any sensitive information, this cannot be guaranteed. By enabling +# this option the sentry server may therefore receive sensitive +# information, and it in turn may then disseminate sensitive information +# through insecure notification channels if so configured. +# +#sentry: +# dsn: "..." + +# Whether or not to report anonymized homeserver usage statistics. +# report_stats: true|false + + +## API Configuration ## + +# A list of event types that will be included in the room_invite_state +# +room_invite_state_types: + - "m.room.join_rules" + - "m.room.canonical_alias" + - "m.room.avatar" + - "m.room.encryption" + - "m.room.name" + + +# A list of application service config files to use +# +app_service_config_files: [] + +# Whether or not to track application service IP addresses. Implicitly +# enables MAU tracking for application service users. +# +track_appservice_user_ips: False + + +# a secret which is used to sign access tokens. If none is specified, +# the registration_shared_secret is used, if one is given; otherwise, +# a secret key is derived from the signing key. +# +# macaroon_secret_key: + +# Used to enable access token expiration.
+# +expire_access_token: False + +# a secret which is used to calculate HMACs for form values, to stop +# falsification of values. Must be specified for the User Consent +# forms to work. +# +# form_secret: + +## Signing Keys ## + +# Path to the signing key to sign messages with +# +signing_key_path: "CONFDIR/SERVERNAME.signing.key" + +# The keys that the server used to sign messages with but won't use +# to sign new messages. E.g. it has lost its private key +# +#old_signing_keys: +# "ed25519:auto": +# # Base64 encoded public key +# key: "The public part of your old signing key." +# # Millisecond POSIX timestamp when the key expired. +# expired_ts: 123456789123 + +# How long key response published by this server is valid for. +# Used to set the valid_until_ts in /key/v2 APIs. +# Determines how quickly servers will query to check which keys +# are still valid. +# +key_refresh_interval: "1d" # 1 Day. + +# The trusted servers to download signing keys from. +# +perspectives: + servers: + "matrix.org": + verify_keys: + "ed25519:auto": + key: "Noi6WqcDj0QmPxCNQqgezwTlBKrfqehY1u2FyWP9uYw" + + + +# Enable SAML2 for registration and login. Uses pysaml2. +# +# `sp_config` is the configuration for the pysaml2 Service Provider. +# See pysaml2 docs for format of config. +# +# Default values will be used for the 'entityid' and 'service' settings, +# so it is not normally necessary to specify them unless you need to +# override them. +# +#saml2_config: +# sp_config: +# # point this to the IdP's metadata. You can use either a local file or +# # (preferably) a URL. +# metadata: +# #local: ["saml2/idp.xml"] +# remote: +# - url: https://our_idp/metadata.xml +# +# # The rest of sp_config is just used to generate our metadata xml, and you +# # may well not need it, depending on your setup. Alternatively you +# # may need a whole lot more detail - see the pysaml2 docs! 
+# +# description: ["My awesome SP", "en"] +# name: ["Test SP", "en"] +# +# organization: +# name: Example com +# display_name: +# - ["Example co", "en"] +# url: "http://example.com" +# +# contact_person: +# - given_name: Bob +# sur_name: "the Sysadmin" +# email_address: ["admin@example.com"] +# contact_type: technical +# +# # Instead of putting the config inline as above, you can specify a +# # separate pysaml2 configuration file: +# # +# config_path: "CONFDIR/sp_conf.py" + + + +# Enable CAS for registration and login. +# +#cas_config: +# enabled: true +# server_url: "https://cas-server.com" +# service_url: "https://homeserver.domain.com:8448" +# #required_attributes: +# # name: value + + +# The JWT needs to contain a globally unique "sub" (subject) claim. +# +#jwt_config: +# enabled: true +# secret: "a secret" +# algorithm: "HS256" + + + +# Enable password for login. +# +password_config: + enabled: true + # Uncomment and change to a secret random string for extra security. + # DO NOT CHANGE THIS AFTER INITIAL SETUP! + #pepper: "" + + + +# Enable sending emails for notification events +# Defining a custom URL for Riot is only needed if email notifications +# should contain links to a self-hosted installation of Riot; when set +# the "app_name" setting is ignored. +# +# If your SMTP server requires authentication, the optional smtp_user & +# smtp_pass variables should be used +# +#email: +# enable_notifs: false +# smtp_host: "localhost" +# smtp_port: 25 +# smtp_user: "exampleusername" +# smtp_pass: "examplepassword" +# require_transport_security: False +# notif_from: "Your Friendly %(app)s Home Server <noreply@example.com>" +# app_name: Matrix +# # if template_dir is unset, uses the example templates that are part of +# # the Synapse distribution.
+# #template_dir: res/templates +# notif_template_html: notif_mail.html +# notif_template_text: notif_mail.txt +# notif_for_new_users: True +# riot_base_url: "http://localhost/riot" + + +#password_providers: +# - module: "ldap_auth_provider.LdapAuthProvider" +# config: +# enabled: true +# uri: "ldap://ldap.example.com:389" +# start_tls: true +# base: "ou=users,dc=example,dc=com" +# attributes: +# uid: "cn" +# mail: "email" +# name: "givenName" +# #bind_dn: +# #bind_password: +# #filter: "(objectClass=posixAccount)" + + + +# Clients requesting push notifications can either have the body of +# the message sent in the notification poke along with other details +# like the sender, or just the event ID and room ID (`event_id_only`). +# If clients choose the former, this option controls whether the +# notification request includes the content of the event (other details +# like the sender are still included). For `event_id_only` push, it +# has no effect. +# +# For modern android devices the notification content will still appear +# because it is loaded by the app. iPhone, however will send a +# notification saying only that a message arrived and who it came from. +# +#push: +# include_content: true + + +#spam_checker: +# module: "my_custom_project.SuperSpamChecker" +# config: +# example_option: 'things' + + +# Whether to allow non server admins to create groups on this server +# +enable_group_creation: false + +# If enabled, non server admins can only create groups with local parts +# starting with this prefix +# +#group_creation_prefix: "unofficial/" + + + +# User Directory configuration +# +# 'search_all_users' defines whether to search all users visible to your HS +# when searching the user directory, rather than limiting to users visible +# in public rooms. Defaults to false. If you set it True, you'll have to run +# UPDATE user_directory_stream_pos SET stream_id = NULL; +# on your database to tell it to rebuild the user_directory search indexes. 
+# +#user_directory: +# search_all_users: false + + +# User Consent configuration +# +# for detailed instructions, see +# https://github.com/matrix-org/synapse/blob/master/docs/consent_tracking.md +# +# Parts of this section are required if enabling the 'consent' resource under +# 'listeners', in particular 'template_dir' and 'version'. +# +# 'template_dir' gives the location of the templates for the HTML forms. +# This directory should contain one subdirectory per language (eg, 'en', 'fr'), +# and each language directory should contain the policy document (named as +# '<version>.html') and a success page (success.html). +# +# 'version' specifies the 'current' version of the policy document. It defines +# the version to be served by the consent resource if there is no 'v' +# parameter. +# +# 'server_notice_content', if enabled, will send a user a "Server Notice" +# asking them to consent to the privacy policy. The 'server_notices' section +# must also be configured for this to work. Notices will *not* be sent to +# guest users unless 'send_server_notice_to_guests' is set to true. +# +# 'block_events_error', if set, will block any attempts to send events +# until the user consents to the privacy policy. The value of the setting is +# used as the text of the error. +# +# 'require_at_registration', if enabled, will add a step to the registration +# process, similar to how captcha works. Users will be required to accept the +# policy before their account is created. +# +# 'policy_name' is the display name of the policy users will see when registering +# for an account. Has no effect unless `require_at_registration` is enabled. +# Defaults to "Privacy Policy".
+# +#user_consent: +# template_dir: res/templates/privacy +# version: 1.0 +# server_notice_content: +# msgtype: m.text +# body: >- +# To continue using this homeserver you must review and agree to the +# terms and conditions at %(consent_uri)s +# send_server_notice_to_guests: True +# block_events_error: >- +# To continue using this homeserver you must review and agree to the +# terms and conditions at %(consent_uri)s +# require_at_registration: False +# policy_name: Privacy Policy +# + + +# Server Notices room configuration +# +# Uncomment this section to enable a room which can be used to send notices +# from the server to users. It is a special room which cannot be left; notices +# come from a special "notices" user id. +# +# If you uncomment this section, you *must* define the system_mxid_localpart +# setting, which defines the id of the user which will be used to send the +# notices. +# +# It's also possible to override the room name, the display name of the +# "notices" user, and the avatar for the user. +# +#server_notices: +# system_mxid_localpart: notices +# system_mxid_display_name: "Server Notices" +# system_mxid_avatar_url: "mxc://server.com/oumMVlgDnLYFaPVkExemNVVZ" +# room_name: "Server Notices" + + + +# The `alias_creation` option controls who's allowed to create aliases +# on this server. +# +# The format of this option is a list of rules that contain globs that +# match against user_id, room_id and the new alias (fully qualified with +# server name). The action in the first rule that matches is taken, +# which can currently either be "allow" or "deny". +# +# Missing user_id/room_id/alias fields default to "*". +# +# If no rules match the request is denied. An empty list means no one +# can create aliases. 
+# +# Options for the rules include: +# +# user_id: Matches against the creator of the alias +# alias: Matches against the alias being created +# room_id: Matches against the room ID the alias is being pointed at +# action: Whether to "allow" or "deny" the request if the rule matches +# +# The default is: +# +#alias_creation_rules: +# - user_id: "*" +# alias: "*" +# room_id: "*" +# action: allow + +# The `room_list_publication_rules` option controls who can publish and +# which rooms can be published in the public room list. +# +# The format of this option is the same as that for +# `alias_creation_rules`. +# +# If the room has one or more aliases associated with it, only one of +# the aliases needs to match the alias rule. If there are no aliases +# then only rules with `alias: *` match. +# +# If no rules match the request is denied. An empty list means no one +# can publish rooms. +# +# Options for the rules include: +# +# user_id: Matches against the creator of the alias +# room_id: Matches against the room ID being published +# alias: Matches against any current local or canonical aliases +# associated with the room +# action: Whether to "allow" or "deny" the request if the rule matches +# +# The default is: +# +#room_list_publication_rules: +# - user_id: "*" +# alias: "*" +# room_id: "*" +# action: allow diff --git a/scripts-dev/generate_sample_config b/scripts-dev/generate_sample_config new file mode 100755 index 0000000000..5e33b9b549 --- /dev/null +++ b/scripts-dev/generate_sample_config @@ -0,0 +1,18 @@ +#!/bin/bash +# +# Update/check the docs/sample_config.yaml + +set -e + +cd `dirname $0`/.. + +SAMPLE_CONFIG="docs/sample_config.yaml" + +if [ "$1" == "--check" ]; then + diff -u "$SAMPLE_CONFIG" <(./scripts/generate_config --header-file docs/.sample_config_header.yaml) >/dev/null || { + echo -e "\e[1m\e[31m$SAMPLE_CONFIG is not up-to-date.
Regenerate it with \`scripts-dev/generate_sample_config\`.\e[0m" >&2 + exit 1 + } +else + ./scripts/generate_config --header-file docs/.sample_config_header.yaml -o "$SAMPLE_CONFIG" +fi diff --git a/scripts/generate_config b/scripts/generate_config index 61c5f049e8..93b6406992 100755 --- a/scripts/generate_config +++ b/scripts/generate_config @@ -1,6 +1,7 @@ #!/usr/bin/env python import argparse +import shutil import sys from synapse.config.homeserver import HomeServerConfig @@ -50,6 +51,13 @@ if __name__ == "__main__": help="File to write the configuration to. Default: stdout", ) + parser.add_argument( + "--header-file", + type=argparse.FileType('r'), + help="File from which to read a header, which will be printed before the " + "generated config.", + ) + args = parser.parse_args() report_stats = args.report_stats @@ -64,4 +72,7 @@ if __name__ == "__main__": report_stats=report_stats, ) + if args.header_file: + shutil.copyfileobj(args.header_file, args.output_file) + args.output_file.write(conf) diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 5aec43b702..c4d3087fa4 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -180,9 +180,7 @@ class Config(object): Returns: str: the yaml config file """ - default_config = "# vim:ft=yaml\n" - - default_config += "\n\n".join( + default_config = "\n\n".join( dedent(conf) for conf in self.invoke_all( "default_config", @@ -297,19 +295,26 @@ class Config(object): "Must specify a server_name to a generate config for." " Pass -H server.name." 
) + + config_str = obj.generate_config( + config_dir_path=config_dir_path, + data_dir_path=os.getcwd(), + server_name=server_name, + report_stats=(config_args.report_stats == "yes"), + generate_secrets=True, + ) + if not cls.path_exists(config_dir_path): os.makedirs(config_dir_path) with open(config_path, "w") as config_file: - config_str = obj.generate_config( - config_dir_path=config_dir_path, - data_dir_path=os.getcwd(), - server_name=server_name, - report_stats=(config_args.report_stats == "yes"), - generate_secrets=True, + config_file.write( + "# vim:ft=yaml\n\n" ) - config = yaml.load(config_str) - obj.invoke_all("generate_files", config) config_file.write(config_str) + + config = yaml.load(config_str) + obj.invoke_all("generate_files", config) + print( ( "A config file has been generated in %r for server name" diff --git a/synapse/config/database.py b/synapse/config/database.py index c8890147a6..63e9cb63f8 100644 --- a/synapse/config/database.py +++ b/synapse/config/database.py @@ -49,7 +49,8 @@ class DatabaseConfig(Config): def default_config(self, data_dir_path, **kwargs): database_path = os.path.join(data_dir_path, "homeserver.db") return """\ - # Database configuration + ## Database ## + database: # The database engine name name: "sqlite3" diff --git a/synapse/config/logger.py b/synapse/config/logger.py index f6940b65fd..464c28c2d9 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -81,7 +81,9 @@ class LoggingConfig(Config): def default_config(self, config_dir_path, server_name, **kwargs): log_config = os.path.join(config_dir_path, server_name + ".log.config") - return """ + return """\ + ## Logging ## + # A yaml python logging config file # log_config: "%(log_config)s" diff --git a/synapse/config/server.py b/synapse/config/server.py index 4200f10da3..35a322fee0 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -260,9 +260,11 @@ class ServerConfig(Config): # This is used by remote servers to connect to this 
server, # e.g. matrix.org, localhost:8080, etc. # This is also the last part of your UserID. + # server_name: "%(server_name)s" # When running as a daemon, the file to store the pid in + # pid_file: %(pid_file)s # CPU affinity mask. Setting this restricts the CPUs on which the @@ -304,9 +306,11 @@ class ServerConfig(Config): # Set the soft limit on the number of file descriptors synapse can use # Zero is used to indicate synapse should set the soft limit to the # hard limit. + # soft_file_limit: 0 # Set to false to disable presence tracking on this homeserver. + # use_presence: true # The GC threshold parameters to pass to `gc.set_threshold`, if defined diff --git a/tox.ini b/tox.ini index 14437e7334..19080a648f 100644 --- a/tox.ini +++ b/tox.ini @@ -118,6 +118,9 @@ commands = python -m towncrier.check --compare-with=origin/develop basepython = python3.6 +[testenv:check-sampleconfig] +commands = {toxinidir}/scripts-dev/generate_sample_config --check + [testenv:codecov] skip_install = True deps = -- cgit 1.5.1 From a84b8d56c2cfec62c87a13771c817b3205b5ec4b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 4 Mar 2019 18:03:29 +0000 Subject: Fixup slave stores --- synapse/replication/slave/storage/deviceinbox.py | 15 +- synapse/replication/slave/storage/devices.py | 45 +- synapse/replication/slave/storage/push_rule.py | 2 +- synapse/storage/deviceinbox.py | 324 ++++---- synapse/storage/devices.py | 987 ++++++++++++----------- synapse/storage/end_to_end_keys.py | 88 +- 6 files changed, 728 insertions(+), 733 deletions(-) (limited to 'synapse') diff --git a/synapse/replication/slave/storage/deviceinbox.py b/synapse/replication/slave/storage/deviceinbox.py index 4f19fd35aa..4d59778863 100644 --- a/synapse/replication/slave/storage/deviceinbox.py +++ b/synapse/replication/slave/storage/deviceinbox.py @@ -13,15 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from synapse.storage import DataStore +from synapse.replication.slave.storage._base import BaseSlavedStore +from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker +from synapse.storage.deviceinbox import DeviceInboxWorkerStore from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.caches.stream_change_cache import StreamChangeCache -from ._base import BaseSlavedStore, __func__ -from ._slaved_id_tracker import SlavedIdTracker - -class SlavedDeviceInboxStore(BaseSlavedStore): +class SlavedDeviceInboxStore(DeviceInboxWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): super(SlavedDeviceInboxStore, self).__init__(db_conn, hs) self._device_inbox_id_gen = SlavedIdTracker( @@ -43,12 +42,6 @@ class SlavedDeviceInboxStore(BaseSlavedStore): expiry_ms=30 * 60 * 1000, ) - get_to_device_stream_token = __func__(DataStore.get_to_device_stream_token) - get_new_messages_for_device = __func__(DataStore.get_new_messages_for_device) - get_new_device_msgs_for_remote = __func__(DataStore.get_new_device_msgs_for_remote) - delete_messages_for_device = __func__(DataStore.delete_messages_for_device) - delete_device_msgs_for_remote = __func__(DataStore.delete_device_msgs_for_remote) - def stream_positions(self): result = super(SlavedDeviceInboxStore, self).stream_positions() result["to_device"] = self._device_inbox_id_gen.get_current_token() diff --git a/synapse/replication/slave/storage/devices.py b/synapse/replication/slave/storage/devices.py index ec2fd561cc..16c9a162c5 100644 --- a/synapse/replication/slave/storage/devices.py +++ b/synapse/replication/slave/storage/devices.py @@ -13,15 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from synapse.storage import DataStore -from synapse.storage.end_to_end_keys import EndToEndKeyStore +from synapse.replication.slave.storage._base import BaseSlavedStore +from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker +from synapse.storage.devices import DeviceWorkerStore +from synapse.storage.end_to_end_keys import EndToEndKeyWorkerStore from synapse.util.caches.stream_change_cache import StreamChangeCache -from ._base import BaseSlavedStore, __func__ -from ._slaved_id_tracker import SlavedIdTracker - -class SlavedDeviceStore(BaseSlavedStore): +class SlavedDeviceStore(EndToEndKeyWorkerStore, DeviceWorkerStore, BaseSlavedStore): def __init__(self, db_conn, hs): super(SlavedDeviceStore, self).__init__(db_conn, hs) @@ -38,17 +37,6 @@ class SlavedDeviceStore(BaseSlavedStore): "DeviceListFederationStreamChangeCache", device_list_max, ) - get_device_stream_token = __func__(DataStore.get_device_stream_token) - get_user_whose_devices_changed = __func__(DataStore.get_user_whose_devices_changed) - get_devices_by_remote = __func__(DataStore.get_devices_by_remote) - _get_devices_by_remote_txn = __func__(DataStore._get_devices_by_remote_txn) - _get_e2e_device_keys_txn = __func__(DataStore._get_e2e_device_keys_txn) - mark_as_sent_devices_by_remote = __func__(DataStore.mark_as_sent_devices_by_remote) - _mark_as_sent_devices_by_remote_txn = ( - __func__(DataStore._mark_as_sent_devices_by_remote_txn) - ) - count_e2e_one_time_keys = EndToEndKeyStore.__dict__["count_e2e_one_time_keys"] - def stream_positions(self): result = super(SlavedDeviceStore, self).stream_positions() result["device_lists"] = self._device_list_id_gen.get_current_token() @@ -58,14 +46,23 @@ class SlavedDeviceStore(BaseSlavedStore): if stream_name == "device_lists": self._device_list_id_gen.advance(token) for row in rows: - self._device_list_stream_cache.entity_has_changed( - row.user_id, token + self._invalidate_caches_for_devices( + token, row.user_id, row.destination, ) - - if 
row.destination: - self._device_list_federation_stream_cache.entity_has_changed( - row.destination, token - ) return super(SlavedDeviceStore, self).process_replication_rows( stream_name, token, rows ) + + def _invalidate_caches_for_devices(self, token, user_id, destination): + self._device_list_stream_cache.entity_has_changed( + user_id, token + ) + + if destination: + self._device_list_federation_stream_cache.entity_has_changed( + destination, token + ) + + self._get_cached_devices_for_user.invalidate((user_id,)) + self._get_cached_user_device.invalidate_many((user_id,)) + self.get_device_list_last_stream_id_for_remote.invalidate((user_id,)) diff --git a/synapse/replication/slave/storage/push_rule.py b/synapse/replication/slave/storage/push_rule.py index f0200c1e98..45fc913c52 100644 --- a/synapse/replication/slave/storage/push_rule.py +++ b/synapse/replication/slave/storage/push_rule.py @@ -20,7 +20,7 @@ from ._slaved_id_tracker import SlavedIdTracker from .events import SlavedEventStore -class SlavedPushRuleStore(PushRulesWorkerStore, SlavedEventStore): +class SlavedPushRuleStore(SlavedEventStore, PushRulesWorkerStore): def __init__(self, db_conn, hs): self._push_rules_stream_id_gen = SlavedIdTracker( db_conn, "push_rules_stream", "stream_id", diff --git a/synapse/storage/deviceinbox.py b/synapse/storage/deviceinbox.py index e06b0bc56d..e6a42a53bb 100644 --- a/synapse/storage/deviceinbox.py +++ b/synapse/storage/deviceinbox.py @@ -19,14 +19,174 @@ from canonicaljson import json from twisted.internet import defer +from synapse.storage._base import SQLBaseStore +from synapse.storage.background_updates import BackgroundUpdateStore from synapse.util.caches.expiringcache import ExpiringCache -from .background_updates import BackgroundUpdateStore - logger = logging.getLogger(__name__) -class DeviceInboxStore(BackgroundUpdateStore): +class DeviceInboxWorkerStore(SQLBaseStore): + def get_to_device_stream_token(self): + return 
self._device_inbox_id_gen.get_current_token() + + def get_new_messages_for_device( + self, user_id, device_id, last_stream_id, current_stream_id, limit=100 + ): + """ + Args: + user_id(str): The recipient user_id. + device_id(str): The recipient device_id. + current_stream_id(int): The current position of the to device + message stream. + Returns: + Deferred ([dict], int): List of messages for the device and where + in the stream the messages got to. + """ + has_changed = self._device_inbox_stream_cache.has_entity_changed( + user_id, last_stream_id + ) + if not has_changed: + return defer.succeed(([], current_stream_id)) + + def get_new_messages_for_device_txn(txn): + sql = ( + "SELECT stream_id, message_json FROM device_inbox" + " WHERE user_id = ? AND device_id = ?" + " AND ? < stream_id AND stream_id <= ?" + " ORDER BY stream_id ASC" + " LIMIT ?" + ) + txn.execute(sql, ( + user_id, device_id, last_stream_id, current_stream_id, limit + )) + messages = [] + for row in txn: + stream_pos = row[0] + messages.append(json.loads(row[1])) + if len(messages) < limit: + stream_pos = current_stream_id + return (messages, stream_pos) + + return self.runInteraction( + "get_new_messages_for_device", get_new_messages_for_device_txn, + ) + + @defer.inlineCallbacks + def delete_messages_for_device(self, user_id, device_id, up_to_stream_id): + """ + Args: + user_id(str): The recipient user_id. + device_id(str): The recipient device_id. + up_to_stream_id(int): Where to delete messages up to. + Returns: + A deferred that resolves to the number of messages deleted. 
+ """ + # If we have cached the last stream id we've deleted up to, we can + # check if there is likely to be anything that needs deleting + last_deleted_stream_id = self._last_device_delete_cache.get( + (user_id, device_id), None + ) + if last_deleted_stream_id: + has_changed = self._device_inbox_stream_cache.has_entity_changed( + user_id, last_deleted_stream_id + ) + if not has_changed: + defer.returnValue(0) + + def delete_messages_for_device_txn(txn): + sql = ( + "DELETE FROM device_inbox" + " WHERE user_id = ? AND device_id = ?" + " AND stream_id <= ?" + ) + txn.execute(sql, (user_id, device_id, up_to_stream_id)) + return txn.rowcount + + count = yield self.runInteraction( + "delete_messages_for_device", delete_messages_for_device_txn + ) + + # Update the cache, ensuring that we only ever increase the value + last_deleted_stream_id = self._last_device_delete_cache.get( + (user_id, device_id), 0 + ) + self._last_device_delete_cache[(user_id, device_id)] = max( + last_deleted_stream_id, up_to_stream_id + ) + + defer.returnValue(count) + + def get_new_device_msgs_for_remote( + self, destination, last_stream_id, current_stream_id, limit=100 + ): + """ + Args: + destination(str): The name of the remote server. + last_stream_id(int|long): The last position of the device message stream + that the server sent up to. + current_stream_id(int|long): The current position of the device + message stream. + Returns: + Deferred ([dict], int|long): List of messages for the device and where + in the stream the messages got to. + """ + + has_changed = self._device_federation_outbox_stream_cache.has_entity_changed( + destination, last_stream_id + ) + if not has_changed or last_stream_id == current_stream_id: + return defer.succeed(([], current_stream_id)) + + def get_new_messages_for_remote_destination_txn(txn): + sql = ( + "SELECT stream_id, messages_json FROM device_federation_outbox" + " WHERE destination = ?" + " AND ? < stream_id AND stream_id <= ?" 
+ " ORDER BY stream_id ASC" + " LIMIT ?" + ) + txn.execute(sql, ( + destination, last_stream_id, current_stream_id, limit + )) + messages = [] + for row in txn: + stream_pos = row[0] + messages.append(json.loads(row[1])) + if len(messages) < limit: + stream_pos = current_stream_id + return (messages, stream_pos) + + return self.runInteraction( + "get_new_device_msgs_for_remote", + get_new_messages_for_remote_destination_txn, + ) + + def delete_device_msgs_for_remote(self, destination, up_to_stream_id): + """Used to delete messages when the remote destination acknowledges + their receipt. + + Args: + destination(str): The destination server_name + up_to_stream_id(int): Where to delete messages up to. + Returns: + A deferred that resolves when the messages have been deleted. + """ + def delete_messages_for_remote_destination_txn(txn): + sql = ( + "DELETE FROM device_federation_outbox" + " WHERE destination = ?" + " AND stream_id <= ?" + ) + txn.execute(sql, (destination, up_to_stream_id)) + + return self.runInteraction( + "delete_device_msgs_for_remote", + delete_messages_for_remote_destination_txn + ) + + +class DeviceInboxStore(DeviceInboxWorkerStore, BackgroundUpdateStore): DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop" def __init__(self, db_conn, hs): @@ -220,93 +380,6 @@ class DeviceInboxStore(BackgroundUpdateStore): txn.executemany(sql, rows) - def get_new_messages_for_device( - self, user_id, device_id, last_stream_id, current_stream_id, limit=100 - ): - """ - Args: - user_id(str): The recipient user_id. - device_id(str): The recipient device_id. - current_stream_id(int): The current position of the to device - message stream. - Returns: - Deferred ([dict], int): List of messages for the device and where - in the stream the messages got to. 
- """ - has_changed = self._device_inbox_stream_cache.has_entity_changed( - user_id, last_stream_id - ) - if not has_changed: - return defer.succeed(([], current_stream_id)) - - def get_new_messages_for_device_txn(txn): - sql = ( - "SELECT stream_id, message_json FROM device_inbox" - " WHERE user_id = ? AND device_id = ?" - " AND ? < stream_id AND stream_id <= ?" - " ORDER BY stream_id ASC" - " LIMIT ?" - ) - txn.execute(sql, ( - user_id, device_id, last_stream_id, current_stream_id, limit - )) - messages = [] - for row in txn: - stream_pos = row[0] - messages.append(json.loads(row[1])) - if len(messages) < limit: - stream_pos = current_stream_id - return (messages, stream_pos) - - return self.runInteraction( - "get_new_messages_for_device", get_new_messages_for_device_txn, - ) - - @defer.inlineCallbacks - def delete_messages_for_device(self, user_id, device_id, up_to_stream_id): - """ - Args: - user_id(str): The recipient user_id. - device_id(str): The recipient device_id. - up_to_stream_id(int): Where to delete messages up to. - Returns: - A deferred that resolves to the number of messages deleted. - """ - # If we have cached the last stream id we've deleted up to, we can - # check if there is likely to be anything that needs deleting - last_deleted_stream_id = self._last_device_delete_cache.get( - (user_id, device_id), None - ) - if last_deleted_stream_id: - has_changed = self._device_inbox_stream_cache.has_entity_changed( - user_id, last_deleted_stream_id - ) - if not has_changed: - defer.returnValue(0) - - def delete_messages_for_device_txn(txn): - sql = ( - "DELETE FROM device_inbox" - " WHERE user_id = ? AND device_id = ?" - " AND stream_id <= ?" 
- ) - txn.execute(sql, (user_id, device_id, up_to_stream_id)) - return txn.rowcount - - count = yield self.runInteraction( - "delete_messages_for_device", delete_messages_for_device_txn - ) - - # Update the cache, ensuring that we only ever increase the value - last_deleted_stream_id = self._last_device_delete_cache.get( - (user_id, device_id), 0 - ) - self._last_device_delete_cache[(user_id, device_id)] = max( - last_deleted_stream_id, up_to_stream_id - ) - - defer.returnValue(count) - def get_all_new_device_messages(self, last_pos, current_pos, limit): """ Args: @@ -351,77 +424,6 @@ class DeviceInboxStore(BackgroundUpdateStore): "get_all_new_device_messages", get_all_new_device_messages_txn ) - def get_to_device_stream_token(self): - return self._device_inbox_id_gen.get_current_token() - - def get_new_device_msgs_for_remote( - self, destination, last_stream_id, current_stream_id, limit=100 - ): - """ - Args: - destination(str): The name of the remote server. - last_stream_id(int|long): The last position of the device message stream - that the server sent up to. - current_stream_id(int|long): The current position of the device - message stream. - Returns: - Deferred ([dict], int|long): List of messages for the device and where - in the stream the messages got to. - """ - - has_changed = self._device_federation_outbox_stream_cache.has_entity_changed( - destination, last_stream_id - ) - if not has_changed or last_stream_id == current_stream_id: - return defer.succeed(([], current_stream_id)) - - def get_new_messages_for_remote_destination_txn(txn): - sql = ( - "SELECT stream_id, messages_json FROM device_federation_outbox" - " WHERE destination = ?" - " AND ? < stream_id AND stream_id <= ?" - " ORDER BY stream_id ASC" - " LIMIT ?" 
- ) - txn.execute(sql, ( - destination, last_stream_id, current_stream_id, limit - )) - messages = [] - for row in txn: - stream_pos = row[0] - messages.append(json.loads(row[1])) - if len(messages) < limit: - stream_pos = current_stream_id - return (messages, stream_pos) - - return self.runInteraction( - "get_new_device_msgs_for_remote", - get_new_messages_for_remote_destination_txn, - ) - - def delete_device_msgs_for_remote(self, destination, up_to_stream_id): - """Used to delete messages when the remote destination acknowledges - their receipt. - - Args: - destination(str): The destination server_name - up_to_stream_id(int): Where to delete messages up to. - Returns: - A deferred that resolves when the messages have been deleted. - """ - def delete_messages_for_remote_destination_txn(txn): - sql = ( - "DELETE FROM device_federation_outbox" - " WHERE destination = ?" - " AND stream_id <= ?" - ) - txn.execute(sql, (destination, up_to_stream_id)) - - return self.runInteraction( - "delete_device_msgs_for_remote", - delete_messages_for_remote_destination_txn - ) - @defer.inlineCallbacks def _background_drop_index_device_inbox(self, progress, batch_size): def reindex_txn(conn): diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index ecdab34e7d..e716dc1437 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -22,11 +22,10 @@ from twisted.internet import defer from synapse.api.errors import StoreError from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.storage._base import Cache, SQLBaseStore, db_to_json from synapse.storage.background_updates import BackgroundUpdateStore from synapse.util.caches.descriptors import cached, cachedInlineCallbacks, cachedList -from ._base import Cache, db_to_json - logger = logging.getLogger(__name__) DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES = ( @@ -34,93 +33,7 @@ DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES = ( ) -class 
DeviceStore(BackgroundUpdateStore): - def __init__(self, db_conn, hs): - super(DeviceStore, self).__init__(db_conn, hs) - - # Map of (user_id, device_id) -> bool. If there is an entry that implies - # the device exists. - self.device_id_exists_cache = Cache( - name="device_id_exists", - keylen=2, - max_entries=10000, - ) - - self._clock.looping_call( - self._prune_old_outbound_device_pokes, 60 * 60 * 1000 - ) - - self.register_background_index_update( - "device_lists_stream_idx", - index_name="device_lists_stream_user_id", - table="device_lists_stream", - columns=["user_id", "device_id"], - ) - - # create a unique index on device_lists_remote_cache - self.register_background_index_update( - "device_lists_remote_cache_unique_idx", - index_name="device_lists_remote_cache_unique_id", - table="device_lists_remote_cache", - columns=["user_id", "device_id"], - unique=True, - ) - - # And one on device_lists_remote_extremeties - self.register_background_index_update( - "device_lists_remote_extremeties_unique_idx", - index_name="device_lists_remote_extremeties_unique_idx", - table="device_lists_remote_extremeties", - columns=["user_id"], - unique=True, - ) - - # once they complete, we can remove the old non-unique indexes. - self.register_background_update_handler( - DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES, - self._drop_device_list_streams_non_unique_indexes, - ) - - @defer.inlineCallbacks - def store_device(self, user_id, device_id, - initial_device_display_name): - """Ensure the given device is known; add it to the store if not - - Args: - user_id (str): id of user associated with the device - device_id (str): id of device - initial_device_display_name (str): initial displayname of the - device. Ignored if device exists. - Returns: - defer.Deferred: boolean whether the device was inserted or an - existing device existed with that ID. 
- """ - key = (user_id, device_id) - if self.device_id_exists_cache.get(key, None): - defer.returnValue(False) - - try: - inserted = yield self._simple_insert( - "devices", - values={ - "user_id": user_id, - "device_id": device_id, - "display_name": initial_device_display_name - }, - desc="store_device", - or_ignore=True, - ) - self.device_id_exists_cache.prefill(key, True) - defer.returnValue(inserted) - except Exception as e: - logger.error("store_device with device_id=%s(%r) user_id=%s(%r)" - " display_name=%s(%r) failed: %s", - type(device_id).__name__, device_id, - type(user_id).__name__, user_id, - type(initial_device_display_name).__name__, - initial_device_display_name, e) - raise StoreError(500, "Problem storing device.") - +class DeviceWorkerStore(SQLBaseStore): def get_device(self, user_id, device_id): """Retrieve a device. @@ -139,69 +52,6 @@ class DeviceStore(BackgroundUpdateStore): desc="get_device", ) - @defer.inlineCallbacks - def delete_device(self, user_id, device_id): - """Delete a device. - - Args: - user_id (str): The ID of the user which owns the device - device_id (str): The ID of the device to delete - Returns: - defer.Deferred - """ - yield self._simple_delete_one( - table="devices", - keyvalues={"user_id": user_id, "device_id": device_id}, - desc="delete_device", - ) - - self.device_id_exists_cache.invalidate((user_id, device_id)) - - @defer.inlineCallbacks - def delete_devices(self, user_id, device_ids): - """Deletes several devices. - - Args: - user_id (str): The ID of the user which owns the devices - device_ids (list): The IDs of the devices to delete - Returns: - defer.Deferred - """ - yield self._simple_delete_many( - table="devices", - column="device_id", - iterable=device_ids, - keyvalues={"user_id": user_id}, - desc="delete_devices", - ) - for device_id in device_ids: - self.device_id_exists_cache.invalidate((user_id, device_id)) - - def update_device(self, user_id, device_id, new_display_name=None): - """Update a device. 
- - Args: - user_id (str): The ID of the user which owns the device - device_id (str): The ID of the device to update - new_display_name (str|None): new displayname for device; None - to leave unchanged - Raises: - StoreError: if the device is not found - Returns: - defer.Deferred - """ - updates = {} - if new_display_name is not None: - updates["display_name"] = new_display_name - if not updates: - return defer.succeed(None) - return self._simple_update_one( - table="devices", - keyvalues={"user_id": user_id, "device_id": device_id}, - updatevalues=updates, - desc="update_device", - ) - @defer.inlineCallbacks def get_devices_by_user(self, user_id): """Retrieve all of a user's registered devices. @@ -222,449 +72,603 @@ class DeviceStore(BackgroundUpdateStore): defer.returnValue({d["device_id"]: d for d in devices}) - @cached(max_entries=10000) - def get_device_list_last_stream_id_for_remote(self, user_id): - """Get the last stream_id we got for a user. May be None if we haven't - got any information for them. 
+ def get_devices_by_remote(self, destination, from_stream_id): + """Get stream of updates to send to remote servers + + Returns: + (int, list[dict]): current stream id and list of updates """ - return self._simple_select_one_onecol( - table="device_lists_remote_extremeties", - keyvalues={"user_id": user_id}, - retcol="stream_id", - desc="get_device_list_remote_extremity", - allow_none=True, - ) + now_stream_id = self._device_list_id_gen.get_current_token() - @cachedList(cached_method_name="get_device_list_last_stream_id_for_remote", - list_name="user_ids", inlineCallbacks=True) - def get_device_list_last_stream_id_for_remotes(self, user_ids): - rows = yield self._simple_select_many_batch( - table="device_lists_remote_extremeties", - column="user_id", - iterable=user_ids, - retcols=("user_id", "stream_id",), - desc="get_user_devices_from_cache", + has_changed = self._device_list_federation_stream_cache.has_entity_changed( + destination, int(from_stream_id) ) + if not has_changed: + return (now_stream_id, []) - results = {user_id: None for user_id in user_ids} - results.update({ - row["user_id"]: row["stream_id"] for row in rows - }) - - defer.returnValue(results) + return self.runInteraction( + "get_devices_by_remote", self._get_devices_by_remote_txn, + destination, from_stream_id, now_stream_id, + ) - @defer.inlineCallbacks - def mark_remote_user_device_list_as_unsubscribed(self, user_id): - """Mark that we no longer track device lists for remote user. + def _get_devices_by_remote_txn(self, txn, destination, from_stream_id, + now_stream_id): + sql = """ + SELECT user_id, device_id, max(stream_id) FROM device_lists_outbound_pokes + WHERE destination = ? AND ? < stream_id AND stream_id <= ? AND sent = ? 
+ GROUP BY user_id, device_id + LIMIT 20 """ - yield self._simple_delete( - table="device_lists_remote_extremeties", - keyvalues={ - "user_id": user_id, - }, - desc="mark_remote_user_device_list_as_unsubscribed", + txn.execute( + sql, (destination, from_stream_id, now_stream_id, False) ) - self.get_device_list_last_stream_id_for_remote.invalidate((user_id,)) - def update_remote_device_list_cache_entry(self, user_id, device_id, content, - stream_id): - """Updates a single device in the cache of a remote user's devicelist. + # maps (user_id, device_id) -> stream_id + query_map = {(r[0], r[1]): r[2] for r in txn} + if not query_map: + return (now_stream_id, []) - Note: assumes that we are the only thread that can be updating this user's - device list. + if len(query_map) >= 20: + now_stream_id = max(stream_id for stream_id in itervalues(query_map)) - Args: - user_id (str): User to update device list for - device_id (str): ID of decivice being updated - content (dict): new data on this device - stream_id (int): the version of the device list + devices = self._get_e2e_device_keys_txn( + txn, query_map.keys(), include_all_devices=True, include_deleted_devices=True + ) - Returns: - Deferred[None] + prev_sent_id_sql = """ + SELECT coalesce(max(stream_id), 0) as stream_id + FROM device_lists_outbound_last_success + WHERE destination = ? AND user_id = ? AND stream_id <= ? 
""" - return self.runInteraction( - "update_remote_device_list_cache_entry", - self._update_remote_device_list_cache_entry_txn, - user_id, device_id, content, stream_id, - ) - def _update_remote_device_list_cache_entry_txn(self, txn, user_id, device_id, - content, stream_id): - if content.get("deleted"): - self._simple_delete_txn( - txn, - table="device_lists_remote_cache", - keyvalues={ + results = [] + for user_id, user_devices in iteritems(devices): + # The prev_id for the first row is always the last row before + # `from_stream_id` + txn.execute(prev_sent_id_sql, (destination, user_id, from_stream_id)) + rows = txn.fetchall() + prev_id = rows[0][0] + for device_id, device in iteritems(user_devices): + stream_id = query_map[(user_id, device_id)] + result = { "user_id": user_id, "device_id": device_id, - }, - ) + "prev_id": [prev_id] if prev_id else [], + "stream_id": stream_id, + } - txn.call_after( - self.device_id_exists_cache.invalidate, (user_id, device_id,) - ) - else: - self._simple_upsert_txn( - txn, - table="device_lists_remote_cache", - keyvalues={ - "user_id": user_id, - "device_id": device_id, - }, - values={ - "content": json.dumps(content), - }, + prev_id = stream_id - # we don't need to lock, because we assume we are the only thread - # updating this user's devices. 
- lock=False, - ) + if device is not None: + key_json = device.get("key_json", None) + if key_json: + result["keys"] = db_to_json(key_json) + device_display_name = device.get("device_display_name", None) + if device_display_name: + result["device_display_name"] = device_display_name + else: + result["deleted"] = True - txn.call_after(self._get_cached_user_device.invalidate, (user_id, device_id,)) - txn.call_after(self._get_cached_devices_for_user.invalidate, (user_id,)) - txn.call_after( - self.get_device_list_last_stream_id_for_remote.invalidate, (user_id,) + results.append(result) + + return (now_stream_id, results) + + def mark_as_sent_devices_by_remote(self, destination, stream_id): + """Mark that updates have successfully been sent to the destination. + """ + return self.runInteraction( + "mark_as_sent_devices_by_remote", self._mark_as_sent_devices_by_remote_txn, + destination, stream_id, ) - self._simple_upsert_txn( - txn, - table="device_lists_remote_extremeties", - keyvalues={ - "user_id": user_id, - }, - values={ - "stream_id": stream_id, - }, + def _mark_as_sent_devices_by_remote_txn(self, txn, destination, stream_id): + # We update the device_lists_outbound_last_success with the successfully + # poked users. We do the join to see which users need to be inserted and + # which updated. + sql = """ + SELECT user_id, coalesce(max(o.stream_id), 0), (max(s.stream_id) IS NOT NULL) + FROM device_lists_outbound_pokes as o + LEFT JOIN device_lists_outbound_last_success as s + USING (destination, user_id) + WHERE destination = ? AND o.stream_id <= ? + GROUP BY user_id + """ + txn.execute(sql, (destination, stream_id,)) + rows = txn.fetchall() - # again, we can assume we are the only thread updating this user's - # extremity. - lock=False, + sql = """ + UPDATE device_lists_outbound_last_success + SET stream_id = ? + WHERE destination = ? AND user_id = ? 
+ """ + txn.executemany( + sql, ((row[1], destination, row[0],) for row in rows if row[2]) ) - def update_remote_device_list_cache(self, user_id, devices, stream_id): - """Replace the entire cache of the remote user's devices. + sql = """ + INSERT INTO device_lists_outbound_last_success + (destination, user_id, stream_id) VALUES (?, ?, ?) + """ + txn.executemany( + sql, ((destination, row[0], row[1],) for row in rows if not row[2]) + ) - Note: assumes that we are the only thread that can be updating this user's - device list. + # Delete all sent outbound pokes + sql = """ + DELETE FROM device_lists_outbound_pokes + WHERE destination = ? AND stream_id <= ? + """ + txn.execute(sql, (destination, stream_id,)) + + def get_device_stream_token(self): + return self._device_list_id_gen.get_current_token() + + @defer.inlineCallbacks + def get_user_devices_from_cache(self, query_list): + """Get the devices (and keys if any) for remote users from the cache. Args: - user_id (str): User to update device list for - devices (list[dict]): list of device objects supplied over federation - stream_id (int): the version of the device list + query_list(list): List of (user_id, device_ids), if device_ids is + falsey then return all device ids for that user. 
Returns: - Deferred[None] + (user_ids_not_in_cache, results_map), where user_ids_not_in_cache is + a set of user_ids and results_map is a mapping of + user_id -> device_id -> device_info """ - return self.runInteraction( - "update_remote_device_list_cache", - self._update_remote_device_list_cache_txn, - user_id, devices, stream_id, + user_ids = set(user_id for user_id, _ in query_list) + user_map = yield self.get_device_list_last_stream_id_for_remotes(list(user_ids)) + user_ids_in_cache = set( + user_id for user_id, stream_id in user_map.items() if stream_id ) + user_ids_not_in_cache = user_ids - user_ids_in_cache - def _update_remote_device_list_cache_txn(self, txn, user_id, devices, - stream_id): - self._simple_delete_txn( - txn, + results = {} + for user_id, device_id in query_list: + if user_id not in user_ids_in_cache: + continue + + if device_id: + device = yield self._get_cached_user_device(user_id, device_id) + results.setdefault(user_id, {})[device_id] = device + else: + results[user_id] = yield self._get_cached_devices_for_user(user_id) + + defer.returnValue((user_ids_not_in_cache, results)) + + @cachedInlineCallbacks(num_args=2, tree=True) + def _get_cached_user_device(self, user_id, device_id): + content = yield self._simple_select_one_onecol( table="device_lists_remote_cache", keyvalues={ "user_id": user_id, + "device_id": device_id, }, + retcol="content", + desc="_get_cached_user_device", ) + defer.returnValue(db_to_json(content)) - self._simple_insert_many_txn( - txn, + @cachedInlineCallbacks() + def _get_cached_devices_for_user(self, user_id): + devices = yield self._simple_select_list( table="device_lists_remote_cache", - values=[ - { - "user_id": user_id, - "device_id": content["device_id"], - "content": json.dumps(content), - } - for content in devices - ] - ) - - txn.call_after(self._get_cached_devices_for_user.invalidate, (user_id,)) - txn.call_after(self._get_cached_user_device.invalidate_many, (user_id,)) - txn.call_after( - 
self.get_device_list_last_stream_id_for_remote.invalidate, (user_id,) - ) - - self._simple_upsert_txn( - txn, - table="device_lists_remote_extremeties", keyvalues={ "user_id": user_id, }, - values={ - "stream_id": stream_id, - }, - - # we don't need to lock, because we can assume we are the only thread - # updating this user's extremity. - lock=False, + retcols=("device_id", "content"), + desc="_get_cached_devices_for_user", ) + defer.returnValue({ + device["device_id"]: db_to_json(device["content"]) + for device in devices + }) - def get_devices_by_remote(self, destination, from_stream_id): - """Get stream of updates to send to remote servers + def get_devices_with_keys_by_user(self, user_id): + """Get all devices (with any device keys) for a user Returns: - (int, list[dict]): current stream id and list of updates + (stream_id, devices) """ + return self.runInteraction( + "get_devices_with_keys_by_user", + self._get_devices_with_keys_by_user_txn, user_id, + ) + + def _get_devices_with_keys_by_user_txn(self, txn, user_id): now_stream_id = self._device_list_id_gen.get_current_token() - has_changed = self._device_list_federation_stream_cache.has_entity_changed( - destination, int(from_stream_id) + devices = self._get_e2e_device_keys_txn( + txn, [(user_id, None)], include_all_devices=True ) - if not has_changed: - return (now_stream_id, []) - return self.runInteraction( - "get_devices_by_remote", self._get_devices_by_remote_txn, - destination, from_stream_id, now_stream_id, - ) + if devices: + user_devices = devices[user_id] + results = [] + for device_id, device in iteritems(user_devices): + result = { + "device_id": device_id, + } + + key_json = device.get("key_json", None) + if key_json: + result["keys"] = db_to_json(key_json) + device_display_name = device.get("device_display_name", None) + if device_display_name: + result["device_display_name"] = device_display_name + + results.append(result) + + return now_stream_id, results + + return now_stream_id, [] + + 
@defer.inlineCallbacks + def get_user_whose_devices_changed(self, from_key): + """Get set of users whose devices have changed since `from_key`. + """ + from_key = int(from_key) + changed = self._device_list_stream_cache.get_all_entities_changed(from_key) + if changed is not None: + defer.returnValue(set(changed)) - def _get_devices_by_remote_txn(self, txn, destination, from_stream_id, - now_stream_id): sql = """ - SELECT user_id, device_id, max(stream_id) FROM device_lists_outbound_pokes - WHERE destination = ? AND ? < stream_id AND stream_id <= ? AND sent = ? - GROUP BY user_id, device_id - LIMIT 20 + SELECT DISTINCT user_id FROM device_lists_stream WHERE stream_id > ? """ - txn.execute( - sql, (destination, from_stream_id, now_stream_id, False) + rows = yield self._execute("get_user_whose_devices_changed", None, sql, from_key) + defer.returnValue(set(row[0] for row in rows)) + + def get_all_device_list_changes_for_remotes(self, from_key, to_key): + """Return a list of `(stream_id, user_id, destination)` which is the + combined list of changes to devices, and which destinations need to be + poked. `destination` may be None if no destinations need to be poked. + """ + # We do a group by here as there can be a large number of duplicate + # entries, since we throw away device IDs. + sql = """ + SELECT MAX(stream_id) AS stream_id, user_id, destination + FROM device_lists_stream + LEFT JOIN device_lists_outbound_pokes USING (stream_id, user_id, device_id) + WHERE ? < stream_id AND stream_id <= ? + GROUP BY user_id, destination + """ + return self._execute( + "get_all_device_list_changes_for_remotes", None, + sql, from_key, to_key ) - # maps (user_id, device_id) -> stream_id - query_map = {(r[0], r[1]): r[2] for r in txn} - if not query_map: - return (now_stream_id, []) + @cached(max_entries=10000) + def get_device_list_last_stream_id_for_remote(self, user_id): + """Get the last stream_id we got for a user. May be None if we haven't + got any information for them. 
+ """ + return self._simple_select_one_onecol( + table="device_lists_remote_extremeties", + keyvalues={"user_id": user_id}, + retcol="stream_id", + desc="get_device_list_last_stream_id_for_remote", + allow_none=True, + ) - if len(query_map) >= 20: - now_stream_id = max(stream_id for stream_id in itervalues(query_map)) + @cachedList(cached_method_name="get_device_list_last_stream_id_for_remote", + list_name="user_ids", inlineCallbacks=True) + def get_device_list_last_stream_id_for_remotes(self, user_ids): + rows = yield self._simple_select_many_batch( + table="device_lists_remote_extremeties", + column="user_id", + iterable=user_ids, + retcols=("user_id", "stream_id",), + desc="get_device_list_last_stream_id_for_remotes", + ) - devices = self._get_e2e_device_keys_txn( - txn, query_map.keys(), include_all_devices=True, include_deleted_devices=True + results = {user_id: None for user_id in user_ids} + results.update({ + row["user_id"]: row["stream_id"] for row in rows + }) + + defer.returnValue(results) + + +class DeviceStore(DeviceWorkerStore, BackgroundUpdateStore): + def __init__(self, db_conn, hs): + super(DeviceStore, self).__init__(db_conn, hs) + + # Map of (user_id, device_id) -> bool. If there is an entry that implies + # the device exists. + self.device_id_exists_cache = Cache( + name="device_id_exists", + keylen=2, + max_entries=10000, ) - prev_sent_id_sql = """ - SELECT coalesce(max(stream_id), 0) as stream_id - FROM device_lists_outbound_last_success - WHERE destination = ? AND user_id = ? AND stream_id <= ? 
+ self._clock.looping_call( + self._prune_old_outbound_device_pokes, 60 * 60 * 1000 + ) + + self.register_background_index_update( + "device_lists_stream_idx", + index_name="device_lists_stream_user_id", + table="device_lists_stream", + columns=["user_id", "device_id"], + ) + + # create a unique index on device_lists_remote_cache + self.register_background_index_update( + "device_lists_remote_cache_unique_idx", + index_name="device_lists_remote_cache_unique_id", + table="device_lists_remote_cache", + columns=["user_id", "device_id"], + unique=True, + ) + + # And one on device_lists_remote_extremeties + self.register_background_index_update( + "device_lists_remote_extremeties_unique_idx", + index_name="device_lists_remote_extremeties_unique_idx", + table="device_lists_remote_extremeties", + columns=["user_id"], + unique=True, + ) + + # once they complete, we can remove the old non-unique indexes. + self.register_background_update_handler( + DROP_DEVICE_LIST_STREAMS_NON_UNIQUE_INDEXES, + self._drop_device_list_streams_non_unique_indexes, + ) + + @defer.inlineCallbacks + def store_device(self, user_id, device_id, + initial_device_display_name): + """Ensure the given device is known; add it to the store if not + + Args: + user_id (str): id of user associated with the device + device_id (str): id of device + initial_device_display_name (str): initial displayname of the + device. Ignored if device exists. + Returns: + defer.Deferred: boolean whether the device was inserted or an + existing device existed with that ID. 
""" + key = (user_id, device_id) + if self.device_id_exists_cache.get(key, None): + defer.returnValue(False) - results = [] - for user_id, user_devices in iteritems(devices): - # The prev_id for the first row is always the last row before - # `from_stream_id` - txn.execute(prev_sent_id_sql, (destination, user_id, from_stream_id)) - rows = txn.fetchall() - prev_id = rows[0][0] - for device_id, device in iteritems(user_devices): - stream_id = query_map[(user_id, device_id)] - result = { + try: + inserted = yield self._simple_insert( + "devices", + values={ "user_id": user_id, "device_id": device_id, - "prev_id": [prev_id] if prev_id else [], - "stream_id": stream_id, - } - - prev_id = stream_id + "display_name": initial_device_display_name + }, + desc="store_device", + or_ignore=True, + ) + self.device_id_exists_cache.prefill(key, True) + defer.returnValue(inserted) + except Exception as e: + logger.error("store_device with device_id=%s(%r) user_id=%s(%r)" + " display_name=%s(%r) failed: %s", + type(device_id).__name__, device_id, + type(user_id).__name__, user_id, + type(initial_device_display_name).__name__, + initial_device_display_name, e) + raise StoreError(500, "Problem storing device.") - if device is not None: - key_json = device.get("key_json", None) - if key_json: - result["keys"] = db_to_json(key_json) - device_display_name = device.get("device_display_name", None) - if device_display_name: - result["device_display_name"] = device_display_name - else: - result["deleted"] = True + @defer.inlineCallbacks + def delete_device(self, user_id, device_id): + """Delete a device. 
- results.append(result) + Args: + user_id (str): The ID of the user which owns the device + device_id (str): The ID of the device to delete + Returns: + defer.Deferred + """ + yield self._simple_delete_one( + table="devices", + keyvalues={"user_id": user_id, "device_id": device_id}, + desc="delete_device", + ) - return (now_stream_id, results) + self.device_id_exists_cache.invalidate((user_id, device_id)) @defer.inlineCallbacks - def get_user_devices_from_cache(self, query_list): - """Get the devices (and keys if any) for remote users from the cache. + def delete_devices(self, user_id, device_ids): + """Deletes several devices. Args: - query_list(list): List of (user_id, device_ids), if device_ids is - falsey then return all device ids for that user. + user_id (str): The ID of the user which owns the devices + device_ids (list): The IDs of the devices to delete + Returns: + defer.Deferred + """ + yield self._simple_delete_many( + table="devices", + column="device_id", + iterable=device_ids, + keyvalues={"user_id": user_id}, + desc="delete_devices", + ) + for device_id in device_ids: + self.device_id_exists_cache.invalidate((user_id, device_id)) + def update_device(self, user_id, device_id, new_display_name=None): + """Update a device. 
+ + Args: + user_id (str): The ID of the user which owns the device + device_id (str): The ID of the device to update + new_display_name (str|None): new displayname for device; None + to leave unchanged + Raises: + StoreError: if the device is not found Returns: - (user_ids_not_in_cache, results_map), where user_ids_not_in_cache is - a set of user_ids and results_map is a mapping of - user_id -> device_id -> device_info + defer.Deferred """ - user_ids = set(user_id for user_id, _ in query_list) - user_map = yield self.get_device_list_last_stream_id_for_remotes(list(user_ids)) - user_ids_in_cache = set( - user_id for user_id, stream_id in user_map.items() if stream_id + updates = {} + if new_display_name is not None: + updates["display_name"] = new_display_name + if not updates: + return defer.succeed(None) + return self._simple_update_one( + table="devices", + keyvalues={"user_id": user_id, "device_id": device_id}, + updatevalues=updates, + desc="update_device", ) - user_ids_not_in_cache = user_ids - user_ids_in_cache - - results = {} - for user_id, device_id in query_list: - if user_id not in user_ids_in_cache: - continue - - if device_id: - device = yield self._get_cached_user_device(user_id, device_id) - results.setdefault(user_id, {})[device_id] = device - else: - results[user_id] = yield self._get_cached_devices_for_user(user_id) - - defer.returnValue((user_ids_not_in_cache, results)) - @cachedInlineCallbacks(num_args=2, tree=True) - def _get_cached_user_device(self, user_id, device_id): - content = yield self._simple_select_one_onecol( - table="device_lists_remote_cache", + @defer.inlineCallbacks + def mark_remote_user_device_list_as_unsubscribed(self, user_id): + """Mark that we no longer track device lists for remote user. 
+ """ + yield self._simple_delete( + table="device_lists_remote_extremeties", keyvalues={ "user_id": user_id, - "device_id": device_id, }, - retcol="content", - desc="_get_cached_user_device", + desc="mark_remote_user_device_list_as_unsubscribed", ) - defer.returnValue(db_to_json(content)) + self.get_device_list_last_stream_id_for_remote.invalidate((user_id,)) - @cachedInlineCallbacks() - def _get_cached_devices_for_user(self, user_id): - devices = yield self._simple_select_list( - table="device_lists_remote_cache", - keyvalues={ - "user_id": user_id, - }, - retcols=("device_id", "content"), - desc="_get_cached_devices_for_user", - ) - defer.returnValue({ - device["device_id"]: db_to_json(device["content"]) - for device in devices - }) + def update_remote_device_list_cache_entry(self, user_id, device_id, content, + stream_id): + """Updates a single device in the cache of a remote user's devicelist. - def get_devices_with_keys_by_user(self, user_id): - """Get all devices (with any device keys) for a user + Note: assumes that we are the only thread that can be updating this user's + device list. 
+ + Args: + user_id (str): User to update device list for + device_id (str): ID of decivice being updated + content (dict): new data on this device + stream_id (int): the version of the device list Returns: - (stream_id, devices) + Deferred[None] """ return self.runInteraction( - "get_devices_with_keys_by_user", - self._get_devices_with_keys_by_user_txn, user_id, + "update_remote_device_list_cache_entry", + self._update_remote_device_list_cache_entry_txn, + user_id, device_id, content, stream_id, ) - def _get_devices_with_keys_by_user_txn(self, txn, user_id): - now_stream_id = self._device_list_id_gen.get_current_token() + def _update_remote_device_list_cache_entry_txn(self, txn, user_id, device_id, + content, stream_id): + if content.get("deleted"): + self._simple_delete_txn( + txn, + table="device_lists_remote_cache", + keyvalues={ + "user_id": user_id, + "device_id": device_id, + }, + ) - devices = self._get_e2e_device_keys_txn( - txn, [(user_id, None)], include_all_devices=True + txn.call_after( + self.device_id_exists_cache.invalidate, (user_id, device_id,) + ) + else: + self._simple_upsert_txn( + txn, + table="device_lists_remote_cache", + keyvalues={ + "user_id": user_id, + "device_id": device_id, + }, + values={ + "content": json.dumps(content), + }, + + # we don't need to lock, because we assume we are the only thread + # updating this user's devices. 
+ lock=False, + ) + + txn.call_after(self._get_cached_user_device.invalidate, (user_id, device_id,)) + txn.call_after(self._get_cached_devices_for_user.invalidate, (user_id,)) + txn.call_after( + self.get_device_list_last_stream_id_for_remote.invalidate, (user_id,) ) - if devices: - user_devices = devices[user_id] - results = [] - for device_id, device in iteritems(user_devices): - result = { - "device_id": device_id, - } + self._simple_upsert_txn( + txn, + table="device_lists_remote_extremeties", + keyvalues={ + "user_id": user_id, + }, + values={ + "stream_id": stream_id, + }, - key_json = device.get("key_json", None) - if key_json: - result["keys"] = db_to_json(key_json) - device_display_name = device.get("device_display_name", None) - if device_display_name: - result["device_display_name"] = device_display_name + # again, we can assume we are the only thread updating this user's + # extremity. + lock=False, + ) - results.append(result) + def update_remote_device_list_cache(self, user_id, devices, stream_id): + """Replace the entire cache of the remote user's devices. - return now_stream_id, results + Note: assumes that we are the only thread that can be updating this user's + device list. - return now_stream_id, [] + Args: + user_id (str): User to update device list for + devices (list[dict]): list of device objects supplied over federation + stream_id (int): the version of the device list - def mark_as_sent_devices_by_remote(self, destination, stream_id): - """Mark that updates have successfully been sent to the destination. 
+ Returns: + Deferred[None] """ return self.runInteraction( - "mark_as_sent_devices_by_remote", self._mark_as_sent_devices_by_remote_txn, - destination, stream_id, + "update_remote_device_list_cache", + self._update_remote_device_list_cache_txn, + user_id, devices, stream_id, ) - def _mark_as_sent_devices_by_remote_txn(self, txn, destination, stream_id): - # We update the device_lists_outbound_last_success with the successfully - # poked users. We do the join to see which users need to be inserted and - # which updated. - sql = """ - SELECT user_id, coalesce(max(o.stream_id), 0), (max(s.stream_id) IS NOT NULL) - FROM device_lists_outbound_pokes as o - LEFT JOIN device_lists_outbound_last_success as s - USING (destination, user_id) - WHERE destination = ? AND o.stream_id <= ? - GROUP BY user_id - """ - txn.execute(sql, (destination, stream_id,)) - rows = txn.fetchall() - - sql = """ - UPDATE device_lists_outbound_last_success - SET stream_id = ? - WHERE destination = ? AND user_id = ? - """ - txn.executemany( - sql, ((row[1], destination, row[0],) for row in rows if row[2]) + def _update_remote_device_list_cache_txn(self, txn, user_id, devices, + stream_id): + self._simple_delete_txn( + txn, + table="device_lists_remote_cache", + keyvalues={ + "user_id": user_id, + }, ) - sql = """ - INSERT INTO device_lists_outbound_last_success - (destination, user_id, stream_id) VALUES (?, ?, ?) - """ - txn.executemany( - sql, ((destination, row[0], row[1],) for row in rows if not row[2]) + self._simple_insert_many_txn( + txn, + table="device_lists_remote_cache", + values=[ + { + "user_id": user_id, + "device_id": content["device_id"], + "content": json.dumps(content), + } + for content in devices + ] ) - # Delete all sent outbound pokes - sql = """ - DELETE FROM device_lists_outbound_pokes - WHERE destination = ? AND stream_id <= ? 
- """ - txn.execute(sql, (destination, stream_id,)) - - @defer.inlineCallbacks - def get_user_whose_devices_changed(self, from_key): - """Get set of users whose devices have changed since `from_key`. - """ - from_key = int(from_key) - changed = self._device_list_stream_cache.get_all_entities_changed(from_key) - if changed is not None: - defer.returnValue(set(changed)) + txn.call_after(self._get_cached_devices_for_user.invalidate, (user_id,)) + txn.call_after(self._get_cached_user_device.invalidate_many, (user_id,)) + txn.call_after( + self.get_device_list_last_stream_id_for_remote.invalidate, (user_id,) + ) - sql = """ - SELECT DISTINCT user_id FROM device_lists_stream WHERE stream_id > ? - """ - rows = yield self._execute("get_user_whose_devices_changed", None, sql, from_key) - defer.returnValue(set(row[0] for row in rows)) + self._simple_upsert_txn( + txn, + table="device_lists_remote_extremeties", + keyvalues={ + "user_id": user_id, + }, + values={ + "stream_id": stream_id, + }, - def get_all_device_list_changes_for_remotes(self, from_key, to_key): - """Return a list of `(stream_id, user_id, destination)` which is the - combined list of changes to devices, and which destinations need to be - poked. `destination` may be None if no destinations need to be poked. - """ - # We do a group by here as there can be a large number of duplicate - # entries, since we throw away device IDs. - sql = """ - SELECT MAX(stream_id) AS stream_id, user_id, destination - FROM device_lists_stream - LEFT JOIN device_lists_outbound_pokes USING (stream_id, user_id, device_id) - WHERE ? < stream_id AND stream_id <= ? - GROUP BY user_id, destination - """ - return self._execute( - "get_all_device_list_changes_for_remotes", None, - sql, from_key, to_key + # we don't need to lock, because we can assume we are the only thread + # updating this user's extremity. 
+ lock=False, ) @defer.inlineCallbacks @@ -732,9 +736,6 @@ class DeviceStore(BackgroundUpdateStore): ] ) - def get_device_stream_token(self): - return self._device_list_id_gen.get_current_token() - def _prune_old_outbound_device_pokes(self): """Delete old entries out of the device_lists_outbound_pokes to ensure that we don't fill up due to dead servers. We keep one entry per diff --git a/synapse/storage/end_to_end_keys.py b/synapse/storage/end_to_end_keys.py index 2a0f6cfca9..e381e472a2 100644 --- a/synapse/storage/end_to_end_keys.py +++ b/synapse/storage/end_to_end_keys.py @@ -23,49 +23,7 @@ from synapse.util.caches.descriptors import cached from ._base import SQLBaseStore, db_to_json -class EndToEndKeyStore(SQLBaseStore): - def set_e2e_device_keys(self, user_id, device_id, time_now, device_keys): - """Stores device keys for a device. Returns whether there was a change - or the keys were already in the database. - """ - def _set_e2e_device_keys_txn(txn): - old_key_json = self._simple_select_one_onecol_txn( - txn, - table="e2e_device_keys_json", - keyvalues={ - "user_id": user_id, - "device_id": device_id, - }, - retcol="key_json", - allow_none=True, - ) - - # In py3 we need old_key_json to match new_key_json type. The DB - # returns unicode while encode_canonical_json returns bytes. 
- new_key_json = encode_canonical_json(device_keys).decode("utf-8") - - if old_key_json == new_key_json: - return False - - self._simple_upsert_txn( - txn, - table="e2e_device_keys_json", - keyvalues={ - "user_id": user_id, - "device_id": device_id, - }, - values={ - "ts_added_ms": time_now, - "key_json": new_key_json, - } - ) - - return True - - return self.runInteraction( - "set_e2e_device_keys", _set_e2e_device_keys_txn - ) - +class EndToEndKeyWorkerStore(SQLBaseStore): @defer.inlineCallbacks def get_e2e_device_keys( self, query_list, include_all_devices=False, @@ -238,6 +196,50 @@ class EndToEndKeyStore(SQLBaseStore): "count_e2e_one_time_keys", _count_e2e_one_time_keys ) + +class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): + def set_e2e_device_keys(self, user_id, device_id, time_now, device_keys): + """Stores device keys for a device. Returns whether there was a change + or the keys were already in the database. + """ + def _set_e2e_device_keys_txn(txn): + old_key_json = self._simple_select_one_onecol_txn( + txn, + table="e2e_device_keys_json", + keyvalues={ + "user_id": user_id, + "device_id": device_id, + }, + retcol="key_json", + allow_none=True, + ) + + # In py3 we need old_key_json to match new_key_json type. The DB + # returns unicode while encode_canonical_json returns bytes. 
+ new_key_json = encode_canonical_json(device_keys).decode("utf-8") + + if old_key_json == new_key_json: + return False + + self._simple_upsert_txn( + txn, + table="e2e_device_keys_json", + keyvalues={ + "user_id": user_id, + "device_id": device_id, + }, + values={ + "ts_added_ms": time_now, + "key_json": new_key_json, + } + ) + + return True + + return self.runInteraction( + "set_e2e_device_keys", _set_e2e_device_keys_txn + ) + def claim_e2e_one_time_keys(self, query_list): """Take a list of one time keys out of the database""" def _claim_e2e_one_time_keys(txn): -- cgit 1.5.1 From b29693a30b4def57c9c38be4c6f5ff2f9a6e9db9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 4 Mar 2019 18:11:26 +0000 Subject: Clean up read-receipt handling. Remove a call to run_as_background_process: there is no need to run this as a background process, because build_and_send_edu does not block. We may as well inline the whole of _push_remotes. --- changelog.d/4797.misc | 1 + synapse/handlers/receipts.py | 103 ++++++++++++++++++------------------------- 2 files changed, 45 insertions(+), 59 deletions(-) create mode 100644 changelog.d/4797.misc (limited to 'synapse') diff --git a/changelog.d/4797.misc b/changelog.d/4797.misc new file mode 100644 index 0000000000..822e98e6a7 --- /dev/null +++ b/changelog.d/4797.misc @@ -0,0 +1 @@ +Clean up read-receipt handling. 
diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 8b2d03a756..1728089667 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -16,7 +16,6 @@ import logging from twisted.internet import defer -from synapse.metrics.background_process_metrics import run_as_background_process from synapse.types import get_domain_from_id from ._base import BaseHandler @@ -38,31 +37,6 @@ class ReceiptsHandler(BaseHandler): self.clock = self.hs.get_clock() self.state = hs.get_state_handler() - @defer.inlineCallbacks - def received_client_receipt(self, room_id, receipt_type, user_id, - event_id): - """Called when a client tells us a local user has read up to the given - event_id in the room. - """ - receipt = { - "room_id": room_id, - "receipt_type": receipt_type, - "user_id": user_id, - "event_ids": [event_id], - "data": { - "ts": int(self.clock.time_msec()), - } - } - - is_new = yield self._handle_new_receipts([receipt]) - - if is_new: - # fire off a process in the background to send the receipt to - # remote servers - run_as_background_process( - 'push_receipts_to_remotes', self._push_remotes, receipt - ) - @defer.inlineCallbacks def _received_remote_receipt(self, origin, content): """Called when we receive an EDU of type m.receipt from a remote HS. @@ -128,43 +102,54 @@ class ReceiptsHandler(BaseHandler): defer.returnValue(True) @defer.inlineCallbacks - def _push_remotes(self, receipt): - """Given a receipt, works out which remote servers should be - poked and pokes them. + def received_client_receipt(self, room_id, receipt_type, user_id, + event_id): + """Called when a client tells us a local user has read up to the given + event_id in the room. """ - try: - # TODO: optimise this to move some of the work to the workers. 
- room_id = receipt["room_id"] - receipt_type = receipt["receipt_type"] - user_id = receipt["user_id"] - event_ids = receipt["event_ids"] - data = receipt["data"] + receipt = { + "room_id": room_id, + "receipt_type": receipt_type, + "user_id": user_id, + "event_ids": [event_id], + "data": { + "ts": int(self.clock.time_msec()), + } + } - users = yield self.state.get_current_user_in_room(room_id) - remotedomains = set(get_domain_from_id(u) for u in users) - remotedomains = remotedomains.copy() - remotedomains.discard(self.server_name) - - logger.debug("Sending receipt to: %r", remotedomains) - - for domain in remotedomains: - self.federation.build_and_send_edu( - destination=domain, - edu_type="m.receipt", - content={ - room_id: { - receipt_type: { - user_id: { - "event_ids": event_ids, - "data": data, - } + is_new = yield self._handle_new_receipts([receipt]) + if not is_new: + return + + # Work out which remote servers should be poked and poke them. + + # TODO: optimise this to move some of the work to the workers. + data = receipt["data"] + + # XXX why does this not use state.get_current_hosts_in_room() ? 
+ users = yield self.state.get_current_user_in_room(room_id) + remotedomains = set(get_domain_from_id(u) for u in users) + remotedomains = remotedomains.copy() + remotedomains.discard(self.server_name) + + logger.debug("Sending receipt to: %r", remotedomains) + + for domain in remotedomains: + self.federation.build_and_send_edu( + destination=domain, + edu_type="m.receipt", + content={ + room_id: { + receipt_type: { + user_id: { + "event_ids": [event_id], + "data": data, } - }, + } }, - key=(room_id, receipt_type, user_id), - ) - except Exception: - logger.exception("Error pushing receipts to remote servers") + }, + key=(room_id, receipt_type, user_id), + ) @defer.inlineCallbacks def get_receipts_for_room(self, room_id, to_key): -- cgit 1.5.1 From 2db49ea476ce3fdf32b14a2b376a508be2540da9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 4 Mar 2019 18:18:11 +0000 Subject: Add some debug about processing read receipts. I'm hoping to establish which rooms are having lots of RRs sent for them, and how old the events are when they are sent. --- changelog.d/4798.misc | 1 + synapse/storage/receipts.py | 26 ++++++++++++++++++++------ 2 files changed, 21 insertions(+), 6 deletions(-) create mode 100644 changelog.d/4798.misc (limited to 'synapse') diff --git a/changelog.d/4798.misc b/changelog.d/4798.misc new file mode 100644 index 0000000000..d60f208dc3 --- /dev/null +++ b/changelog.d/4798.misc @@ -0,0 +1 @@ +Add some debug about processing read receipts. 
diff --git a/synapse/storage/receipts.py b/synapse/storage/receipts.py index 0ac665e967..0fd1ccc40a 100644 --- a/synapse/storage/receipts.py +++ b/synapse/storage/receipts.py @@ -346,15 +346,23 @@ class ReceiptsStore(ReceiptsWorkerStore): def insert_linearized_receipt_txn(self, txn, room_id, receipt_type, user_id, event_id, data, stream_id): + """Inserts a read-receipt into the database if it's newer than the current RR + + Returns: int|None + None if the RR is older than the current RR + otherwise, the rx timestamp of the event that the RR corresponds to + (or 0 if the event is unknown) + """ res = self._simple_select_one_txn( txn, table="events", - retcols=["topological_ordering", "stream_ordering"], + retcols=["stream_ordering", "received_ts"], keyvalues={"event_id": event_id}, allow_none=True ) stream_ordering = int(res["stream_ordering"]) if res else None + rx_ts = res["received_ts"] if res else 0 # We don't want to clobber receipts for more recent events, so we # have to compare orderings of existing receipts @@ -373,7 +381,7 @@ class ReceiptsStore(ReceiptsWorkerStore): "one for later event %s", event_id, eid, ) - return False + return None txn.call_after( self.get_receipts_for_room.invalidate, (room_id, receipt_type) @@ -429,7 +437,7 @@ class ReceiptsStore(ReceiptsWorkerStore): stream_ordering=stream_ordering, ) - return True + return rx_ts @defer.inlineCallbacks def insert_receipt(self, room_id, receipt_type, user_id, event_ids, data): @@ -466,7 +474,7 @@ class ReceiptsStore(ReceiptsWorkerStore): stream_id_manager = self._receipts_id_gen.get_next() with stream_id_manager as stream_id: - have_persisted = yield self.runInteraction( + event_ts = yield self.runInteraction( "insert_linearized_receipt", self.insert_linearized_receipt_txn, room_id, receipt_type, user_id, linearized_event_id, @@ -474,8 +482,14 @@ class ReceiptsStore(ReceiptsWorkerStore): stream_id=stream_id, ) - if not have_persisted: - defer.returnValue(None) + if event_ts is None: + 
defer.returnValue(None) + + now = self._clock.time_msec() + logger.debug( + "RR for event %s in %s (%i ms old)", + linearized_event_id, room_id, now - event_ts, + ) yield self.insert_graph_receipt( room_id, receipt_type, user_id, event_ids, data -- cgit 1.5.1 From daa10e3e66dadef3b860c31baaeded1da92430be Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 4 Mar 2019 18:27:32 +0000 Subject: Remove unused `wait_for_replication` method I guess this was used once? It's not now, anyway. --- synapse/notifier.py | 50 -------------------------------------------------- 1 file changed, 50 deletions(-) (limited to 'synapse') diff --git a/synapse/notifier.py b/synapse/notifier.py index de02b1017e..2505202e98 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -178,8 +178,6 @@ class Notifier(object): self.remove_expired_streams, self.UNUSED_STREAM_EXPIRY_MS ) - self.replication_deferred = ObservableDeferred(defer.Deferred()) - # This is not a very cheap test to perform, but it's only executed # when rendering the metrics page, which is likely once per minute at # most when scraping it. @@ -518,10 +516,6 @@ class Notifier(object): def notify_replication(self): """Notify the any replication listeners that there's a new event""" with PreserveLoggingContext(): - deferred = self.replication_deferred - self.replication_deferred = ObservableDeferred(defer.Deferred()) - deferred.callback(None) - # the callbacks may well outlast the current request, so we run # them in the sentinel logcontext. # @@ -530,47 +524,3 @@ class Notifier(object): # accordingly, but that requires more changes) for cb in self.replication_callbacks: cb() - - @defer.inlineCallbacks - def wait_for_replication(self, callback, timeout): - """Wait for an event to happen. - - Args: - callback: Gets called whenever an event happens. If this returns a - truthy value then ``wait_for_replication`` returns, otherwise - it waits for another event. 
- timeout: How many milliseconds to wait for callback return a truthy - value. - - Returns: - A deferred that resolves with the value returned by the callback. - """ - listener = _NotificationListener(None) - - end_time = self.clock.time_msec() + timeout - - while True: - listener.deferred = self.replication_deferred.observe() - result = yield callback() - if result: - break - - now = self.clock.time_msec() - if end_time <= now: - break - - listener.deferred = timeout_deferred( - listener.deferred, - timeout=(end_time - now) / 1000., - reactor=self.hs.get_reactor(), - ) - - try: - with PreserveLoggingContext(): - yield listener.deferred - except defer.TimeoutError: - break - except defer.CancelledError: - break - - defer.returnValue(result) -- cgit 1.5.1 From 157e5a8f27cfb7561721785512e69a0177ea48dd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 4 Mar 2019 18:24:32 +0000 Subject: Split DeviceHandler into master and worker --- synapse/handlers/device.py | 342 +++++++++++++++++++++++---------------------- synapse/server.py | 7 +- 2 files changed, 179 insertions(+), 170 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index c708c35d4d..7e48661355 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -37,13 +37,185 @@ from ._base import BaseHandler logger = logging.getLogger(__name__) -class DeviceHandler(BaseHandler): +class DeviceWorkerHandler(BaseHandler): def __init__(self, hs): - super(DeviceHandler, self).__init__(hs) + super(DeviceWorkerHandler, self).__init__(hs) self.hs = hs self.state = hs.get_state_handler() self._auth_handler = hs.get_auth_handler() + + @defer.inlineCallbacks + def get_devices_by_user(self, user_id): + """ + Retrieve the given user's devices + + Args: + user_id (str): + Returns: + defer.Deferred: list[dict[str, X]]: info on each device + """ + + device_map = yield self.store.get_devices_by_user(user_id) + + ips = yield 
self.store.get_last_client_ip_by_device( + user_id, device_id=None + ) + + devices = list(device_map.values()) + for device in devices: + _update_device_from_client_ips(device, ips) + + defer.returnValue(devices) + + @defer.inlineCallbacks + def get_device(self, user_id, device_id): + """ Retrieve the given device + + Args: + user_id (str): + device_id (str): + + Returns: + defer.Deferred: dict[str, X]: info on the device + Raises: + errors.NotFoundError: if the device was not found + """ + try: + device = yield self.store.get_device(user_id, device_id) + except errors.StoreError: + raise errors.NotFoundError + ips = yield self.store.get_last_client_ip_by_device( + user_id, device_id, + ) + _update_device_from_client_ips(device, ips) + defer.returnValue(device) + + @measure_func("device.get_user_ids_changed") + @defer.inlineCallbacks + def get_user_ids_changed(self, user_id, from_token): + """Get list of users that have had the devices updated, or have newly + joined a room, that `user_id` may be interested in. 
+ + Args: + user_id (str) + from_token (StreamToken) + """ + now_token = yield self.hs.get_event_sources().get_current_token() + + room_ids = yield self.store.get_rooms_for_user(user_id) + + # First we check if any devices have changed + changed = yield self.store.get_user_whose_devices_changed( + from_token.device_list_key + ) + + # Then work out if any users have since joined + rooms_changed = self.store.get_rooms_that_changed(room_ids, from_token.room_key) + + member_events = yield self.store.get_membership_changes_for_user( + user_id, from_token.room_key, now_token.room_key, + ) + rooms_changed.update(event.room_id for event in member_events) + + stream_ordering = RoomStreamToken.parse_stream_token( + from_token.room_key + ).stream + + possibly_changed = set(changed) + possibly_left = set() + for room_id in rooms_changed: + current_state_ids = yield self.store.get_current_state_ids(room_id) + + # The user may have left the room + # TODO: Check if they actually did or if we were just invited. + if room_id not in room_ids: + for key, event_id in iteritems(current_state_ids): + etype, state_key = key + if etype != EventTypes.Member: + continue + possibly_left.add(state_key) + continue + + # Fetch the current state at the time. 
+ try: + event_ids = yield self.store.get_forward_extremeties_for_room( + room_id, stream_ordering=stream_ordering + ) + except errors.StoreError: + # we have purged the stream_ordering index since the stream + # ordering: treat it the same as a new room + event_ids = [] + + # special-case for an empty prev state: include all members + # in the changed list + if not event_ids: + for key, event_id in iteritems(current_state_ids): + etype, state_key = key + if etype != EventTypes.Member: + continue + possibly_changed.add(state_key) + continue + + current_member_id = current_state_ids.get((EventTypes.Member, user_id)) + if not current_member_id: + continue + + # mapping from event_id -> state_dict + prev_state_ids = yield self.store.get_state_ids_for_events(event_ids) + + # Check if we've joined the room? If so we just blindly add all the users to + # the "possibly changed" users. + for state_dict in itervalues(prev_state_ids): + member_event = state_dict.get((EventTypes.Member, user_id), None) + if not member_event or member_event != current_member_id: + for key, event_id in iteritems(current_state_ids): + etype, state_key = key + if etype != EventTypes.Member: + continue + possibly_changed.add(state_key) + break + + # If there has been any change in membership, include them in the + # possibly changed list. We'll check if they are joined below, + # and we're not toooo worried about spuriously adding users. + for key, event_id in iteritems(current_state_ids): + etype, state_key = key + if etype != EventTypes.Member: + continue + + # check if this member has changed since any of the extremities + # at the stream_ordering, and add them to the list if so. 
+ for state_dict in itervalues(prev_state_ids): + prev_event_id = state_dict.get(key, None) + if not prev_event_id or prev_event_id != event_id: + if state_key != user_id: + possibly_changed.add(state_key) + break + + if possibly_changed or possibly_left: + users_who_share_room = yield self.store.get_users_who_share_room_with_user( + user_id + ) + + # Take the intersection of the users whose devices may have changed + # and those that actually still share a room with the user + possibly_joined = possibly_changed & users_who_share_room + possibly_left = (possibly_changed | possibly_left) - users_who_share_room + else: + possibly_joined = [] + possibly_left = [] + + defer.returnValue({ + "changed": list(possibly_joined), + "left": list(possibly_left), + }) + + +class DeviceHandler(DeviceWorkerHandler): + def __init__(self, hs): + super(DeviceHandler, self).__init__(hs) + self.federation_sender = hs.get_federation_sender() self._edu_updater = DeviceListEduUpdater(hs, self) @@ -103,52 +275,6 @@ class DeviceHandler(BaseHandler): raise errors.StoreError(500, "Couldn't generate a device ID.") - @defer.inlineCallbacks - def get_devices_by_user(self, user_id): - """ - Retrieve the given user's devices - - Args: - user_id (str): - Returns: - defer.Deferred: list[dict[str, X]]: info on each device - """ - - device_map = yield self.store.get_devices_by_user(user_id) - - ips = yield self.store.get_last_client_ip_by_device( - user_id, device_id=None - ) - - devices = list(device_map.values()) - for device in devices: - _update_device_from_client_ips(device, ips) - - defer.returnValue(devices) - - @defer.inlineCallbacks - def get_device(self, user_id, device_id): - """ Retrieve the given device - - Args: - user_id (str): - device_id (str): - - Returns: - defer.Deferred: dict[str, X]: info on the device - Raises: - errors.NotFoundError: if the device was not found - """ - try: - device = yield self.store.get_device(user_id, device_id) - except errors.StoreError: - raise 
errors.NotFoundError - ips = yield self.store.get_last_client_ip_by_device( - user_id, device_id, - ) - _update_device_from_client_ips(device, ips) - defer.returnValue(device) - @defer.inlineCallbacks def delete_device(self, user_id, device_id): """ Delete the given device @@ -287,126 +413,6 @@ class DeviceHandler(BaseHandler): for host in hosts: self.federation_sender.send_device_messages(host) - @measure_func("device.get_user_ids_changed") - @defer.inlineCallbacks - def get_user_ids_changed(self, user_id, from_token): - """Get list of users that have had the devices updated, or have newly - joined a room, that `user_id` may be interested in. - - Args: - user_id (str) - from_token (StreamToken) - """ - now_token = yield self.hs.get_event_sources().get_current_token() - - room_ids = yield self.store.get_rooms_for_user(user_id) - - # First we check if any devices have changed - changed = yield self.store.get_user_whose_devices_changed( - from_token.device_list_key - ) - - # Then work out if any users have since joined - rooms_changed = self.store.get_rooms_that_changed(room_ids, from_token.room_key) - - member_events = yield self.store.get_membership_changes_for_user( - user_id, from_token.room_key, now_token.room_key - ) - rooms_changed.update(event.room_id for event in member_events) - - stream_ordering = RoomStreamToken.parse_stream_token( - from_token.room_key - ).stream - - possibly_changed = set(changed) - possibly_left = set() - for room_id in rooms_changed: - current_state_ids = yield self.store.get_current_state_ids(room_id) - - # The user may have left the room - # TODO: Check if they actually did or if we were just invited. - if room_id not in room_ids: - for key, event_id in iteritems(current_state_ids): - etype, state_key = key - if etype != EventTypes.Member: - continue - possibly_left.add(state_key) - continue - - # Fetch the current state at the time. 
- try: - event_ids = yield self.store.get_forward_extremeties_for_room( - room_id, stream_ordering=stream_ordering - ) - except errors.StoreError: - # we have purged the stream_ordering index since the stream - # ordering: treat it the same as a new room - event_ids = [] - - # special-case for an empty prev state: include all members - # in the changed list - if not event_ids: - for key, event_id in iteritems(current_state_ids): - etype, state_key = key - if etype != EventTypes.Member: - continue - possibly_changed.add(state_key) - continue - - current_member_id = current_state_ids.get((EventTypes.Member, user_id)) - if not current_member_id: - continue - - # mapping from event_id -> state_dict - prev_state_ids = yield self.store.get_state_ids_for_events(event_ids) - - # Check if we've joined the room? If so we just blindly add all the users to - # the "possibly changed" users. - for state_dict in itervalues(prev_state_ids): - member_event = state_dict.get((EventTypes.Member, user_id), None) - if not member_event or member_event != current_member_id: - for key, event_id in iteritems(current_state_ids): - etype, state_key = key - if etype != EventTypes.Member: - continue - possibly_changed.add(state_key) - break - - # If there has been any change in membership, include them in the - # possibly changed list. We'll check if they are joined below, - # and we're not toooo worried about spuriously adding users. - for key, event_id in iteritems(current_state_ids): - etype, state_key = key - if etype != EventTypes.Member: - continue - - # check if this member has changed since any of the extremities - # at the stream_ordering, and add them to the list if so. 
- for state_dict in itervalues(prev_state_ids): - prev_event_id = state_dict.get(key, None) - if not prev_event_id or prev_event_id != event_id: - if state_key != user_id: - possibly_changed.add(state_key) - break - - if possibly_changed or possibly_left: - users_who_share_room = yield self.store.get_users_who_share_room_with_user( - user_id - ) - - # Take the intersection of the users whose devices may have changed - # and those that actually still share a room with the user - possibly_joined = possibly_changed & users_who_share_room - possibly_left = (possibly_changed | possibly_left) - users_who_share_room - else: - possibly_joined = [] - possibly_left = [] - - defer.returnValue({ - "changed": list(possibly_joined), - "left": list(possibly_left), - }) - @defer.inlineCallbacks def on_federation_query_user_devices(self, user_id): stream_id, devices = yield self.store.get_devices_with_keys_by_user(user_id) diff --git a/synapse/server.py b/synapse/server.py index 4d364fccce..4323e7ff12 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -51,7 +51,7 @@ from synapse.handlers.acme import AcmeHandler from synapse.handlers.appservice import ApplicationServicesHandler from synapse.handlers.auth import AuthHandler, MacaroonGenerator from synapse.handlers.deactivate_account import DeactivateAccountHandler -from synapse.handlers.device import DeviceHandler +from synapse.handlers.device import DeviceHandler, DeviceWorkerHandler from synapse.handlers.devicemessage import DeviceMessageHandler from synapse.handlers.e2e_keys import E2eKeysHandler from synapse.handlers.e2e_room_keys import E2eRoomKeysHandler @@ -307,7 +307,10 @@ class HomeServer(object): return MacaroonGenerator(self) def build_device_handler(self): - return DeviceHandler(self) + if self.config.worker_app: + return DeviceWorkerHandler(self) + else: + return DeviceHandler(self) def build_device_message_handler(self): return DeviceMessageHandler(self) -- cgit 1.5.1 From bfa7d46a107d4a3eb55701c42fe75290688f4e30 
Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 4 Mar 2019 18:09:06 +0000 Subject: Allow /keys/{changes,query} API to run on worker --- docs/workers.rst | 2 ++ synapse/app/client_reader.py | 11 +++++++++++ synapse/handlers/device.py | 4 ++-- 3 files changed, 15 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/docs/workers.rst b/docs/workers.rst index 3c18db1b19..d80fc04d2e 100644 --- a/docs/workers.rst +++ b/docs/workers.rst @@ -225,6 +225,8 @@ following regular expressions:: ^/_matrix/client/(api/v1|r0|unstable)/rooms/.*/state$ ^/_matrix/client/(api/v1|r0|unstable)/login$ ^/_matrix/client/(api/v1|r0|unstable)/account/3pid$ + ^/_matrix/client/(api/v1|r0|unstable)/keys/query$ + ^/_matrix/client/(api/v1|r0|unstable)/keys/changes$ Additionally, the following REST endpoints can be handled, but all requests must be routed to the same instance:: diff --git a/synapse/app/client_reader.py b/synapse/app/client_reader.py index 5070094cad..beaea64a61 100644 --- a/synapse/app/client_reader.py +++ b/synapse/app/client_reader.py @@ -33,9 +33,13 @@ from synapse.replication.slave.storage._base import BaseSlavedStore from synapse.replication.slave.storage.account_data import SlavedAccountDataStore from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore from synapse.replication.slave.storage.client_ips import SlavedClientIpStore +from synapse.replication.slave.storage.deviceinbox import SlavedDeviceInboxStore +from synapse.replication.slave.storage.devices import SlavedDeviceStore from synapse.replication.slave.storage.directory import DirectoryStore from synapse.replication.slave.storage.events import SlavedEventStore from synapse.replication.slave.storage.keys import SlavedKeyStore +from synapse.replication.slave.storage.push_rule import SlavedPushRuleStore +from synapse.replication.slave.storage.receipts import SlavedReceiptsStore from synapse.replication.slave.storage.registration import SlavedRegistrationStore from 
synapse.replication.slave.storage.room import RoomStore from synapse.replication.slave.storage.transactions import SlavedTransactionStore @@ -49,6 +53,7 @@ from synapse.rest.client.v1.room import ( RoomStateRestServlet, ) from synapse.rest.client.v2_alpha.account import ThreepidRestServlet +from synapse.rest.client.v2_alpha.keys import KeyChangesServlet, KeyQueryServlet from synapse.rest.client.v2_alpha.register import RegisterRestServlet from synapse.server import HomeServer from synapse.storage.engines import create_engine @@ -61,6 +66,10 @@ logger = logging.getLogger("synapse.app.client_reader") class ClientReaderSlavedStore( + SlavedDeviceInboxStore, + SlavedDeviceStore, + SlavedReceiptsStore, + SlavedPushRuleStore, SlavedAccountDataStore, SlavedEventStore, SlavedKeyStore, @@ -98,6 +107,8 @@ class ClientReaderServer(HomeServer): RegisterRestServlet(self).register(resource) LoginRestServlet(self).register(resource) ThreepidRestServlet(self).register(resource) + KeyQueryServlet(self).register(resource) + KeyChangesServlet(self).register(resource) resources.update({ "/_matrix/client/r0": resource, diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 7e48661355..c09a7c6280 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -101,7 +101,7 @@ class DeviceWorkerHandler(BaseHandler): user_id (str) from_token (StreamToken) """ - now_token = yield self.hs.get_event_sources().get_current_token() + now_room_key = yield self.store.get_room_events_max_id() room_ids = yield self.store.get_rooms_for_user(user_id) @@ -114,7 +114,7 @@ class DeviceWorkerHandler(BaseHandler): rooms_changed = self.store.get_rooms_that_changed(room_ids, from_token.room_key) member_events = yield self.store.get_membership_changes_for_user( - user_id, from_token.room_key, now_token.room_key, + user_id, from_token.room_key, now_room_key, ) rooms_changed.update(event.room_id for event in member_events) -- cgit 1.5.1 From 
c7325776a7a02354657c519eb5169c1f2f3e0872 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 4 Mar 2019 18:31:18 +0000 Subject: Remove redundant PreserveLoggingContext Both (!) things that register as replication listeners do the right thing wrt logcontexts, so this is redundant. --- synapse/notifier.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'synapse') diff --git a/synapse/notifier.py b/synapse/notifier.py index 2505202e98..ff589660da 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -203,7 +203,9 @@ class Notifier(object): def add_replication_callback(self, cb): """Add a callback that will be called when some new data is available. - Callback is not given any arguments. + Callback is not given any arguments. It should *not* return a Deferred - if + it needs to do any asynchronous work, a background thread should be started and + wrapped with run_as_background_process. """ self.replication_callbacks.append(cb) @@ -515,12 +517,5 @@ class Notifier(object): def notify_replication(self): """Notify the any replication listeners that there's a new event""" - with PreserveLoggingContext(): - # the callbacks may well outlast the current request, so we run - # them in the sentinel logcontext. 
- # - # (ideally it would be up to the callbacks to know if they were - # starting off background processes and drop the logcontext - # accordingly, but that requires more changes) - for cb in self.replication_callbacks: - cb() + for cb in self.replication_callbacks: + cb() -- cgit 1.5.1 From aa06d26ae05585ddfb5e33a2dd521c9aa27b6cfa Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 5 Mar 2019 09:16:35 +0000 Subject: clarify comments --- changelog.d/4699.bugfix | 2 +- synapse/handlers/federation.py | 19 +++++++++++-------- synapse/visibility.py | 2 +- 3 files changed, 13 insertions(+), 10 deletions(-) (limited to 'synapse') diff --git a/changelog.d/4699.bugfix b/changelog.d/4699.bugfix index 8cd8340cc1..1d7f3174e7 100644 --- a/changelog.d/4699.bugfix +++ b/changelog.d/4699.bugfix @@ -1 +1 @@ -Fix attempting to paginate in rooms where server cannot see any events. +Fix attempting to paginate in rooms where server cannot see any events, to avoid unnecessarily pulling in lots of redacted events. diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index bf2989aefd..72b63d64d0 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -862,9 +862,9 @@ class FederationHandler(BaseHandler): # as otherwise we'll just spend a lot of resources to get redacted # events. # - # We do this by filtering all the extremities and seeing if any remain. - # Given we don't have the extremity events themselves, we need to - # actually check the events that reference them. + # We do this by filtering all the backwards extremities and seeing if + # any remain. Given we don't have the extremity events themselves, we + # need to actually check the events that reference them. # # *Note*: the spec wants us to keep backfilling until we reach the start # of the room in case we are allowed to see some of the history. 
However @@ -873,13 +873,16 @@ class FederationHandler(BaseHandler): # there is its often sufficiently long ago that clients would stop # attempting to paginate before backfill reached the visible history. # - # TODO: If we do do a backfill the we should filter the extremities to - # only include those that point to visible portions of history. + # TODO: If we do do a backfill then we should filter the backwards + # extremities to only include those that point to visible portions of + # history. # # TODO: Correctly handle the case where we are allowed to see the - # forward event but not the extremity, e.g. in the case of initial - # join of the server where we are allowed to see the join event but - # not anything before it. + # forward event but not the backward extremity, e.g. in the case of + # initial join of the server where we are allowed to see the join + # event but not anything before it. This would require looking at the + # state *before* the event, ignoring the special casing certain event + # types have. forward_events = yield self.store.get_successor_events( list(extremities), diff --git a/synapse/visibility.py b/synapse/visibility.py index e9dc73c25e..efec21673b 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -277,7 +277,7 @@ def filter_events_for_server(store, server_name, events, redact=True, return True - # Next lets check to see if all the events have a history visibility + # Lets check to see if all the events have a history visibility # of "shared" or "world_readable". If thats the case then we don't # need to check membership (as we know the server is in the room). 
event_to_state_ids = yield store.get_state_ids_for_events( -- cgit 1.5.1 From b9f61630927752422fb80cf7ece083741aefd399 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 5 Mar 2019 13:58:30 +0000 Subject: Simplify token replication logic --- synapse/replication/tcp/protocol.py | 37 ++++++++++++++----------------------- 1 file changed, 14 insertions(+), 23 deletions(-) (limited to 'synapse') diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index dac4fbeef7..55630ba9a7 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -478,7 +478,7 @@ class ServerReplicationStreamProtocol(BaseReplicationStreamProtocol): # Now we can send any updates that came in while we were subscribing pending_rdata = self.pending_rdata.pop(stream_name, []) - batch_updates = [] + updates = [] for token, update in pending_rdata: # If the token is null, it is part of a batch update. Batches # are multiple updates that share a single token. To denote @@ -489,34 +489,25 @@ class ServerReplicationStreamProtocol(BaseReplicationStreamProtocol): # final token. if token is None: # Store this update as part of a batch - batch_updates.append(update) + updates.append(update) continue - if len(batch_updates) > 0: - # There is an ongoing batch and this is the end - if current_token <= current_token: - # This batch is older than current_token, dismiss it - batch_updates = [] - else: - # This is the end of the batch. Append final update of - # this batch before sending - batch_updates.append(update) - - # Send all updates that are part of this batch with the - # found token - for update in batch_updates: - self.send_command(RdataCommand(stream_name, token, update)) - - # Clear saved batch updates - batch_updates = [] + if token <= current_token: + # This update or batch of updates is older than + # current_token, dismiss it + updates = [] continue - # This is an update that's not part of a batch. 
- # - # Only send updates newer than the current token - if token > current_token: + updates.append(update) + + # Send all updates that are part of this batch with the + # found token + for update in updates: self.send_command(RdataCommand(stream_name, token, update)) + # Clear stored updates + updates = [] + # They're now fully subscribed self.replication_streams.add(stream_name) except Exception as e: -- cgit 1.5.1 From a4c3a361b70bc02d65104240bef1b3cbb110bf22 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Tue, 5 Mar 2019 14:25:33 +0000 Subject: Add rate-limiting on registration (#4735) * Rate-limiting for registration * Add unit test for registration rate limiting * Add config parameters for rate limiting on auth endpoints * Doc * Fix doc of rate limiting function Co-Authored-By: babolivier * Incorporate review * Fix config parsing * Fix linting errors * Set default config for auth rate limiting * Fix tests * Add changelog * Advance reactor instead of mocked clock * Move parameters to registration specific config and give them more sensible default values * Remove unused config options * Don't mock the rate limiter un MAU tests * Rename _register_with_store into register_with_store * Make CI happy * Remove unused import * Update sample config * Fix ratelimiting test for py2 * Add non-guest test --- changelog.d/4735.feature | 1 + docs/sample_config.yaml | 11 +++++++ synapse/api/ratelimiting.py | 31 ++++++++++--------- synapse/config/registration.py | 18 +++++++++++ synapse/handlers/_base.py | 4 +-- synapse/handlers/register.py | 39 ++++++++++++++++++----- synapse/replication/http/register.py | 8 +++-- synapse/rest/client/v2_alpha/register.py | 33 +++++++++++++++++--- tests/api/test_ratelimiting.py | 20 ++++++------ tests/handlers/test_profile.py | 4 +-- tests/replication/slave/storage/_base.py | 4 +-- tests/rest/client/v1/test_events.py | 4 +-- tests/rest/client/v1/test_rooms.py | 6 ++-- tests/rest/client/v1/test_typing.py | 4 +-- 
tests/rest/client/v2_alpha/test_register.py | 48 +++++++++++++++++++++++++++++ tests/test_mau.py | 3 +- tests/utils.py | 2 ++ 17 files changed, 186 insertions(+), 54 deletions(-) create mode 100644 changelog.d/4735.feature (limited to 'synapse') diff --git a/changelog.d/4735.feature b/changelog.d/4735.feature new file mode 100644 index 0000000000..a4c0b196f6 --- /dev/null +++ b/changelog.d/4735.feature @@ -0,0 +1 @@ +Add configurable rate limiting to the /register endpoint. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 7cf58d2182..e0140003fd 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -657,6 +657,17 @@ trusted_third_party_id_servers: # autocreate_auto_join_rooms: true +# Number of registration requests a client can send per second. +# Defaults to 1/minute (0.17). +# +#rc_registration_requests_per_second: 0.17 + +# Number of registration requests a client can send before being +# throttled. +# Defaults to 3. +# +#rc_registration_request_burst_count: 3.0 + ## Metrics ### diff --git a/synapse/api/ratelimiting.py b/synapse/api/ratelimiting.py index 3bb5b3da37..ad68079eeb 100644 --- a/synapse/api/ratelimiting.py +++ b/synapse/api/ratelimiting.py @@ -23,12 +23,13 @@ class Ratelimiter(object): def __init__(self): self.message_counts = collections.OrderedDict() - def send_message(self, user_id, time_now_s, msg_rate_hz, burst_count, update=True): - """Can the user send a message? + def can_do_action(self, key, time_now_s, rate_hz, burst_count, update=True): + """Can the entity (e.g. user or IP address) perform the action? Args: - user_id: The user sending a message. + key: The key we should use when rate limiting. Can be a user ID + (when sending events), an IP address, etc. time_now_s: The time now. - msg_rate_hz: The long term number of messages a user can send in a + rate_hz: The long term number of messages a user can send in a second. burst_count: How many messages the user can send before being limited. 
@@ -41,10 +42,10 @@ class Ratelimiter(object): """ self.prune_message_counts(time_now_s) message_count, time_start, _ignored = self.message_counts.get( - user_id, (0., time_now_s, None), + key, (0., time_now_s, None), ) time_delta = time_now_s - time_start - sent_count = message_count - time_delta * msg_rate_hz + sent_count = message_count - time_delta * rate_hz if sent_count < 0: allowed = True time_start = time_now_s @@ -56,13 +57,13 @@ class Ratelimiter(object): message_count += 1 if update: - self.message_counts[user_id] = ( - message_count, time_start, msg_rate_hz + self.message_counts[key] = ( + message_count, time_start, rate_hz ) - if msg_rate_hz > 0: + if rate_hz > 0: time_allowed = ( - time_start + (message_count - burst_count + 1) / msg_rate_hz + time_start + (message_count - burst_count + 1) / rate_hz ) if time_allowed < time_now_s: time_allowed = time_now_s @@ -72,12 +73,12 @@ class Ratelimiter(object): return allowed, time_allowed def prune_message_counts(self, time_now_s): - for user_id in list(self.message_counts.keys()): - message_count, time_start, msg_rate_hz = ( - self.message_counts[user_id] + for key in list(self.message_counts.keys()): + message_count, time_start, rate_hz = ( + self.message_counts[key] ) time_delta = time_now_s - time_start - if message_count - time_delta * msg_rate_hz > 0: + if message_count - time_delta * rate_hz > 0: break else: - del self.message_counts[user_id] + del self.message_counts[key] diff --git a/synapse/config/registration.py b/synapse/config/registration.py index 2881482f96..d32f6fff73 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -54,6 +54,13 @@ class RegistrationConfig(Config): config.get("disable_msisdn_registration", False) ) + self.rc_registration_requests_per_second = config.get( + "rc_registration_requests_per_second", 0.17, + ) + self.rc_registration_request_burst_count = config.get( + "rc_registration_request_burst_count", 3, + ) + def default_config(self, 
generate_secrets=False, **kwargs): if generate_secrets: registration_shared_secret = 'registration_shared_secret: "%s"' % ( @@ -140,6 +147,17 @@ class RegistrationConfig(Config): # users cannot be auto-joined since they do not exist. # autocreate_auto_join_rooms: true + + # Number of registration requests a client can send per second. + # Defaults to 1/minute (0.17). + # + #rc_registration_requests_per_second: 0.17 + + # Number of registration requests a client can send before being + # throttled. + # Defaults to 3. + # + #rc_registration_request_burst_count: 3.0 """ % locals() def add_arguments(self, parser): diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index 594754cfd8..d8d86d6ff3 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -93,9 +93,9 @@ class BaseHandler(object): messages_per_second = self.hs.config.rc_messages_per_second burst_count = self.hs.config.rc_message_burst_count - allowed, time_allowed = self.ratelimiter.send_message( + allowed, time_allowed = self.ratelimiter.can_do_action( user_id, time_now, - msg_rate_hz=messages_per_second, + rate_hz=messages_per_second, burst_count=burst_count, update=update, ) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index c0e06929bd..47d5e276f8 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -24,6 +24,7 @@ from synapse.api.errors import ( AuthError, Codes, InvalidCaptchaError, + LimitExceededError, RegistrationError, SynapseError, ) @@ -60,6 +61,7 @@ class RegistrationHandler(BaseHandler): self.user_directory_handler = hs.get_user_directory_handler() self.captcha_client = CaptchaServerHttpClient(hs) self.identity_handler = self.hs.get_handlers().identity_handler + self.ratelimiter = hs.get_ratelimiter() self._next_generated_user_id = None @@ -149,6 +151,7 @@ class RegistrationHandler(BaseHandler): threepid=None, user_type=None, default_display_name=None, + address=None, ): """Registers a new client on the server. 
@@ -167,6 +170,7 @@ class RegistrationHandler(BaseHandler): api.constants.UserTypes, or None for a normal user. default_display_name (unicode|None): if set, the new user's displayname will be set to this. Defaults to 'localpart'. + address (str|None): the IP address used to perform the regitration. Returns: A tuple of (user_id, access_token). Raises: @@ -206,7 +210,7 @@ class RegistrationHandler(BaseHandler): token = None if generate_token: token = self.macaroon_gen.generate_access_token(user_id) - yield self._register_with_store( + yield self.register_with_store( user_id=user_id, token=token, password_hash=password_hash, @@ -215,6 +219,7 @@ class RegistrationHandler(BaseHandler): create_profile_with_displayname=default_display_name, admin=admin, user_type=user_type, + address=address, ) if self.hs.config.user_directory_search_all_users: @@ -238,12 +243,13 @@ class RegistrationHandler(BaseHandler): if default_display_name is None: default_display_name = localpart try: - yield self._register_with_store( + yield self.register_with_store( user_id=user_id, token=token, password_hash=password_hash, make_guest=make_guest, create_profile_with_displayname=default_display_name, + address=address, ) except SynapseError: # if user id is taken, just generate another @@ -337,7 +343,7 @@ class RegistrationHandler(BaseHandler): user_id, allowed_appservice=service ) - yield self._register_with_store( + yield self.register_with_store( user_id=user_id, password_hash="", appservice_id=service_id, @@ -513,7 +519,7 @@ class RegistrationHandler(BaseHandler): token = self.macaroon_gen.generate_access_token(user_id) if need_register: - yield self._register_with_store( + yield self.register_with_store( user_id=user_id, token=token, password_hash=password_hash, @@ -590,10 +596,10 @@ class RegistrationHandler(BaseHandler): ratelimit=False, ) - def _register_with_store(self, user_id, token=None, password_hash=None, - was_guest=False, make_guest=False, appservice_id=None, - 
create_profile_with_displayname=None, admin=False, - user_type=None): + def register_with_store(self, user_id, token=None, password_hash=None, + was_guest=False, make_guest=False, appservice_id=None, + create_profile_with_displayname=None, admin=False, + user_type=None, address=None): """Register user in the datastore. Args: @@ -612,10 +618,26 @@ class RegistrationHandler(BaseHandler): admin (boolean): is an admin user? user_type (str|None): type of user. One of the values from api.constants.UserTypes, or None for a normal user. + address (str|None): the IP address used to perform the regitration. Returns: Deferred """ + # Don't rate limit for app services + if appservice_id is None and address is not None: + time_now = self.clock.time() + + allowed, time_allowed = self.ratelimiter.can_do_action( + address, time_now_s=time_now, + rate_hz=self.hs.config.rc_registration_requests_per_second, + burst_count=self.hs.config.rc_registration_request_burst_count, + ) + + if not allowed: + raise LimitExceededError( + retry_after_ms=int(1000 * (time_allowed - time_now)), + ) + if self.hs.config.worker_app: return self._register_client( user_id=user_id, @@ -627,6 +649,7 @@ class RegistrationHandler(BaseHandler): create_profile_with_displayname=create_profile_with_displayname, admin=admin, user_type=user_type, + address=address, ) else: return self.store.register( diff --git a/synapse/replication/http/register.py b/synapse/replication/http/register.py index 1d27c9221f..912a5ac341 100644 --- a/synapse/replication/http/register.py +++ b/synapse/replication/http/register.py @@ -33,11 +33,12 @@ class ReplicationRegisterServlet(ReplicationEndpoint): def __init__(self, hs): super(ReplicationRegisterServlet, self).__init__(hs) self.store = hs.get_datastore() + self.registration_handler = hs.get_registration_handler() @staticmethod def _serialize_payload( user_id, token, password_hash, was_guest, make_guest, appservice_id, - create_profile_with_displayname, admin, user_type, + 
create_profile_with_displayname, admin, user_type, address, ): """ Args: @@ -56,6 +57,7 @@ class ReplicationRegisterServlet(ReplicationEndpoint): admin (boolean): is an admin user? user_type (str|None): type of user. One of the values from api.constants.UserTypes, or None for a normal user. + address (str|None): the IP address used to perform the regitration. """ return { "token": token, @@ -66,13 +68,14 @@ class ReplicationRegisterServlet(ReplicationEndpoint): "create_profile_with_displayname": create_profile_with_displayname, "admin": admin, "user_type": user_type, + "address": address, } @defer.inlineCallbacks def _handle_request(self, request, user_id): content = parse_json_object_from_request(request) - yield self.store.register( + yield self.registration_handler.register_with_store( user_id=user_id, token=content["token"], password_hash=content["password_hash"], @@ -82,6 +85,7 @@ class ReplicationRegisterServlet(ReplicationEndpoint): create_profile_with_displayname=content["create_profile_with_displayname"], admin=content["admin"], user_type=content["user_type"], + address=content["address"] ) defer.returnValue((200, {})) diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 94cbba4303..b7f354570c 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -25,7 +25,12 @@ from twisted.internet import defer import synapse import synapse.types from synapse.api.constants import LoginType -from synapse.api.errors import Codes, SynapseError, UnrecognizedRequestError +from synapse.api.errors import ( + Codes, + LimitExceededError, + SynapseError, + UnrecognizedRequestError, +) from synapse.config.server import is_threepid_reserved from synapse.http.servlet import ( RestServlet, @@ -191,18 +196,36 @@ class RegisterRestServlet(RestServlet): self.identity_handler = hs.get_handlers().identity_handler self.room_member_handler = hs.get_room_member_handler() self.macaroon_gen = 
hs.get_macaroon_generator() + self.ratelimiter = hs.get_ratelimiter() + self.clock = hs.get_clock() @interactive_auth_handler @defer.inlineCallbacks def on_POST(self, request): body = parse_json_object_from_request(request) + client_addr = request.getClientIP() + + time_now = self.clock.time() + + allowed, time_allowed = self.ratelimiter.can_do_action( + client_addr, time_now_s=time_now, + rate_hz=self.hs.config.rc_registration_requests_per_second, + burst_count=self.hs.config.rc_registration_request_burst_count, + update=False, + ) + + if not allowed: + raise LimitExceededError( + retry_after_ms=int(1000 * (time_allowed - time_now)), + ) + kind = b"user" if b"kind" in request.args: kind = request.args[b"kind"][0] if kind == b"guest": - ret = yield self._do_guest_registration(body) + ret = yield self._do_guest_registration(body, address=client_addr) defer.returnValue(ret) return elif kind != b"user": @@ -411,6 +434,7 @@ class RegisterRestServlet(RestServlet): guest_access_token=guest_access_token, generate_token=False, threepid=threepid, + address=client_addr, ) # Necessary due to auth checks prior to the threepid being # written to the db @@ -522,12 +546,13 @@ class RegisterRestServlet(RestServlet): defer.returnValue(result) @defer.inlineCallbacks - def _do_guest_registration(self, params): + def _do_guest_registration(self, params, address=None): if not self.hs.config.allow_guest_access: raise SynapseError(403, "Guest access is disabled") user_id, _ = yield self.registration_handler.register( generate_token=False, - make_guest=True + make_guest=True, + address=address, ) # we don't allow guests to specify their own device_id, because diff --git a/tests/api/test_ratelimiting.py b/tests/api/test_ratelimiting.py index 8933fe3b72..30a255d441 100644 --- a/tests/api/test_ratelimiting.py +++ b/tests/api/test_ratelimiting.py @@ -6,34 +6,34 @@ from tests import unittest class TestRatelimiter(unittest.TestCase): def test_allowed(self): limiter = Ratelimiter() - allowed, 
time_allowed = limiter.send_message( - user_id="test_id", time_now_s=0, msg_rate_hz=0.1, burst_count=1 + allowed, time_allowed = limiter.can_do_action( + key="test_id", time_now_s=0, rate_hz=0.1, burst_count=1 ) self.assertTrue(allowed) self.assertEquals(10., time_allowed) - allowed, time_allowed = limiter.send_message( - user_id="test_id", time_now_s=5, msg_rate_hz=0.1, burst_count=1 + allowed, time_allowed = limiter.can_do_action( + key="test_id", time_now_s=5, rate_hz=0.1, burst_count=1 ) self.assertFalse(allowed) self.assertEquals(10., time_allowed) - allowed, time_allowed = limiter.send_message( - user_id="test_id", time_now_s=10, msg_rate_hz=0.1, burst_count=1 + allowed, time_allowed = limiter.can_do_action( + key="test_id", time_now_s=10, rate_hz=0.1, burst_count=1 ) self.assertTrue(allowed) self.assertEquals(20., time_allowed) def test_pruning(self): limiter = Ratelimiter() - allowed, time_allowed = limiter.send_message( - user_id="test_id_1", time_now_s=0, msg_rate_hz=0.1, burst_count=1 + allowed, time_allowed = limiter.can_do_action( + key="test_id_1", time_now_s=0, rate_hz=0.1, burst_count=1 ) self.assertIn("test_id_1", limiter.message_counts) - allowed, time_allowed = limiter.send_message( - user_id="test_id_2", time_now_s=10, msg_rate_hz=0.1, burst_count=1 + allowed, time_allowed = limiter.can_do_action( + key="test_id_2", time_now_s=10, rate_hz=0.1, burst_count=1 ) self.assertNotIn("test_id_1", limiter.message_counts) diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py index 80da1c8954..d60c124eec 100644 --- a/tests/handlers/test_profile.py +++ b/tests/handlers/test_profile.py @@ -55,11 +55,11 @@ class ProfileTestCase(unittest.TestCase): federation_client=self.mock_federation, federation_server=Mock(), federation_registry=self.mock_registry, - ratelimiter=NonCallableMock(spec_set=["send_message"]), + ratelimiter=NonCallableMock(spec_set=["can_do_action"]), ) self.ratelimiter = hs.get_ratelimiter() - 
self.ratelimiter.send_message.return_value = (True, 0) + self.ratelimiter.can_do_action.return_value = (True, 0) self.store = hs.get_datastore() diff --git a/tests/replication/slave/storage/_base.py b/tests/replication/slave/storage/_base.py index 9e9fbbfe93..524af4f8d1 100644 --- a/tests/replication/slave/storage/_base.py +++ b/tests/replication/slave/storage/_base.py @@ -31,10 +31,10 @@ class BaseSlavedStoreTestCase(unittest.HomeserverTestCase): hs = self.setup_test_homeserver( "blue", federation_client=Mock(), - ratelimiter=NonCallableMock(spec_set=["send_message"]), + ratelimiter=NonCallableMock(spec_set=["can_do_action"]), ) - hs.get_ratelimiter().send_message.return_value = (True, 0) + hs.get_ratelimiter().can_do_action.return_value = (True, 0) return hs diff --git a/tests/rest/client/v1/test_events.py b/tests/rest/client/v1/test_events.py index 483bebc832..36d8547275 100644 --- a/tests/rest/client/v1/test_events.py +++ b/tests/rest/client/v1/test_events.py @@ -40,10 +40,10 @@ class EventStreamPermissionsTestCase(unittest.HomeserverTestCase): config.auto_join_rooms = [] hs = self.setup_test_homeserver( - config=config, ratelimiter=NonCallableMock(spec_set=["send_message"]) + config=config, ratelimiter=NonCallableMock(spec_set=["can_do_action"]) ) self.ratelimiter = hs.get_ratelimiter() - self.ratelimiter.send_message.return_value = (True, 0) + self.ratelimiter.can_do_action.return_value = (True, 0) hs.get_handlers().federation_handler = Mock() diff --git a/tests/rest/client/v1/test_rooms.py b/tests/rest/client/v1/test_rooms.py index a824be9a62..015c144248 100644 --- a/tests/rest/client/v1/test_rooms.py +++ b/tests/rest/client/v1/test_rooms.py @@ -41,10 +41,10 @@ class RoomBase(unittest.HomeserverTestCase): "red", http_client=None, federation_client=Mock(), - ratelimiter=NonCallableMock(spec_set=["send_message"]), + ratelimiter=NonCallableMock(spec_set=["can_do_action"]), ) self.ratelimiter = self.hs.get_ratelimiter() - 
self.ratelimiter.send_message.return_value = (True, 0) + self.ratelimiter.can_do_action.return_value = (True, 0) self.hs.get_federation_handler = Mock(return_value=Mock()) @@ -96,7 +96,7 @@ class RoomPermissionsTestCase(RoomBase): # auth as user_id now self.helper.auth_user_id = self.user_id - def test_send_message(self): + def test_can_do_action(self): msg_content = b'{"msgtype":"m.text","body":"hello"}' seq = iter(range(100)) diff --git a/tests/rest/client/v1/test_typing.py b/tests/rest/client/v1/test_typing.py index 0ad814c5e5..30fb77bac8 100644 --- a/tests/rest/client/v1/test_typing.py +++ b/tests/rest/client/v1/test_typing.py @@ -42,13 +42,13 @@ class RoomTypingTestCase(unittest.HomeserverTestCase): "red", http_client=None, federation_client=Mock(), - ratelimiter=NonCallableMock(spec_set=["send_message"]), + ratelimiter=NonCallableMock(spec_set=["can_do_action"]), ) self.event_source = hs.get_event_sources().sources["typing"] self.ratelimiter = hs.get_ratelimiter() - self.ratelimiter.send_message.return_value = (True, 0) + self.ratelimiter.can_do_action.return_value = (True, 0) hs.get_handlers().federation_handler = Mock() diff --git a/tests/rest/client/v2_alpha/test_register.py b/tests/rest/client/v2_alpha/test_register.py index 906b348d3e..3600434858 100644 --- a/tests/rest/client/v2_alpha/test_register.py +++ b/tests/rest/client/v2_alpha/test_register.py @@ -130,3 +130,51 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): self.assertEquals(channel.result["code"], b"403", channel.result) self.assertEquals(channel.json_body["error"], "Guest access is disabled") + + def test_POST_ratelimiting_guest(self): + self.hs.config.rc_registration_request_burst_count = 5 + + for i in range(0, 6): + url = self.url + b"?kind=guest" + request, channel = self.make_request(b"POST", url, b"{}") + self.render(request) + + if i == 5: + self.assertEquals(channel.result["code"], b"429", channel.result) + retry_after_ms = int(channel.json_body["retry_after_ms"]) + 
else: + self.assertEquals(channel.result["code"], b"200", channel.result) + + self.reactor.advance(retry_after_ms / 1000.) + + request, channel = self.make_request(b"POST", self.url + b"?kind=guest", b"{}") + self.render(request) + + self.assertEquals(channel.result["code"], b"200", channel.result) + + def test_POST_ratelimiting(self): + self.hs.config.rc_registration_request_burst_count = 5 + + for i in range(0, 6): + params = { + "username": "kermit" + str(i), + "password": "monkey", + "device_id": "frogfone", + "auth": {"type": LoginType.DUMMY}, + } + request_data = json.dumps(params) + request, channel = self.make_request(b"POST", self.url, request_data) + self.render(request) + + if i == 5: + self.assertEquals(channel.result["code"], b"429", channel.result) + retry_after_ms = int(channel.json_body["retry_after_ms"]) + else: + self.assertEquals(channel.result["code"], b"200", channel.result) + + self.reactor.advance(retry_after_ms / 1000.) + + request, channel = self.make_request(b"POST", self.url + b"?kind=guest", b"{}") + self.render(request) + + self.assertEquals(channel.result["code"], b"200", channel.result) diff --git a/tests/test_mau.py b/tests/test_mau.py index 04f95c942f..00be1a8c21 100644 --- a/tests/test_mau.py +++ b/tests/test_mau.py @@ -17,7 +17,7 @@ import json -from mock import Mock, NonCallableMock +from mock import Mock from synapse.api.constants import LoginType from synapse.api.errors import Codes, HttpResponseException, SynapseError @@ -36,7 +36,6 @@ class TestMauLimit(unittest.HomeserverTestCase): "red", http_client=None, federation_client=Mock(), - ratelimiter=NonCallableMock(spec_set=["send_message"]), ) self.store = self.hs.get_datastore() diff --git a/tests/utils.py b/tests/utils.py index ee272157aa..e4c42f9fa8 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -150,6 +150,8 @@ def default_config(name): config.admin_contact = None config.rc_messages_per_second = 10000 config.rc_message_burst_count = 10000 + 
config.rc_registration_request_burst_count = 3.0 + config.rc_registration_requests_per_second = 0.17 config.saml2_enabled = False config.public_baseurl = None config.default_identity_server = None -- cgit 1.5.1 From 067ce795c06f3ac5ebc25e4d01624b076a972f76 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Tue, 5 Mar 2019 18:03:14 +0000 Subject: Move settings from registration to ratelimiting in config file --- synapse/config/ratelimiting.py | 18 ++++++++++++++++++ synapse/config/registration.py | 20 ++------------------ 2 files changed, 20 insertions(+), 18 deletions(-) (limited to 'synapse') diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index 54b71e6841..093042fdb9 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -27,6 +27,13 @@ class RatelimitConfig(Config): self.federation_rc_reject_limit = config["federation_rc_reject_limit"] self.federation_rc_concurrent = config["federation_rc_concurrent"] + self.rc_registration_requests_per_second = config.get( + "rc_registration_requests_per_second", 0.17, + ) + self.rc_registration_request_burst_count = config.get( + "rc_registration_request_burst_count", 3, + ) + def default_config(self, **kwargs): return """\ ## Ratelimiting ## @@ -62,4 +69,15 @@ class RatelimitConfig(Config): # single server # federation_rc_concurrent: 3 + + # Number of registration requests a client can send per second. + # Defaults to 1/minute (0.17). + # + #rc_registration_requests_per_second: 0.17 + + # Number of registration requests a client can send before being + # throttled. + # Defaults to 3. 
+ # + #rc_registration_request_burst_count: 3.0 """ diff --git a/synapse/config/registration.py b/synapse/config/registration.py index d32f6fff73..d34dc9e456 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -54,13 +54,6 @@ class RegistrationConfig(Config): config.get("disable_msisdn_registration", False) ) - self.rc_registration_requests_per_second = config.get( - "rc_registration_requests_per_second", 0.17, - ) - self.rc_registration_request_burst_count = config.get( - "rc_registration_request_burst_count", 3, - ) - def default_config(self, generate_secrets=False, **kwargs): if generate_secrets: registration_shared_secret = 'registration_shared_secret: "%s"' % ( @@ -71,6 +64,8 @@ class RegistrationConfig(Config): return """\ ## Registration ## + # Registration can be rate-limited using the parameters in the "Ratelimiting" + # section of this file. # Enable registration for new users. enable_registration: False @@ -147,17 +142,6 @@ class RegistrationConfig(Config): # users cannot be auto-joined since they do not exist. # autocreate_auto_join_rooms: true - - # Number of registration requests a client can send per second. - # Defaults to 1/minute (0.17). - # - #rc_registration_requests_per_second: 0.17 - - # Number of registration requests a client can send before being - # throttled. - # Defaults to 3. 
- # - #rc_registration_request_burst_count: 3.0 """ % locals() def add_arguments(self, parser): -- cgit 1.5.1 From d7dbad3526136cfc9fdbd568635be5016fb637db Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Tue, 5 Mar 2019 18:41:27 +0000 Subject: Split ratelimiters in two (one for events, one for registration) --- synapse/handlers/_base.py | 2 +- synapse/handlers/message.py | 2 +- synapse/handlers/register.py | 2 +- synapse/rest/client/v2_alpha/register.py | 2 +- synapse/server.py | 10 +++++++--- tests/handlers/test_profile.py | 2 +- tests/replication/slave/storage/_base.py | 2 +- tests/rest/client/v1/test_events.py | 2 +- tests/rest/client/v1/test_typing.py | 2 +- 9 files changed, 15 insertions(+), 11 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index d8d86d6ff3..a2212e2023 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -44,7 +44,7 @@ class BaseHandler(object): self.notifier = hs.get_notifier() self.state_handler = hs.get_state_handler() self.distributor = hs.get_distributor() - self.ratelimiter = hs.get_ratelimiter() + self.ratelimiter = hs.get_events_ratelimiter() self.clock = hs.get_clock() self.hs = hs diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index c762b58902..120aa0d017 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -224,7 +224,7 @@ class EventCreationHandler(object): self.profile_handler = hs.get_profile_handler() self.event_builder_factory = hs.get_event_builder_factory() self.server_name = hs.hostname - self.ratelimiter = hs.get_ratelimiter() + self.ratelimiter = hs.get_events_ratelimiter() self.notifier = hs.get_notifier() self.config = hs.config diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 47d5e276f8..03130edc54 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -61,7 +61,7 @@ class RegistrationHandler(BaseHandler): self.user_directory_handler = 
hs.get_user_directory_handler() self.captcha_client = CaptchaServerHttpClient(hs) self.identity_handler = self.hs.get_handlers().identity_handler - self.ratelimiter = hs.get_ratelimiter() + self.ratelimiter = hs.get_registration_ratelimiter() self._next_generated_user_id = None diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index b7f354570c..6f34029431 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -196,7 +196,7 @@ class RegisterRestServlet(RestServlet): self.identity_handler = hs.get_handlers().identity_handler self.room_member_handler = hs.get_room_member_handler() self.macaroon_gen = hs.get_macaroon_generator() - self.ratelimiter = hs.get_ratelimiter() + self.ratelimiter = hs.get_registration_ratelimiter() self.clock = hs.get_clock() @interactive_auth_handler diff --git a/synapse/server.py b/synapse/server.py index 4323e7ff12..f3ca3e259a 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -205,7 +205,8 @@ class HomeServer(object): self.clock = Clock(reactor) self.distributor = Distributor() - self.ratelimiter = Ratelimiter() + self.events_ratelimiter = Ratelimiter() + self.registration_ratelimiter = Ratelimiter() self.datastore = None @@ -248,8 +249,11 @@ class HomeServer(object): def get_distributor(self): return self.distributor - def get_ratelimiter(self): - return self.ratelimiter + def get_events_ratelimiter(self): + return self.events_ratelimiter + + def get_registration_ratelimiter(self): + return self.registration_ratelimiter def build_federation_client(self): return FederationClient(self) diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py index d60c124eec..905816a44b 100644 --- a/tests/handlers/test_profile.py +++ b/tests/handlers/test_profile.py @@ -58,7 +58,7 @@ class ProfileTestCase(unittest.TestCase): ratelimiter=NonCallableMock(spec_set=["can_do_action"]), ) - self.ratelimiter = hs.get_ratelimiter() + 
self.ratelimiter = hs.get_events_ratelimiter() self.ratelimiter.can_do_action.return_value = (True, 0) self.store = hs.get_datastore() diff --git a/tests/replication/slave/storage/_base.py b/tests/replication/slave/storage/_base.py index 524af4f8d1..b293e04355 100644 --- a/tests/replication/slave/storage/_base.py +++ b/tests/replication/slave/storage/_base.py @@ -34,7 +34,7 @@ class BaseSlavedStoreTestCase(unittest.HomeserverTestCase): ratelimiter=NonCallableMock(spec_set=["can_do_action"]), ) - hs.get_ratelimiter().can_do_action.return_value = (True, 0) + hs.get_events_ratelimiter().can_do_action.return_value = (True, 0) return hs diff --git a/tests/rest/client/v1/test_events.py b/tests/rest/client/v1/test_events.py index 36d8547275..cd328dc5f1 100644 --- a/tests/rest/client/v1/test_events.py +++ b/tests/rest/client/v1/test_events.py @@ -42,7 +42,7 @@ class EventStreamPermissionsTestCase(unittest.HomeserverTestCase): hs = self.setup_test_homeserver( config=config, ratelimiter=NonCallableMock(spec_set=["can_do_action"]) ) - self.ratelimiter = hs.get_ratelimiter() + self.ratelimiter = hs.get_events_ratelimiter() self.ratelimiter.can_do_action.return_value = (True, 0) hs.get_handlers().federation_handler = Mock() diff --git a/tests/rest/client/v1/test_typing.py b/tests/rest/client/v1/test_typing.py index 30fb77bac8..2e2e314a49 100644 --- a/tests/rest/client/v1/test_typing.py +++ b/tests/rest/client/v1/test_typing.py @@ -47,7 +47,7 @@ class RoomTypingTestCase(unittest.HomeserverTestCase): self.event_source = hs.get_event_sources().sources["typing"] - self.ratelimiter = hs.get_ratelimiter() + self.ratelimiter = hs.get_events_ratelimiter() self.ratelimiter.can_do_action.return_value = (True, 0) hs.get_handlers().federation_handler = Mock() -- cgit 1.5.1 From f4195f41188928b8da9bed38c60e221466274a48 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 6 Mar 2019 10:55:22 +0000 Subject: Revert "Split ratelimiters in two (one for events, one for registration)" This 
reverts commit d7dbad3526136cfc9fdbd568635be5016fb637db. --- synapse/handlers/_base.py | 2 +- synapse/handlers/message.py | 2 +- synapse/handlers/register.py | 2 +- synapse/rest/client/v2_alpha/register.py | 2 +- synapse/server.py | 10 +++------- tests/handlers/test_profile.py | 2 +- tests/replication/slave/storage/_base.py | 2 +- tests/rest/client/v1/test_events.py | 2 +- tests/rest/client/v1/test_typing.py | 2 +- 9 files changed, 11 insertions(+), 15 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index a2212e2023..d8d86d6ff3 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -44,7 +44,7 @@ class BaseHandler(object): self.notifier = hs.get_notifier() self.state_handler = hs.get_state_handler() self.distributor = hs.get_distributor() - self.ratelimiter = hs.get_events_ratelimiter() + self.ratelimiter = hs.get_ratelimiter() self.clock = hs.get_clock() self.hs = hs diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 120aa0d017..c762b58902 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -224,7 +224,7 @@ class EventCreationHandler(object): self.profile_handler = hs.get_profile_handler() self.event_builder_factory = hs.get_event_builder_factory() self.server_name = hs.hostname - self.ratelimiter = hs.get_events_ratelimiter() + self.ratelimiter = hs.get_ratelimiter() self.notifier = hs.get_notifier() self.config = hs.config diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 03130edc54..47d5e276f8 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -61,7 +61,7 @@ class RegistrationHandler(BaseHandler): self.user_directory_handler = hs.get_user_directory_handler() self.captcha_client = CaptchaServerHttpClient(hs) self.identity_handler = self.hs.get_handlers().identity_handler - self.ratelimiter = hs.get_registration_ratelimiter() + self.ratelimiter = hs.get_ratelimiter() 
self._next_generated_user_id = None diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 6f34029431..b7f354570c 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -196,7 +196,7 @@ class RegisterRestServlet(RestServlet): self.identity_handler = hs.get_handlers().identity_handler self.room_member_handler = hs.get_room_member_handler() self.macaroon_gen = hs.get_macaroon_generator() - self.ratelimiter = hs.get_registration_ratelimiter() + self.ratelimiter = hs.get_ratelimiter() self.clock = hs.get_clock() @interactive_auth_handler diff --git a/synapse/server.py b/synapse/server.py index f3ca3e259a..4323e7ff12 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -205,8 +205,7 @@ class HomeServer(object): self.clock = Clock(reactor) self.distributor = Distributor() - self.events_ratelimiter = Ratelimiter() - self.registration_ratelimiter = Ratelimiter() + self.ratelimiter = Ratelimiter() self.datastore = None @@ -249,11 +248,8 @@ class HomeServer(object): def get_distributor(self): return self.distributor - def get_events_ratelimiter(self): - return self.events_ratelimiter - - def get_registration_ratelimiter(self): - return self.registration_ratelimiter + def get_ratelimiter(self): + return self.ratelimiter def build_federation_client(self): return FederationClient(self) diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py index 905816a44b..d60c124eec 100644 --- a/tests/handlers/test_profile.py +++ b/tests/handlers/test_profile.py @@ -58,7 +58,7 @@ class ProfileTestCase(unittest.TestCase): ratelimiter=NonCallableMock(spec_set=["can_do_action"]), ) - self.ratelimiter = hs.get_events_ratelimiter() + self.ratelimiter = hs.get_ratelimiter() self.ratelimiter.can_do_action.return_value = (True, 0) self.store = hs.get_datastore() diff --git a/tests/replication/slave/storage/_base.py b/tests/replication/slave/storage/_base.py index 
b293e04355..524af4f8d1 100644 --- a/tests/replication/slave/storage/_base.py +++ b/tests/replication/slave/storage/_base.py @@ -34,7 +34,7 @@ class BaseSlavedStoreTestCase(unittest.HomeserverTestCase): ratelimiter=NonCallableMock(spec_set=["can_do_action"]), ) - hs.get_events_ratelimiter().can_do_action.return_value = (True, 0) + hs.get_ratelimiter().can_do_action.return_value = (True, 0) return hs diff --git a/tests/rest/client/v1/test_events.py b/tests/rest/client/v1/test_events.py index cd328dc5f1..36d8547275 100644 --- a/tests/rest/client/v1/test_events.py +++ b/tests/rest/client/v1/test_events.py @@ -42,7 +42,7 @@ class EventStreamPermissionsTestCase(unittest.HomeserverTestCase): hs = self.setup_test_homeserver( config=config, ratelimiter=NonCallableMock(spec_set=["can_do_action"]) ) - self.ratelimiter = hs.get_events_ratelimiter() + self.ratelimiter = hs.get_ratelimiter() self.ratelimiter.can_do_action.return_value = (True, 0) hs.get_handlers().federation_handler = Mock() diff --git a/tests/rest/client/v1/test_typing.py b/tests/rest/client/v1/test_typing.py index 2e2e314a49..30fb77bac8 100644 --- a/tests/rest/client/v1/test_typing.py +++ b/tests/rest/client/v1/test_typing.py @@ -47,7 +47,7 @@ class RoomTypingTestCase(unittest.HomeserverTestCase): self.event_source = hs.get_event_sources().sources["typing"] - self.ratelimiter = hs.get_events_ratelimiter() + self.ratelimiter = hs.get_ratelimiter() self.ratelimiter.can_do_action.return_value = (True, 0) hs.get_handlers().federation_handler = Mock() -- cgit 1.5.1 From 6f3cde8b2500aafad2438de7eddfc442ec5288c7 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Wed, 6 Mar 2019 11:02:42 +0000 Subject: Make registration ratelimiter separate from the main events one --- synapse/handlers/register.py | 2 +- synapse/rest/client/v2_alpha/register.py | 2 +- synapse/server.py | 4 ++++ 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/register.py 
b/synapse/handlers/register.py index 47d5e276f8..03130edc54 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -61,7 +61,7 @@ class RegistrationHandler(BaseHandler): self.user_directory_handler = hs.get_user_directory_handler() self.captcha_client = CaptchaServerHttpClient(hs) self.identity_handler = self.hs.get_handlers().identity_handler - self.ratelimiter = hs.get_ratelimiter() + self.ratelimiter = hs.get_registration_ratelimiter() self._next_generated_user_id = None diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index b7f354570c..6f34029431 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -196,7 +196,7 @@ class RegisterRestServlet(RestServlet): self.identity_handler = hs.get_handlers().identity_handler self.room_member_handler = hs.get_room_member_handler() self.macaroon_gen = hs.get_macaroon_generator() - self.ratelimiter = hs.get_ratelimiter() + self.ratelimiter = hs.get_registration_ratelimiter() self.clock = hs.get_clock() @interactive_auth_handler diff --git a/synapse/server.py b/synapse/server.py index 4323e7ff12..72835e8c86 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -206,6 +206,7 @@ class HomeServer(object): self.clock = Clock(reactor) self.distributor = Distributor() self.ratelimiter = Ratelimiter() + self.registration_ratelimiter = Ratelimiter() self.datastore = None @@ -251,6 +252,9 @@ class HomeServer(object): def get_ratelimiter(self): return self.ratelimiter + def get_registration_ratelimiter(self): + return self.registration_ratelimiter + def build_federation_client(self): return FederationClient(self) -- cgit 1.5.1 From a9de04be724be9e19af0a5a5839c65924f90886a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 12 Feb 2019 10:31:21 +0000 Subject: Implement soft fail --- synapse/events/__init__.py | 14 ++++++++ synapse/handlers/federation.py | 77 +++++++++++++++++++++++++++++++++++++++++- 
synapse/storage/events.py | 1 + synapse/visibility.py | 4 +++ 4 files changed, 95 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index 20c1ab4203..bd130f8816 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -77,6 +77,20 @@ class _EventInternalMetadata(object): """ return getattr(self, "recheck_redaction", False) + def is_soft_failed(self): + """Whether the event has been soft failed. + + Soft failed events should be handled as usual, except: + 1. They should not go down sync or event streams, or generally + sent to clients. + 2. They should not be added to the forward extremities (and + therefore not to current state). + + Returns: + bool + """ + return getattr(self, "soft_failed", False) + def _event_dict_property(key): # We want to be able to use hasattr with the event dict properties. diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 72b63d64d0..a75abe8e91 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -45,6 +45,7 @@ from synapse.api.errors import ( SynapseError, ) from synapse.crypto.event_signing import compute_event_signature +from synapse.event_auth import auth_types_for_event from synapse.events.validator import EventValidator from synapse.replication.http.federation import ( ReplicationCleanRoomRestServlet, @@ -1628,6 +1629,7 @@ class FederationHandler(BaseHandler): origin, event, state=state, auth_events=auth_events, + backfilled=backfilled, ) # reraise does not allow inlineCallbacks to preserve the stacktrace, so we @@ -1672,6 +1674,7 @@ class FederationHandler(BaseHandler): event, state=ev_info.get("state"), auth_events=ev_info.get("auth_events"), + backfilled=backfilled, ) defer.returnValue(res) @@ -1794,7 +1797,7 @@ class FederationHandler(BaseHandler): ) @defer.inlineCallbacks - def _prep_event(self, origin, event, state=None, auth_events=None): + def _prep_event(self, origin, 
event, state, auth_events, backfilled): """ Args: @@ -1802,6 +1805,7 @@ class FederationHandler(BaseHandler): event: state: auth_events: + backfilled (bool) Returns: Deferred, which resolves to synapse.events.snapshot.EventContext @@ -1843,6 +1847,77 @@ class FederationHandler(BaseHandler): context.rejected = RejectedReason.AUTH_ERROR + # For new (non-backfilled and non-outlier) events we check if the event + # passes auth based on the current state. If it doesn't then we + # "soft-fail" the event. + do_soft_fail_check = not backfilled and not event.internal_metadata.is_outlier() + if do_soft_fail_check: + extrem_ids = yield self.store.get_latest_event_ids_in_room( + event.room_id, + ) + + extrem_ids = set(extrem_ids) + prev_event_ids = set(event.prev_event_ids()) + + if extrem_ids == prev_event_ids: + # If they're the same then the current state is the same as the + # state at the event, so no point rechecking auth for soft fail. + do_soft_fail_check = False + + if do_soft_fail_check: + room_version = yield self.store.get_room_version(event.room_id) + + # Calculate the "current state". + if state is not None: + # If we're explicitly given the state then we won't have all the + # prev events, and so we have a gap in the graph. In this case + # we want to be a little careful as we might have been down for + # a while and have an incorrect view of the current state, + # however we still want to do checks as gaps are easy to + # maliciously manufacture. + # + # So we use a "current state" that is actually a state + # resolution across the current forward extremities and the + # given state at the event. This should correctly handle cases + # like bans, especially with state res v2. 
+ + state_sets = yield self.store.get_state_groups( + event.room_id, extrem_ids, + ) + state_sets = list(state_sets.values()) + state_sets.append(state) + current_state_ids = yield self.state_handler.resolve_events( + room_version, state_sets, event, + ) + current_state_ids = { + k: e.event_id for k, e in iteritems(current_state_ids) + } + else: + current_state_ids = yield self.state_handler.get_current_state_ids( + event.room_id, latest_event_ids=extrem_ids, + ) + + # Now check if event pass auth against said current state + auth_types = auth_types_for_event(event) + current_state_ids = [ + e for k, e in iteritems(current_state_ids) + if k in auth_types + ] + + current_auth_events = yield self.store.get_events(current_state_ids) + current_auth_events = { + (e.type, e.state_key): e for e in current_auth_events.values() + } + + try: + self.auth.check(room_version, event, auth_events=current_auth_events) + except AuthError as e: + logger.warn( + "Failed current state auth resolution for %r because %s", + event, e, + ) + event.internal_metadata.soft_failed = True + if event.type == EventTypes.GuestAccess and not context.rejected: yield self.maybe_kick_guest_users(event) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 06db9e56e6..990a5eaaae 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -537,6 +537,7 @@ class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore new_events = [ event for event, ctx in event_contexts if not event.internal_metadata.is_outlier() and not ctx.rejected + and not event.internal_metadata.is_soft_failed() ] # start with the existing forward extremities diff --git a/synapse/visibility.py b/synapse/visibility.py index efec21673b..16c40cd74c 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -67,6 +67,10 @@ def filter_events_for_client(store, user_id, events, is_peeking=False, Returns: Deferred[list[synapse.events.EventBase]] """ + # Filter out events that have 
been soft failed so that we don't relay them + # to clients. + events = list(e for e in events if not e.internal_metadata.is_soft_failed()) + types = ( (EventTypes.RoomHistoryVisibility, ""), (EventTypes.Member, user_id), -- cgit 1.5.1 From 6d13bdec91e228a54a856ebe0e104062d96a4180 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 6 Mar 2019 17:21:08 +0000 Subject: Add docstrings from matrix-org-hotfixes --- synapse/handlers/sync.py | 33 ++++++++++++++++++++++++++------- synapse/storage/stream.py | 19 +++++++++++++++++++ 2 files changed, 45 insertions(+), 7 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index bd97241ab4..42f514cd10 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1894,15 +1894,34 @@ def _calculate_state( class SyncResultBuilder(object): - "Used to help build up a new SyncResult for a user" + """Used to help build up a new SyncResult for a user + + Attributes: + sync_config (SyncConfig) + full_state (bool) + since_token (StreamToken) + now_token (StreamToken) + joined_room_ids (list[str]) + + # The following mirror the fields in a sync response + presence (list) + account_data (list) + joined (list[JoinedSyncResult]) + invited (list[InvitedSyncResult]) + archived (list[ArchivedSyncResult]) + device (list) + groups (GroupsSyncResult|None) + to_device (list) + """ def __init__(self, sync_config, full_state, since_token, now_token, joined_room_ids): """ Args: - sync_config(SyncConfig) - full_state(bool): The full_state flag as specified by user - since_token(StreamToken): The token supplied by user, or None. - now_token(StreamToken): The token to sync up to. + sync_config (SyncConfig) + full_state (bool): The full_state flag as specified by user + since_token (StreamToken): The token supplied by user, or None. + now_token (StreamToken): The token to sync up to. 
+ joined_room_ids (list[str]): List of rooms the user is joined to """ self.sync_config = sync_config self.full_state = full_state @@ -1930,8 +1949,8 @@ class RoomSyncResultBuilder(object): Args: room_id(str) rtype(str): One of `"joined"` or `"archived"` - events(list): List of events to include in the room, (more events - may be added when generating result). + events(list[FrozenEvent]): List of events to include in the room + (more events may be added when generating result). newly_joined(bool): If the user has newly joined the room full_state(bool): Whether the full state should be sent in result since_token(StreamToken): Earliest point to return events from, or None diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index d6cfdba519..580fafeb3a 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -191,6 +191,25 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): @defer.inlineCallbacks def get_room_events_stream_for_rooms(self, room_ids, from_key, to_key, limit=0, order='DESC'): + """Get new room events in stream ordering since `from_key`. + + Args: + room_id (str) + from_key (str): Token from which no events are returned before + to_key (str): Token from which no events are returned after. (This + is typically the current stream token) + limit (int): Maximum number of events to return + order (str): Either "DESC" or "ASC". Determines which events are + returned when the result is limited. If "DESC" then the most + recent `limit` events are returned, otherwise returns the + oldest `limit` events. + + Returns: + Deferred[dict[str,tuple[list[FrozenEvent], str]]] + A map from room id to a tuple containing: + - list of recent events in the room + - stream ordering key for the start of the chunk of events returned. 
+ """ from_id = RoomStreamToken.parse_stream_token(from_key).stream room_ids = yield self._events_stream_cache.get_entities_changed( -- cgit 1.5.1 From 8b7790e68f552748b0fe20455c766a2376c2fefd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 6 Mar 2019 17:29:15 +0000 Subject: Port #4422 debug logging from hotfixes --- synapse/handlers/sync.py | 53 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 7 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index bd97241ab4..32101eb36a 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -39,6 +39,9 @@ from synapse.visibility import filter_events_for_client logger = logging.getLogger(__name__) +# Debug logger for https://github.com/matrix-org/synapse/issues/4422 +issue4422_logger = logging.getLogger("synapse.handler.sync.4422_debug") + # Counts the number of times we returned a non-empty sync. `type` is one of # "initial_sync", "full_state_sync" or "incremental_sync", `lazy_loaded` is @@ -962,6 +965,15 @@ class SyncHandler(object): yield self._generate_sync_entry_for_groups(sync_result_builder) + # debug for https://github.com/matrix-org/synapse/issues/4422 + for joined_room in sync_result_builder.joined: + room_id = joined_room.room_id + if room_id in newly_joined_rooms: + issue4422_logger.debug( + "Sync result for newly joined room %s: %r", + room_id, joined_room, + ) + defer.returnValue(SyncResult( presence=sync_result_builder.presence, account_data=sync_result_builder.account_data, @@ -1425,6 +1437,17 @@ class SyncHandler(object): old_mem_ev = yield self.store.get_event( old_mem_ev_id, allow_none=True ) + + # debug for #4422 + if has_join: + prev_membership = None + if old_mem_ev: + prev_membership = old_mem_ev.membership + issue4422_logger.debug( + "Previous membership for room %s with join: %s (event %s)", + room_id, prev_membership, old_mem_ev_id, + ) + if not old_mem_ev or old_mem_ev.membership != 
Membership.JOIN: newly_joined_rooms.append(room_id) @@ -1519,30 +1542,39 @@ class SyncHandler(object): for room_id in sync_result_builder.joined_room_ids: room_entry = room_to_events.get(room_id, None) + newly_joined = room_id in newly_joined_rooms if room_entry: events, start_key = room_entry prev_batch_token = now_token.copy_and_replace("room_key", start_key) - room_entries.append(RoomSyncResultBuilder( + entry = RoomSyncResultBuilder( room_id=room_id, rtype="joined", events=events, - newly_joined=room_id in newly_joined_rooms, + newly_joined=newly_joined, full_state=False, - since_token=None if room_id in newly_joined_rooms else since_token, + since_token=None if newly_joined else since_token, upto_token=prev_batch_token, - )) + ) else: - room_entries.append(RoomSyncResultBuilder( + entry = RoomSyncResultBuilder( room_id=room_id, rtype="joined", events=[], - newly_joined=room_id in newly_joined_rooms, + newly_joined=newly_joined, full_state=False, since_token=since_token, upto_token=since_token, - )) + ) + + if newly_joined: + # debugging for https://github.com/matrix-org/synapse/issues/4422 + issue4422_logger.debug( + "RoomSyncResultBuilder events for newly joined room %s: %r", + room_id, entry.events, + ) + room_entries.append(entry) defer.returnValue((room_entries, invited, newly_joined_rooms, newly_left_rooms)) @@ -1663,6 +1695,13 @@ class SyncHandler(object): newly_joined_room=newly_joined, ) + if newly_joined: + # debug for https://github.com/matrix-org/synapse/issues/4422 + issue4422_logger.debug( + "Timeline events after filtering in newly-joined room %s: %r", + room_id, batch, + ) + # When we join the room (or the client requests full_state), we should # send down any existing tags. 
Usually the user won't have tags in a # newly joined room, unless either a) they've joined before or b) the -- cgit 1.5.1 From b879870b2dc3e5cd1e8a9907209b5af66e32ddd2 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 6 Mar 2019 17:35:11 +0000 Subject: Send message after room has been shutdown Currently the explanation message is sent to the abuse room before any users are forced joined, which means it tends to get lost in the backlog of joins. So instead we send the message *after* we've forced joined everyone. --- synapse/rest/client/v1/admin.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'synapse') diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 0201cf1186..2a29f0c2af 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -488,17 +488,6 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): ) new_room_id = info["room_id"] - yield self.event_creation_handler.create_and_send_nonmember_event( - room_creator_requester, - { - "type": "m.room.message", - "content": {"body": message, "msgtype": "m.text"}, - "room_id": new_room_id, - "sender": new_room_user_id, - }, - ratelimit=False, - ) - requester_user_id = requester.user.to_string() logger.info("Shutting down room %r", room_id) @@ -536,6 +525,17 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): kicked_users.append(user_id) + yield self.event_creation_handler.create_and_send_nonmember_event( + room_creator_requester, + { + "type": "m.room.message", + "content": {"body": message, "msgtype": "m.text"}, + "room_id": new_room_id, + "sender": new_room_user_id, + }, + ratelimit=False, + ) + aliases_for_room = yield self.store.get_aliases_for_room(room_id) yield self.store.update_aliases_for_room( -- cgit 1.5.1 From face0c5b3c8ed6d0f29f7eaa3a2f9fd19eb99540 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 6 Mar 2019 17:39:32 +0000 Subject: Prefill client IPs cache on workers --- 
synapse/replication/slave/storage/client_ips.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'synapse') diff --git a/synapse/replication/slave/storage/client_ips.py b/synapse/replication/slave/storage/client_ips.py index 60641f1a49..5b8521c770 100644 --- a/synapse/replication/slave/storage/client_ips.py +++ b/synapse/replication/slave/storage/client_ips.py @@ -43,6 +43,8 @@ class SlavedClientIpStore(BaseSlavedStore): if last_seen is not None and (now - last_seen) < LAST_SEEN_GRANULARITY: return + self.client_ip_last_seen.prefill(key, now) + self.hs.get_tcp_replication().send_user_ip( user_id, access_token, ip, user_agent, device_id, now ) -- cgit 1.5.1 From f6135d06cf94fdef9942051f43872c7518511e74 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Thu, 7 Mar 2019 01:22:53 -0800 Subject: Rewrite userdir to be faster (#4537) --- changelog.d/4537.feature | 1 + synapse/handlers/user_directory.py | 222 ++++---------------- synapse/storage/schema/delta/53/user_share.sql | 47 +++++ synapse/storage/user_directory.py | 271 ++++++++----------------- tests/handlers/test_user_directory.py | 266 ++++++++++++++++++++---- tests/storage/test_user_directory.py | 2 - 6 files changed, 400 insertions(+), 409 deletions(-) create mode 100644 changelog.d/4537.feature create mode 100644 synapse/storage/schema/delta/53/user_share.sql (limited to 'synapse') diff --git a/changelog.d/4537.feature b/changelog.d/4537.feature new file mode 100644 index 0000000000..8f792b8890 --- /dev/null +++ b/changelog.d/4537.feature @@ -0,0 +1 @@ +The user directory has been rewritten to make it faster, with less chance of falling behind on a large server. 
diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 283c6c1b81..c21da8343a 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -15,7 +15,7 @@ import logging -from six import iteritems +from six import iteritems, iterkeys from twisted.internet import defer @@ -63,10 +63,6 @@ class UserDirectoryHandler(object): # When start up for the first time we need to populate the user_directory. # This is a set of user_id's we've inserted already self.initially_handled_users = set() - self.initially_handled_users_in_public = set() - - self.initially_handled_users_share = set() - self.initially_handled_users_share_private_room = set() # The current position in the current_state_delta stream self.pos = None @@ -140,7 +136,6 @@ class UserDirectoryHandler(object): # FIXME(#3714): We should probably do this in the same worker as all # the other changes. yield self.store.remove_from_user_dir(user_id) - yield self.store.remove_from_user_in_public_room(user_id) @defer.inlineCallbacks def _unsafe_process(self): @@ -215,15 +210,13 @@ class UserDirectoryHandler(object): logger.info("Processed all users") self.initially_handled_users = None - self.initially_handled_users_in_public = None - self.initially_handled_users_share = None - self.initially_handled_users_share_private_room = None yield self.store.update_user_directory_stream_pos(new_pos) @defer.inlineCallbacks def _handle_initial_room(self, room_id): - """Called when we initially fill out user_directory one room at a time + """ + Called when we initially fill out user_directory one room at a time """ is_in_room = yield self.store.is_host_joined(room_id, self.server_name) if not is_in_room: @@ -238,23 +231,15 @@ class UserDirectoryHandler(object): unhandled_users = user_ids - self.initially_handled_users yield self.store.add_profiles_to_user_dir( - room_id, {user_id: users_with_profile[user_id] for user_id in unhandled_users}, ) 
self.initially_handled_users |= unhandled_users - if is_public: - yield self.store.add_users_to_public_room( - room_id, user_ids=user_ids - self.initially_handled_users_in_public - ) - self.initially_handled_users_in_public |= user_ids - # We now go and figure out the new users who share rooms with user entries # We sleep aggressively here as otherwise it can starve resources. # We also batch up inserts/updates, but try to avoid too many at once. to_insert = set() - to_update = set() count = 0 for user_id in user_ids: if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: @@ -277,21 +262,7 @@ class UserDirectoryHandler(object): count += 1 user_set = (user_id, other_user_id) - - if user_set in self.initially_handled_users_share_private_room: - continue - - if user_set in self.initially_handled_users_share: - if is_public: - continue - to_update.add(user_set) - else: - to_insert.add(user_set) - - if is_public: - self.initially_handled_users_share.add(user_set) - else: - self.initially_handled_users_share_private_room.add(user_set) + to_insert.add(user_set) if len(to_insert) > self.INITIAL_ROOM_BATCH_SIZE: yield self.store.add_users_who_share_room( @@ -299,22 +270,10 @@ class UserDirectoryHandler(object): ) to_insert.clear() - if len(to_update) > self.INITIAL_ROOM_BATCH_SIZE: - yield self.store.update_users_who_share_room( - room_id, not is_public, to_update - ) - to_update.clear() - if to_insert: yield self.store.add_users_who_share_room(room_id, not is_public, to_insert) to_insert.clear() - if to_update: - yield self.store.update_users_who_share_room( - room_id, not is_public, to_update - ) - to_update.clear() - @defer.inlineCallbacks def _handle_deltas(self, deltas): """Called with the state deltas to process @@ -356,6 +315,7 @@ class UserDirectoryHandler(object): user_ids = yield self.store.get_users_in_dir_due_to_room( room_id ) + for user_id in user_ids: yield self._handle_remove_user(room_id, user_id) return @@ -436,14 +396,20 @@ class UserDirectoryHandler(object): # 
ignore the change return - if change: - users_with_profile = yield self.state.get_current_user_in_room(room_id) - for user_id, profile in iteritems(users_with_profile): - yield self._handle_new_user(room_id, user_id, profile) - else: - users = yield self.store.get_users_in_public_due_to_room(room_id) - for user_id in users: - yield self._handle_remove_user(room_id, user_id) + users_with_profile = yield self.state.get_current_user_in_room(room_id) + + # Remove every user from the sharing tables for that room. + for user_id in iterkeys(users_with_profile): + yield self.store.remove_user_who_share_room(user_id, room_id) + + # Then, re-add them to the tables. + # NOTE: this is not the most efficient method, as handle_new_user sets + # up local_user -> other_user and other_user_whos_local -> local_user, + # which when ran over an entire room, will result in the same values + # being added multiple times. The batching upserts shouldn't make this + # too bad, though. + for user_id, profile in iteritems(users_with_profile): + yield self._handle_new_user(room_id, user_id, profile) @defer.inlineCallbacks def _handle_local_user(self, user_id): @@ -457,7 +423,7 @@ class UserDirectoryHandler(object): row = yield self.store.get_user_in_directory(user_id) if not row: - yield self.store.add_profiles_to_user_dir(None, {user_id: profile}) + yield self.store.add_profiles_to_user_dir({user_id: profile}) @defer.inlineCallbacks def _handle_new_user(self, room_id, user_id, profile): @@ -471,55 +437,27 @@ class UserDirectoryHandler(object): row = yield self.store.get_user_in_directory(user_id) if not row: - yield self.store.add_profiles_to_user_dir(room_id, {user_id: profile}) + yield self.store.add_profiles_to_user_dir({user_id: profile}) is_public = yield self.store.is_room_world_readable_or_publicly_joinable( room_id ) - - if is_public: - row = yield self.store.get_user_in_public_room(user_id) - if not row: - yield self.store.add_users_to_public_room(room_id, [user_id]) - else: - 
logger.debug("Not adding new user to public dir, %r", user_id) - - # Now we update users who share rooms with users. We do this by getting - # all the current users in the room and seeing which aren't already - # marked in the database as sharing with `user_id` - + # Now we update users who share rooms with users. users_with_profile = yield self.state.get_current_user_in_room(room_id) to_insert = set() - to_update = set() - - is_appservice = self.store.get_if_app_services_interested_in_user(user_id) # First, if they're our user then we need to update for every user - if self.is_mine_id(user_id) and not is_appservice: - # Returns a map of other_user_id -> shared_private. We only need - # to update mappings if for users that either don't share a room - # already (aren't in the map) or, if the room is private, those that - # only share a public room. - user_ids_shared = yield self.store.get_users_who_share_room_from_dir( - user_id - ) + if self.is_mine_id(user_id): - for other_user_id in users_with_profile: - if user_id == other_user_id: - continue + is_appservice = self.store.get_if_app_services_interested_in_user(user_id) + + # We don't care about appservice users. 
+ if not is_appservice: + for other_user_id in users_with_profile: + if user_id == other_user_id: + continue - shared_is_private = user_ids_shared.get(other_user_id) - if shared_is_private is True: - # We've already marked in the database they share a private room - continue - elif shared_is_private is False: - # They already share a public room, so only update if this is - # a private room - if not is_public: - to_update.add((user_id, other_user_id)) - elif shared_is_private is None: - # This is the first time they both share a room to_insert.add((user_id, other_user_id)) # Next we need to update for every local user in the room @@ -531,29 +469,11 @@ class UserDirectoryHandler(object): other_user_id ) if self.is_mine_id(other_user_id) and not is_appservice: - shared_is_private = yield self.store.get_if_users_share_a_room( - other_user_id, user_id - ) - if shared_is_private is True: - # We've already marked in the database they share a private room - continue - elif shared_is_private is False: - # They already share a public room, so only update if this is - # a private room - if not is_public: - to_update.add((other_user_id, user_id)) - elif shared_is_private is None: - # This is the first time they both share a room - to_insert.add((other_user_id, user_id)) + to_insert.add((other_user_id, user_id)) if to_insert: yield self.store.add_users_who_share_room(room_id, not is_public, to_insert) - if to_update: - yield self.store.update_users_who_share_room( - room_id, not is_public, to_update - ) - @defer.inlineCallbacks def _handle_remove_user(self, room_id, user_id): """Called when we might need to remove user to directory @@ -562,84 +482,16 @@ class UserDirectoryHandler(object): room_id (str): room_id that user left or stopped being public that user_id (str) """ - logger.debug("Maybe removing user %r", user_id) - - row = yield self.store.get_user_in_directory(user_id) - update_user_dir = row and row["room_id"] == room_id - - row = yield 
self.store.get_user_in_public_room(user_id) - update_user_in_public = row and row["room_id"] == room_id - - if update_user_in_public or update_user_dir: - # XXX: Make this faster? - rooms = yield self.store.get_rooms_for_user(user_id) - for j_room_id in rooms: - if not update_user_in_public and not update_user_dir: - break - - is_in_room = yield self.store.is_host_joined( - j_room_id, self.server_name - ) - - if not is_in_room: - continue - - if update_user_dir: - update_user_dir = False - yield self.store.update_user_in_user_dir(user_id, j_room_id) + logger.debug("Removing user %r", user_id) - is_public = yield self.store.is_room_world_readable_or_publicly_joinable( - j_room_id - ) + # Remove user from sharing tables + yield self.store.remove_user_who_share_room(user_id, room_id) - if update_user_in_public and is_public: - yield self.store.update_user_in_public_user_list(user_id, j_room_id) - update_user_in_public = False + # Are they still in a room with members? If not, remove them entirely. + users_in_room_with = yield self.store.get_users_who_share_room_from_dir(user_id) - if update_user_dir: + if len(users_in_room_with) == 0: yield self.store.remove_from_user_dir(user_id) - elif update_user_in_public: - yield self.store.remove_from_user_in_public_room(user_id) - - # Now handle users_who_share_rooms. 
- - # Get a list of user tuples that were in the DB due to this room and - # users (this includes tuples where the other user matches `user_id`) - user_tuples = yield self.store.get_users_in_share_dir_with_room_id( - user_id, room_id - ) - - for user_id, other_user_id in user_tuples: - # For each user tuple get a list of rooms that they still share, - # trying to find a private room, and update the entry in the DB - rooms = yield self.store.get_rooms_in_common_for_users( - user_id, other_user_id - ) - - # If they dont share a room anymore, remove the mapping - if not rooms: - yield self.store.remove_user_who_share_room(user_id, other_user_id) - continue - - found_public_share = None - for j_room_id in rooms: - is_public = yield self.store.is_room_world_readable_or_publicly_joinable( - j_room_id - ) - - if is_public: - found_public_share = j_room_id - else: - found_public_share = None - yield self.store.update_users_who_share_room( - room_id, not is_public, [(user_id, other_user_id)] - ) - break - - if found_public_share: - yield self.store.update_users_who_share_room( - room_id, not is_public, [(user_id, other_user_id)] - ) @defer.inlineCallbacks def _handle_profile_change(self, user_id, room_id, prev_event_id, event_id): diff --git a/synapse/storage/schema/delta/53/user_share.sql b/synapse/storage/schema/delta/53/user_share.sql new file mode 100644 index 0000000000..14424ded0c --- /dev/null +++ b/synapse/storage/schema/delta/53/user_share.sql @@ -0,0 +1,47 @@ +/* Copyright 2017 Vector Creations Ltd, 2019 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Old disused version of the tables below. +DROP TABLE IF EXISTS users_who_share_rooms; + +-- This is no longer used because it's duplicated by the users_who_share_public_rooms +DROP TABLE IF EXISTS users_in_public_rooms; + +-- Tables keeping track of what users share rooms. This is a map of local users +-- to local or remote users, per room. Remote users cannot be in the user_id +-- column, only the other_user_id column. There are two tables, one for public +-- rooms and those for private rooms. +CREATE TABLE IF NOT EXISTS users_who_share_public_rooms ( + user_id TEXT NOT NULL, + other_user_id TEXT NOT NULL, + room_id TEXT NOT NULL +); + +CREATE TABLE IF NOT EXISTS users_who_share_private_rooms ( + user_id TEXT NOT NULL, + other_user_id TEXT NOT NULL, + room_id TEXT NOT NULL +); + +CREATE UNIQUE INDEX users_who_share_public_rooms_u_idx ON users_who_share_public_rooms(user_id, other_user_id, room_id); +CREATE INDEX users_who_share_public_rooms_r_idx ON users_who_share_public_rooms(room_id); +CREATE INDEX users_who_share_public_rooms_o_idx ON users_who_share_public_rooms(other_user_id); + +CREATE UNIQUE INDEX users_who_share_private_rooms_u_idx ON users_who_share_private_rooms(user_id, other_user_id, room_id); +CREATE INDEX users_who_share_private_rooms_r_idx ON users_who_share_private_rooms(room_id); +CREATE INDEX users_who_share_private_rooms_o_idx ON users_who_share_private_rooms(other_user_id); + +-- Make sure that we populate the tables initially by resetting the stream ID +UPDATE user_directory_stream_pos SET stream_id = NULL; diff 
--git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index fea866c043..2317d22ed6 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -63,31 +63,14 @@ class UserDirectoryStore(SQLBaseStore): defer.returnValue(False) - @defer.inlineCallbacks - def add_users_to_public_room(self, room_id, user_ids): - """Add user to the list of users in public rooms - - Args: - room_id (str): A room_id that all users are in that is world_readable - or publically joinable - user_ids (list(str)): Users to add - """ - yield self._simple_insert_many( - table="users_in_public_rooms", - values=[{"user_id": user_id, "room_id": room_id} for user_id in user_ids], - desc="add_users_to_public_room", - ) - for user_id in user_ids: - self.get_user_in_public_room.invalidate((user_id,)) - - def add_profiles_to_user_dir(self, room_id, users_with_profile): + def add_profiles_to_user_dir(self, users_with_profile): """Add profiles to the user directory Args: - room_id (str): A room_id that all users are joined to users_with_profile (dict): Users to add to directory in the form of mapping of user_id -> ProfileInfo """ + if isinstance(self.database_engine, PostgresEngine): # We weight the loclpart most highly, then display name and finally # server name @@ -113,7 +96,7 @@ class UserDirectoryStore(SQLBaseStore): INSERT INTO user_directory_search(user_id, value) VALUES (?,?) 
""" - args = ( + args = tuple( ( user_id, "%s %s" % (user_id, p.display_name) if p.display_name else user_id, @@ -132,7 +115,7 @@ class UserDirectoryStore(SQLBaseStore): values=[ { "user_id": user_id, - "room_id": room_id, + "room_id": None, "display_name": profile.display_name, "avatar_url": profile.avatar_url, } @@ -250,16 +233,6 @@ class UserDirectoryStore(SQLBaseStore): "update_profile_in_user_dir", _update_profile_in_user_dir_txn ) - @defer.inlineCallbacks - def update_user_in_public_user_list(self, user_id, room_id): - yield self._simple_update_one( - table="users_in_public_rooms", - keyvalues={"user_id": user_id}, - updatevalues={"room_id": room_id}, - desc="update_user_in_public_user_list", - ) - self.get_user_in_public_room.invalidate((user_id,)) - def remove_from_user_dir(self, user_id): def _remove_from_user_dir_txn(txn): self._simple_delete_txn( @@ -269,62 +242,50 @@ class UserDirectoryStore(SQLBaseStore): txn, table="user_directory_search", keyvalues={"user_id": user_id} ) self._simple_delete_txn( - txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} + txn, + table="users_who_share_public_rooms", + keyvalues={"user_id": user_id}, + ) + self._simple_delete_txn( + txn, + table="users_who_share_public_rooms", + keyvalues={"other_user_id": user_id}, + ) + self._simple_delete_txn( + txn, + table="users_who_share_private_rooms", + keyvalues={"user_id": user_id}, + ) + self._simple_delete_txn( + txn, + table="users_who_share_private_rooms", + keyvalues={"other_user_id": user_id}, ) txn.call_after(self.get_user_in_directory.invalidate, (user_id,)) - txn.call_after(self.get_user_in_public_room.invalidate, (user_id,)) return self.runInteraction("remove_from_user_dir", _remove_from_user_dir_txn) - @defer.inlineCallbacks - def remove_from_user_in_public_room(self, user_id): - yield self._simple_delete( - table="users_in_public_rooms", - keyvalues={"user_id": user_id}, - desc="remove_from_user_in_public_room", - ) - 
self.get_user_in_public_room.invalidate((user_id,)) - - def get_users_in_public_due_to_room(self, room_id): - """Get all user_ids that are in the room directory because they're - in the given room_id - """ - return self._simple_select_onecol( - table="users_in_public_rooms", - keyvalues={"room_id": room_id}, - retcol="user_id", - desc="get_users_in_public_due_to_room", - ) - @defer.inlineCallbacks def get_users_in_dir_due_to_room(self, room_id): """Get all user_ids that are in the room directory because they're in the given room_id """ - user_ids_dir = yield self._simple_select_onecol( - table="user_directory", - keyvalues={"room_id": room_id}, - retcol="user_id", - desc="get_users_in_dir_due_to_room", - ) - - user_ids_pub = yield self._simple_select_onecol( - table="users_in_public_rooms", + user_ids_share_pub = yield self._simple_select_onecol( + table="users_who_share_public_rooms", keyvalues={"room_id": room_id}, - retcol="user_id", + retcol="other_user_id", desc="get_users_in_dir_due_to_room", ) - user_ids_share = yield self._simple_select_onecol( - table="users_who_share_rooms", + user_ids_share_priv = yield self._simple_select_onecol( + table="users_who_share_private_rooms", keyvalues={"room_id": room_id}, - retcol="user_id", + retcol="other_user_id", desc="get_users_in_dir_due_to_room", ) - user_ids = set(user_ids_dir) - user_ids.update(user_ids_pub) - user_ids.update(user_ids_share) + user_ids = set(user_ids_share_pub) + user_ids.update(user_ids_share_priv) defer.returnValue(user_ids) @@ -351,7 +312,7 @@ class UserDirectoryStore(SQLBaseStore): defer.returnValue([name for name, in rows]) def add_users_who_share_room(self, room_id, share_private, user_id_tuples): - """Insert entries into the users_who_share_rooms table. The first + """Insert entries into the users_who_share_*_rooms table. The first user should be a local user. 
Args: @@ -361,109 +322,71 @@ class UserDirectoryStore(SQLBaseStore): """ def _add_users_who_share_room_txn(txn): - self._simple_insert_many_txn( + + if share_private: + tbl = "users_who_share_private_rooms" + else: + tbl = "users_who_share_public_rooms" + + self._simple_upsert_many_txn( txn, - table="users_who_share_rooms", - values=[ - { - "user_id": user_id, - "other_user_id": other_user_id, - "room_id": room_id, - "share_private": share_private, - } + table=tbl, + key_names=["user_id", "other_user_id", "room_id"], + key_values=[ + (user_id, other_user_id, room_id) for user_id, other_user_id in user_id_tuples ], + value_names=(), + value_values=None, ) for user_id, other_user_id in user_id_tuples: txn.call_after( self.get_users_who_share_room_from_dir.invalidate, (user_id,) ) - txn.call_after( - self.get_if_users_share_a_room.invalidate, (user_id, other_user_id) - ) return self.runInteraction( "add_users_who_share_room", _add_users_who_share_room_txn ) - def update_users_who_share_room(self, room_id, share_private, user_id_sets): - """Updates entries in the users_who_share_rooms table. The first - user should be a local user. - - Args: - room_id (str) - share_private (bool): Is the room private - user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs. + def remove_user_who_share_room(self, user_id, room_id): """ - - def _update_users_who_share_room_txn(txn): - sql = """ - UPDATE users_who_share_rooms - SET room_id = ?, share_private = ? - WHERE user_id = ? AND other_user_id = ? 
- """ - txn.executemany( - sql, ((room_id, share_private, uid, oid) for uid, oid in user_id_sets) - ) - for user_id, other_user_id in user_id_sets: - txn.call_after( - self.get_users_who_share_room_from_dir.invalidate, (user_id,) - ) - txn.call_after( - self.get_if_users_share_a_room.invalidate, (user_id, other_user_id) - ) - - return self.runInteraction( - "update_users_who_share_room", _update_users_who_share_room_txn - ) - - def remove_user_who_share_room(self, user_id, other_user_id): - """Deletes entries in the users_who_share_rooms table. The first + Deletes entries in the users_who_share_*_rooms table. The first user should be a local user. Args: + user_id (str) room_id (str) - share_private (bool): Is the room private - user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs. """ def _remove_user_who_share_room_txn(txn): self._simple_delete_txn( txn, - table="users_who_share_rooms", - keyvalues={"user_id": user_id, "other_user_id": other_user_id}, + table="users_who_share_private_rooms", + keyvalues={"user_id": user_id, "room_id": room_id}, ) - txn.call_after( - self.get_users_who_share_room_from_dir.invalidate, (user_id,) + self._simple_delete_txn( + txn, + table="users_who_share_private_rooms", + keyvalues={"other_user_id": user_id, "room_id": room_id}, + ) + self._simple_delete_txn( + txn, + table="users_who_share_public_rooms", + keyvalues={"user_id": user_id, "room_id": room_id}, + ) + self._simple_delete_txn( + txn, + table="users_who_share_public_rooms", + keyvalues={"other_user_id": user_id, "room_id": room_id}, ) txn.call_after( - self.get_if_users_share_a_room.invalidate, (user_id, other_user_id) + self.get_users_who_share_room_from_dir.invalidate, (user_id,) ) return self.runInteraction( "remove_user_who_share_room", _remove_user_who_share_room_txn ) - @cached(max_entries=500000) - def get_if_users_share_a_room(self, user_id, other_user_id): - """Gets if users share a room. 
- - Args: - user_id (str): Must be a local user_id - other_user_id (str) - - Returns: - bool|None: None if they don't share a room, otherwise whether they - share a private room or not. - """ - return self._simple_select_one_onecol( - table="users_who_share_rooms", - keyvalues={"user_id": user_id, "other_user_id": other_user_id}, - retcol="share_private", - allow_none=True, - desc="get_if_users_share_a_room", - ) - @cachedInlineCallbacks(max_entries=500000, iterable=True) def get_users_who_share_room_from_dir(self, user_id): """Returns the set of users who share a room with `user_id` @@ -472,32 +395,29 @@ class UserDirectoryStore(SQLBaseStore): user_id(str): Must be a local user Returns: - dict: user_id -> share_private mapping + list: user_id """ - rows = yield self._simple_select_list( - table="users_who_share_rooms", + rows = yield self._simple_select_onecol( + table="users_who_share_private_rooms", + keyvalues={"user_id": user_id}, + retcol="other_user_id", + desc="get_users_who_share_room_with_user", + ) + + pub_rows = yield self._simple_select_onecol( + table="users_who_share_public_rooms", keyvalues={"user_id": user_id}, - retcols=("other_user_id", "share_private"), + retcol="other_user_id", desc="get_users_who_share_room_with_user", ) - defer.returnValue({row["other_user_id"]: row["share_private"] for row in rows}) + users = set(pub_rows) + users.update(rows) - def get_users_in_share_dir_with_room_id(self, user_id, room_id): - """Get all user tuples that are in the users_who_share_rooms due to the - given room_id. + # Remove the user themselves from this list. + users.discard(user_id) - Returns: - [(user_id, other_user_id)]: where one of the two will match the given - user_id. - """ - sql = """ - SELECT user_id, other_user_id FROM users_who_share_rooms - WHERE room_id = ? AND (user_id = ? OR other_user_id = ?) 
- """ - return self._execute( - "get_users_in_share_dir_with_room_id", None, sql, room_id, user_id, user_id - ) + defer.returnValue(list(users)) @defer.inlineCallbacks def get_rooms_in_common_for_users(self, user_id, other_user_id): @@ -532,12 +452,10 @@ class UserDirectoryStore(SQLBaseStore): def _delete_all_from_user_dir_txn(txn): txn.execute("DELETE FROM user_directory") txn.execute("DELETE FROM user_directory_search") - txn.execute("DELETE FROM users_in_public_rooms") - txn.execute("DELETE FROM users_who_share_rooms") + txn.execute("DELETE FROM users_who_share_public_rooms") + txn.execute("DELETE FROM users_who_share_private_rooms") txn.call_after(self.get_user_in_directory.invalidate_all) - txn.call_after(self.get_user_in_public_room.invalidate_all) txn.call_after(self.get_users_who_share_room_from_dir.invalidate_all) - txn.call_after(self.get_if_users_share_a_room.invalidate_all) return self.runInteraction( "delete_all_from_user_dir", _delete_all_from_user_dir_txn @@ -548,21 +466,11 @@ class UserDirectoryStore(SQLBaseStore): return self._simple_select_one( table="user_directory", keyvalues={"user_id": user_id}, - retcols=("room_id", "display_name", "avatar_url"), + retcols=("display_name", "avatar_url"), allow_none=True, desc="get_user_in_directory", ) - @cached() - def get_user_in_public_room(self, user_id): - return self._simple_select_one( - table="users_in_public_rooms", - keyvalues={"user_id": user_id}, - retcols=("room_id",), - allow_none=True, - desc="get_user_in_public_room", - ) - def get_user_directory_stream_pos(self): return self._simple_select_one_onecol( table="user_directory_stream_pos", @@ -660,14 +568,15 @@ class UserDirectoryStore(SQLBaseStore): where_clause = "1=1" else: join_clause = """ - LEFT JOIN users_in_public_rooms AS p USING (user_id) LEFT JOIN ( - SELECT other_user_id AS user_id FROM users_who_share_rooms - WHERE user_id = ? 
AND share_private - ) AS s USING (user_id) + SELECT other_user_id AS user_id FROM users_who_share_public_rooms + UNION + SELECT other_user_id AS user_id FROM users_who_share_private_rooms + WHERE user_id = ? + ) AS p USING (user_id) """ join_args = (user_id,) - where_clause = "(s.user_id IS NOT NULL OR p.user_id IS NOT NULL)" + where_clause = "p.user_id IS NOT NULL" if isinstance(self.database_engine, PostgresEngine): full_query, exact_query, prefix_query = _parse_query_postgres(search_term) @@ -686,7 +595,7 @@ class UserDirectoryStore(SQLBaseStore): %s AND vector @@ to_tsquery('english', ?) ORDER BY - (CASE WHEN s.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END) + (CASE WHEN d.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END) * (CASE WHEN display_name IS NOT NULL THEN 1.2 ELSE 1.0 END) * (CASE WHEN avatar_url IS NOT NULL THEN 1.2 ELSE 1.0 END) * ( diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index 11f2bae698..a16a2dc67b 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -14,78 +14,262 @@ # limitations under the License. from mock import Mock -from twisted.internet import defer - from synapse.api.constants import UserTypes -from synapse.handlers.user_directory import UserDirectoryHandler +from synapse.rest.client.v1 import admin, login, room from synapse.storage.roommember import ProfileInfo from tests import unittest -from tests.utils import setup_test_homeserver -class UserDirectoryHandlers(object): - def __init__(self, hs): - self.user_directory_handler = UserDirectoryHandler(hs) +class UserDirectoryTestCase(unittest.HomeserverTestCase): + """ + Tests the UserDirectoryHandler. + """ + servlets = [ + login.register_servlets, + admin.register_servlets, + room.register_servlets, + ] -class UserDirectoryTestCase(unittest.TestCase): - """ Tests the UserDirectoryHandler. 
""" + def make_homeserver(self, reactor, clock): - @defer.inlineCallbacks - def setUp(self): - hs = yield setup_test_homeserver(self.addCleanup) - self.store = hs.get_datastore() - hs.handlers = UserDirectoryHandlers(hs) + config = self.default_config() + config.update_user_directory = True + return self.setup_test_homeserver(config=config) - self.handler = hs.get_handlers().user_directory_handler + def prepare(self, reactor, clock, hs): + self.store = hs.get_datastore() + self.handler = hs.get_user_directory_handler() - @defer.inlineCallbacks def test_handle_local_profile_change_with_support_user(self): support_user_id = "@support:test" - yield self.store.register( - user_id=support_user_id, - token="123", - password_hash=None, - user_type=UserTypes.SUPPORT + self.get_success( + self.store.register( + user_id=support_user_id, + token="123", + password_hash=None, + user_type=UserTypes.SUPPORT, + ) ) - yield self.handler.handle_local_profile_change(support_user_id, None) - profile = yield self.store.get_user_in_directory(support_user_id) + self.get_success( + self.handler.handle_local_profile_change(support_user_id, None) + ) + profile = self.get_success(self.store.get_user_in_directory(support_user_id)) self.assertTrue(profile is None) display_name = 'display_name' - profile_info = ProfileInfo( - avatar_url='avatar_url', - display_name=display_name, - ) + profile_info = ProfileInfo(avatar_url='avatar_url', display_name=display_name) regular_user_id = '@regular:test' - yield self.handler.handle_local_profile_change(regular_user_id, profile_info) - profile = yield self.store.get_user_in_directory(regular_user_id) + self.get_success( + self.handler.handle_local_profile_change(regular_user_id, profile_info) + ) + profile = self.get_success(self.store.get_user_in_directory(regular_user_id)) self.assertTrue(profile['display_name'] == display_name) - @defer.inlineCallbacks def test_handle_user_deactivated_support_user(self): s_user_id = "@support:test" - 
self.store.register( - user_id=s_user_id, - token="123", - password_hash=None, - user_type=UserTypes.SUPPORT + self.get_success( + self.store.register( + user_id=s_user_id, + token="123", + password_hash=None, + user_type=UserTypes.SUPPORT, + ) ) self.store.remove_from_user_dir = Mock() self.store.remove_from_user_in_public_room = Mock() - yield self.handler.handle_user_deactivated(s_user_id) + self.get_success(self.handler.handle_user_deactivated(s_user_id)) self.store.remove_from_user_dir.not_called() self.store.remove_from_user_in_public_room.not_called() - @defer.inlineCallbacks def test_handle_user_deactivated_regular_user(self): r_user_id = "@regular:test" - self.store.register(user_id=r_user_id, token="123", password_hash=None) + self.get_success( + self.store.register(user_id=r_user_id, token="123", password_hash=None) + ) self.store.remove_from_user_dir = Mock() - self.store.remove_from_user_in_public_room = Mock() - yield self.handler.handle_user_deactivated(r_user_id) + self.get_success(self.handler.handle_user_deactivated(r_user_id)) self.store.remove_from_user_dir.called_once_with(r_user_id) - self.store.remove_from_user_in_public_room.assert_called_once_with(r_user_id) + + def test_private_room(self): + """ + A user can be searched for only by people that are either in a public + room, or that share a private chat. + """ + u1 = self.register_user("user1", "pass") + u1_token = self.login(u1, "pass") + u2 = self.register_user("user2", "pass") + u2_token = self.login(u2, "pass") + u3 = self.register_user("user3", "pass") + + # We do not add users to the directory until they join a room. + s = self.get_success(self.handler.search_users(u1, "user2", 10)) + self.assertEqual(len(s["results"]), 0) + + room = self.helper.create_room_as(u1, is_public=False, tok=u1_token) + self.helper.invite(room, src=u1, targ=u2, tok=u1_token) + self.helper.join(room, user=u2, tok=u2_token) + + # Check we have populated the database correctly. 
+ shares_public = self.get_users_who_share_public_rooms() + shares_private = self.get_users_who_share_private_rooms() + + self.assertEqual(shares_public, []) + self.assertEqual( + self._compress_shared(shares_private), set([(u1, u2, room), (u2, u1, room)]) + ) + + # We get one search result when searching for user2 by user1. + s = self.get_success(self.handler.search_users(u1, "user2", 10)) + self.assertEqual(len(s["results"]), 1) + + # We get NO search results when searching for user2 by user3. + s = self.get_success(self.handler.search_users(u3, "user2", 10)) + self.assertEqual(len(s["results"]), 0) + + # We get NO search results when searching for user3 by user1. + s = self.get_success(self.handler.search_users(u1, "user3", 10)) + self.assertEqual(len(s["results"]), 0) + + # User 2 then leaves. + self.helper.leave(room, user=u2, tok=u2_token) + + # Check we have removed the values. + shares_public = self.get_users_who_share_public_rooms() + shares_private = self.get_users_who_share_private_rooms() + + self.assertEqual(shares_public, []) + self.assertEqual(self._compress_shared(shares_private), set()) + + # User1 now gets no search results for any of the other users. + s = self.get_success(self.handler.search_users(u1, "user2", 10)) + self.assertEqual(len(s["results"]), 0) + + s = self.get_success(self.handler.search_users(u1, "user3", 10)) + self.assertEqual(len(s["results"]), 0) + + def _compress_shared(self, shared): + """ + Compress a list of users who share rooms dicts to a list of tuples. 
+ """ + r = set() + for i in shared: + r.add((i["user_id"], i["other_user_id"], i["room_id"])) + return r + + def get_users_who_share_public_rooms(self): + return self.get_success( + self.store._simple_select_list( + "users_who_share_public_rooms", + None, + ["user_id", "other_user_id", "room_id"], + ) + ) + + def get_users_who_share_private_rooms(self): + return self.get_success( + self.store._simple_select_list( + "users_who_share_private_rooms", + None, + ["user_id", "other_user_id", "room_id"], + ) + ) + + def test_initial(self): + """ + The user directory's initial handler correctly updates the search tables. + """ + u1 = self.register_user("user1", "pass") + u1_token = self.login(u1, "pass") + u2 = self.register_user("user2", "pass") + u2_token = self.login(u2, "pass") + u3 = self.register_user("user3", "pass") + u3_token = self.login(u3, "pass") + + room = self.helper.create_room_as(u1, is_public=True, tok=u1_token) + self.helper.invite(room, src=u1, targ=u2, tok=u1_token) + self.helper.join(room, user=u2, tok=u2_token) + + private_room = self.helper.create_room_as(u1, is_public=False, tok=u1_token) + self.helper.invite(private_room, src=u1, targ=u3, tok=u1_token) + self.helper.join(private_room, user=u3, tok=u3_token) + + self.get_success(self.store.update_user_directory_stream_pos(None)) + self.get_success(self.store.delete_all_from_user_dir()) + + shares_public = self.get_users_who_share_public_rooms() + shares_private = self.get_users_who_share_private_rooms() + + self.assertEqual(shares_private, []) + self.assertEqual(shares_public, []) + + # Reset the handled users caches + self.handler.initially_handled_users = set() + + # Do the initial population + d = self.handler._do_initial_spam() + + # This takes a while, so pump it a bunch of times to get through the + # sleep delays + for i in range(10): + self.pump(1) + + self.get_success(d) + + shares_public = self.get_users_who_share_public_rooms() + shares_private = self.get_users_who_share_private_rooms() 
+ + # User 1 and User 2 share public rooms + self.assertEqual( + self._compress_shared(shares_public), set([(u1, u2, room), (u2, u1, room)]) + ) + + # User 1 and User 3 share private rooms + self.assertEqual( + self._compress_shared(shares_private), + set([(u1, u3, private_room), (u3, u1, private_room)]), + ) + + def test_search_all_users(self): + """ + Search all users = True means that a user does not have to share a + private room with the searching user or be in a public room to be search + visible. + """ + self.handler.search_all_users = True + self.hs.config.user_directory_search_all_users = True + + u1 = self.register_user("user1", "pass") + u1_token = self.login(u1, "pass") + u2 = self.register_user("user2", "pass") + u2_token = self.login(u2, "pass") + u3 = self.register_user("user3", "pass") + + # User 1 and User 2 join a room. User 3 never does. + room = self.helper.create_room_as(u1, is_public=True, tok=u1_token) + self.helper.invite(room, src=u1, targ=u2, tok=u1_token) + self.helper.join(room, user=u2, tok=u2_token) + + self.get_success(self.store.update_user_directory_stream_pos(None)) + self.get_success(self.store.delete_all_from_user_dir()) + + # Reset the handled users caches + self.handler.initially_handled_users = set() + + # Do the initial population + d = self.handler._do_initial_spam() + + # This takes a while, so pump it a bunch of times to get through the + # sleep delays + for i in range(10): + self.pump(1) + + self.get_success(d) + + # Despite not sharing a room, search_all_users means we get a search + # result. + s = self.get_success(self.handler.search_users(u1, u3, 10)) + self.assertEqual(len(s["results"]), 1) diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py index 0dde1ab2fe..a2a652a235 100644 --- a/tests/storage/test_user_directory.py +++ b/tests/storage/test_user_directory.py @@ -35,14 +35,12 @@ class UserDirectoryStoreTestCase(unittest.TestCase): # alice and bob are both in !room_id. 
bobby is not but shares # a homeserver with alice. yield self.store.add_profiles_to_user_dir( - "!room:id", { ALICE: ProfileInfo(None, "alice"), BOB: ProfileInfo(None, "bob"), BOBBY: ProfileInfo(None, "bobby"), }, ) - yield self.store.add_users_to_public_room("!room:id", [ALICE, BOB]) yield self.store.add_users_who_share_room( "!room:id", False, ((ALICE, BOB), (BOB, ALICE)) ) -- cgit 1.5.1 From c633fc02d72e325ab9689f3f27edb86ef93cec0c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 7 Mar 2019 15:53:14 +0000 Subject: Add some debug logging for device list handling --- synapse/handlers/device.py | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index c09a7c6280..03644a93cc 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -402,6 +402,12 @@ class DeviceHandler(DeviceWorkerHandler): user_id, device_ids, list(hosts) ) + for device_id in device_ids: + logger.debug( + "Notifying about update %r/%r, ID: %r", user_id, device_id, + position, + ) + room_ids = yield self.store.get_rooms_for_user(user_id) yield self.notifier.on_new_event( @@ -409,7 +415,7 @@ class DeviceHandler(DeviceWorkerHandler): ) if hosts: - logger.info("Sending device list update notif to: %r", hosts) + logger.info("Sending device list update notif for %r to: %r", user_id, hosts) for host in hosts: self.federation_sender.send_device_messages(host) @@ -479,15 +485,26 @@ class DeviceListEduUpdater(object): if get_domain_from_id(user_id) != origin: # TODO: Raise? - logger.warning("Got device list update edu for %r from %r", user_id, origin) + logger.warning( + "Got device list update edu for %r/%r from %r", + user_id, device_id, origin, + ) return room_ids = yield self.store.get_rooms_for_user(user_id) if not room_ids: # We don't share any rooms with this user. Ignore update, as we # probably won't get any further updates. 
+ logger.warning( + "Got device list update edu for %r/%r, but don't share a room", + user_id, device_id, + ) return + logger.debug( + "Received device list update for %r/%r", user_id, device_id, + ) + self._pending_updates.setdefault(user_id, []).append( (device_id, stream_id, prev_ids, edu_content) ) @@ -505,10 +522,18 @@ class DeviceListEduUpdater(object): # This can happen since we batch updates return + for device_id, stream_id, prev_ids, content in pending_updates: + logger.debug( + "Handling update %r/%r, ID: %r, prev: %r ", + user_id, device_id, stream_id, prev_ids, + ) + # Given a list of updates we check if we need to resync. This # happens if we've missed updates. resync = yield self._need_to_do_resync(user_id, pending_updates) + logger.debug("Need to re-sync devices for %r? %r", user_id, resync) + if resync: # Fetch all devices for the user. origin = get_domain_from_id(user_id) @@ -561,6 +586,12 @@ class DeviceListEduUpdater(object): ) devices = [] + for device in devices: + logger.debug( + "Handling resync update %r/%r, ID: %r", + user_id, device["device_id"], stream_id, + ) + yield self.store.update_remote_device_list_cache( user_id, devices, stream_id, ) @@ -593,6 +624,11 @@ class DeviceListEduUpdater(object): user_id ) + logger.debug( + "Current extremity for %r: %r", + user_id, extremity, + ) + stream_id_in_updates = set() # stream_ids in updates list for _, stream_id, prev_ids, _ in updates: if not prev_ids: -- cgit 1.5.1 From d42b41544a3d8950f2a804703aa4ad311e9feddd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 7 Mar 2019 16:04:24 +0000 Subject: When re-syncing device lists reset the state We keep track of what stream IDs we've seen so that we know what updates we've handled or missed. If we re-sync we don't know if the updates we've seen are included in the re-sync (there may be a race), so we should reset the seen updates. 
--- synapse/handlers/device.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index c09a7c6280..00f12ba40d 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -566,6 +566,10 @@ class DeviceListEduUpdater(object): ) device_ids = [device["device_id"] for device in devices] yield self.device_handler.notify_device_update(user_id, device_ids) + + # We clobber the seen updates since we've re-synced from a given + # point. + self._seen_updates[user_id] = set([stream_id]) else: # Simply update the single device, since we know that is the only # change (because of the single prev_id matching the current cache) @@ -578,9 +582,9 @@ class DeviceListEduUpdater(object): user_id, [device_id for device_id, _, _, _ in pending_updates] ) - self._seen_updates.setdefault(user_id, set()).update( - stream_id for _, stream_id, _, _ in pending_updates - ) + self._seen_updates.setdefault(user_id, set()).update( + stream_id for _, stream_id, _, _ in pending_updates + ) @defer.inlineCallbacks def _need_to_do_resync(self, user_id, updates): -- cgit 1.5.1 From 0ff8163eae50626dc7bc07eda7638f9f5fed5b6e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 8 Mar 2019 11:26:33 +0000 Subject: Factor out soft fail checks --- synapse/handlers/federation.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index a75abe8e91..9eaf2d3e18 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1847,6 +1847,28 @@ class FederationHandler(BaseHandler): context.rejected = RejectedReason.AUTH_ERROR + if not context.rejected: + yield self._check_for_soft_fail(event, state, backfilled) + + if event.type == EventTypes.GuestAccess and not context.rejected: + yield self.maybe_kick_guest_users(event) + + 
defer.returnValue(context) + + @defer.inlineCallbacks + def _check_for_soft_fail(self, event, state, backfilled): + """Checks if we should soft fail the event, if so marks the event as + such. + + Args: + event (FrozenEvent) + state (dict|None): The state at the event if we don't have all the + event's prev events + backfilled (bool): Whether the event is from backfill + + Returns: + Deferred + """ # For new (non-backfilled and non-outlier) events we check if the event # passes auth based on the current state. If it doesn't then we # "soft-fail" the event. @@ -1918,11 +1940,6 @@ class FederationHandler(BaseHandler): ) event.internal_metadata.soft_failed = True - if event.type == EventTypes.GuestAccess and not context.rejected: - yield self.maybe_kick_guest_users(event) - - defer.returnValue(context) - @defer.inlineCallbacks def on_query_auth(self, origin, event_id, room_id, remote_auth_chain, rejects, missing): -- cgit 1.5.1 From 5536ddba754fcdd65f8286c75829bf7860a393a5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 8 Mar 2019 15:05:32 +0000 Subject: Make `prev_state` field optional The `prev_state` field on events is not specced and so synapse shouldn't explode if an event is missing the field. 
Fixes #4787 --- synapse/events/__init__.py | 1 - synapse/storage/events.py | 15 --------------- 2 files changed, 16 deletions(-) (limited to 'synapse') diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index bd130f8816..fafa135182 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -141,7 +141,6 @@ class EventBase(object): origin = _event_dict_property("origin") origin_server_ts = _event_dict_property("origin_server_ts") prev_events = _event_dict_property("prev_events") - prev_state = _event_dict_property("prev_state") redacts = _event_dict_property("redacts") room_id = _event_dict_property("room_id") sender = _event_dict_property("sender") diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 990a5eaaae..428300ea0a 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1407,21 +1407,6 @@ class EventsStore(StateGroupWorkerStore, EventFederationStore, EventsWorkerStore values=state_values, ) - self._simple_insert_many_txn( - txn, - table="event_edges", - values=[ - { - "event_id": event.event_id, - "prev_event_id": prev_id, - "room_id": event.room_id, - "is_state": True, - } - for event, _ in state_events_and_contexts - for prev_id, _ in event.prev_state - ], - ) - # Prefill the event cache self._add_to_cache(txn, events_and_contexts) -- cgit 1.5.1 From fe6c12e6cdbc2b34e93f6211c51e1eab33ae00c8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 8 Mar 2019 16:38:23 +0000 Subject: Add comment to schema --- synapse/storage/schema/full_schemas/11/event_edges.sql | 2 ++ 1 file changed, 2 insertions(+) (limited to 'synapse') diff --git a/synapse/storage/schema/full_schemas/11/event_edges.sql b/synapse/storage/schema/full_schemas/11/event_edges.sql index 52eec88357..bccd1c6f74 100644 --- a/synapse/storage/schema/full_schemas/11/event_edges.sql +++ b/synapse/storage/schema/full_schemas/11/event_edges.sql @@ -37,6 +37,8 @@ CREATE TABLE IF NOT EXISTS event_edges( event_id TEXT NOT NULL, 
prev_event_id TEXT NOT NULL, room_id TEXT NOT NULL, + -- We no longer insert prev_state into this table, so all new rows will have + -- is_state as false. is_state BOOL NOT NULL, UNIQUE (event_id, prev_event_id, room_id, is_state) ); -- cgit 1.5.1 From 2326e00bc43d61e18a5ba49e22d00da0b04c3693 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 11 Mar 2019 10:53:45 +0100 Subject: fix incorrect encoding of filenames with spaces in (#2090) fixes https://github.com/vector-im/riot-web/issues/3155 --- changelog.d/2090.bugfix | 1 + synapse/rest/media/v1/_base.py | 54 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 52 insertions(+), 3 deletions(-) create mode 100644 changelog.d/2090.bugfix (limited to 'synapse') diff --git a/changelog.d/2090.bugfix b/changelog.d/2090.bugfix new file mode 100644 index 0000000000..de2d22fcb8 --- /dev/null +++ b/changelog.d/2090.bugfix @@ -0,0 +1 @@ +Fix a bug where media with spaces in the name would get a corrupted name. diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index fece1ef0b8..953d89bd82 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -100,10 +100,29 @@ def add_file_headers(request, media_type, file_size, upload_name): request.setHeader(b"Content-Type", media_type.encode("UTF-8")) if upload_name: - if is_ascii(upload_name): - disposition = "inline; filename=%s" % (_quote(upload_name),) + # RFC6266 section 4.1 [1] defines both `filename` and `filename*`. + # + # `filename` is defined to be a `value`, which is defined by RFC2616 + # section 3.6 [2] to be a `token` or a `quoted-string`, where a `token` + # is (essentially) a single US-ASCII word, and a `quoted-string` is a + # US-ASCII string surrounded by double-quotes, using backslash as an + # escape character. Note that %-encoding is *not* permitted. 
+ # + # `filename*` is defined to be an `ext-value`, which is defined in + # RFC5987 section 3.2.1 [3] to be `charset "'" [ language ] "'" value-chars`, + # where `value-chars` is essentially a %-encoded string in the given charset. + # + # [1]: https://tools.ietf.org/html/rfc6266#section-4.1 + # [2]: https://tools.ietf.org/html/rfc2616#section-3.6 + # [3]: https://tools.ietf.org/html/rfc5987#section-3.2.1 + + # We avoid the quoted-string version of `filename`, because (a) synapse didn't + # correctly interpret those as of 0.99.2 and (b) they are a bit of a pain and we + # may as well just do the filename* version. + if _can_encode_filename_as_token(upload_name): + disposition = 'inline; filename=%s' % (upload_name, ) else: - disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name),) + disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name), ) request.setHeader(b"Content-Disposition", disposition.encode('ascii')) @@ -116,6 +135,35 @@ def add_file_headers(request, media_type, file_size, upload_name): request.setHeader(b"Content-Length", b"%d" % (file_size,)) +# separators as defined in RFC2616. SP and HT are handled separately. +# see _can_encode_filename_as_token. +_FILENAME_SEPARATOR_CHARS = set(( + "(", ")", "<", ">", "@", ",", ";", ":", "\\", '"', + "/", "[", "]", "?", "=", "{", "}", +)) + + +def _can_encode_filename_as_token(x): + for c in x: + # from RFC2616: + # + # token = 1*<any CHAR except CTLs or separators> + # + # separators = "(" | ")" | "<" | ">" | "@" + # | "," | ";" | ":" | "\" | <"> + # | "/" | "[" | "]" | "?" | "=" + # | "{" | "}" | SP | HT + # + # CHAR = <any US-ASCII character (octets 0 - 127)> + # + # CTL = <any US-ASCII control character + # (octets 0 - 31) and DEL (127)> + # + if ord(c) >= 127 or ord(c) <= 32 or c in _FILENAME_SEPARATOR_CHARS: + return False + return True + + @defer.inlineCallbacks def respond_with_responder(request, responder, media_type, file_size, upload_name=None): """Responds to the request with given responder. 
If responder is None then -- cgit 1.5.1 From 4abc988c6a1020a8f9e5d3aec92f4b817f6e352e Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Mon, 11 Mar 2019 21:11:36 +1100 Subject: initial --- synapse/handlers/user_directory.py | 41 +++++++++++++++++++++- synapse/storage/schema/delta/53/user_share.sql | 3 -- .../schema/delta/53/users_in_public_rooms.sql | 28 +++++++++++++++ synapse/storage/user_directory.py | 34 ++++++++++++++++++ tests/handlers/test_user_directory.py | 12 +++++++ 5 files changed, 114 insertions(+), 4 deletions(-) create mode 100644 synapse/storage/schema/delta/53/users_in_public_rooms.sql (limited to 'synapse') diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index c21da8343a..fc45123d0c 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -64,6 +64,10 @@ class UserDirectoryHandler(object): # This is a set of user_id's we've inserted already self.initially_handled_users = set() + self.register_background_update_handler( + "users_in_public_rooms_initial", self._populate_users_in_public_rooms + ) + # The current position in the current_state_delta stream self.pos = None @@ -77,6 +81,41 @@ class UserDirectoryHandler(object): # we start populating the user directory self.clock.call_later(0, self.notify_new_event) + @defer.inlineCallbacks + def _populate_users_in_public_rooms(self, progress, batch_size): + """ + Populate the users_in_public_rooms table with the contents of the + users_who_share_public_rooms table. 
+ """ + + def _fetch(txn): + sql = "SELECT DISTINCT other_user_id FROM users_who_share_public_rooms" + txn.execute(sql) + return txn.fetchall() + + users = yield self.store.runInteraction( + "populate_users_in_public_rooms_fetch", _fetch + ) + + if users: + + def _fill(txn): + self._simple_upsert_many_txn( + txn, + table="users_in_public_rooms", + key_names=["user_id"], + key_values=users, + value_names=(), + value_values=None, + ) + + users = yield self.store.runInteraction( + "populate_users_in_public_rooms_fill", _fill + ) + + yield self._end_background_update("users_in_public_rooms_initial") + defer.returnValue(1) + def search_users(self, user_id, search_term, limit): """Searches for users in directory @@ -231,7 +270,7 @@ class UserDirectoryHandler(object): unhandled_users = user_ids - self.initially_handled_users yield self.store.add_profiles_to_user_dir( - {user_id: users_with_profile[user_id] for user_id in unhandled_users}, + {user_id: users_with_profile[user_id] for user_id in unhandled_users} ) self.initially_handled_users |= unhandled_users diff --git a/synapse/storage/schema/delta/53/user_share.sql b/synapse/storage/schema/delta/53/user_share.sql index 14424ded0c..5831b1a6f8 100644 --- a/synapse/storage/schema/delta/53/user_share.sql +++ b/synapse/storage/schema/delta/53/user_share.sql @@ -16,9 +16,6 @@ -- Old disused version of the tables below. DROP TABLE IF EXISTS users_who_share_rooms; --- This is no longer used because it's duplicated by the users_who_share_public_rooms -DROP TABLE IF EXISTS users_in_public_rooms; - -- Tables keeping track of what users share rooms. This is a map of local users -- to local or remote users, per room. Remote users cannot be in the user_id -- column, only the other_user_id column. 
There are two tables, one for public diff --git a/synapse/storage/schema/delta/53/users_in_public_rooms.sql b/synapse/storage/schema/delta/53/users_in_public_rooms.sql new file mode 100644 index 0000000000..bd57fd778b --- /dev/null +++ b/synapse/storage/schema/delta/53/users_in_public_rooms.sql @@ -0,0 +1,28 @@ +/* Copyright 2019 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- We don't need the old version of this table. +DROP TABLE IF EXISTS users_in_public_rooms; + +-- Track what users are in public rooms. +CREATE TABLE IF NOT EXISTS users_in_public_rooms ( + user_id TEXT NOT NULL +); + +CREATE UNIQUE INDEX users_in_public_rooms_u_idx ON users_in_public_rooms(user_id); + +-- Fill the table. 
+INSERT INTO background_updates (update_name, progress_json) VALUES + ('users_in_public_rooms_initial', '{}'); diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 2317d22ed6..8f40277b50 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -241,6 +241,9 @@ class UserDirectoryStore(SQLBaseStore): self._simple_delete_txn( txn, table="user_directory_search", keyvalues={"user_id": user_id} ) + self._simple_delete_txn( + txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} + ) self._simple_delete_txn( txn, table="users_who_share_public_rooms", @@ -339,6 +342,21 @@ class UserDirectoryStore(SQLBaseStore): value_names=(), value_values=None, ) + + # If it's a public room, also update them in users_in_public_rooms. + # We don't look before they're in the table before we do it, as it's + # more efficient to simply have Postgres do that (one UPSERT vs one + # SELECT and maybe one INSERT). + if not share_private: + for user_id in set([x[1] for x in user_id_tuples]): + self._simple_upsert_txn( + txn, + "users_in_public_rooms", + keyvalues={"user_id": user_id}, + values={}, + desc="add_user_as_in_public_room", + ) + for user_id, other_user_id in user_id_tuples: txn.call_after( self.get_users_who_share_room_from_dir.invalidate, (user_id,) @@ -379,6 +397,21 @@ class UserDirectoryStore(SQLBaseStore): table="users_who_share_public_rooms", keyvalues={"other_user_id": user_id, "room_id": room_id}, ) + + # Are the users still in a public room after we deleted them from this one? 
+ still_in_public = self._simple_select_one_onecol_txn( + txn, + "users_who_share_public_rooms", + keyvalues={"other_user_id": user_id}, + retcol="other_user_id", + allow_none=True, + ) + + if still_in_public is None: + self._simple_delete_txn( + txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} + ) + txn.call_after( self.get_users_who_share_room_from_dir.invalidate, (user_id,) ) @@ -452,6 +485,7 @@ class UserDirectoryStore(SQLBaseStore): def _delete_all_from_user_dir_txn(txn): txn.execute("DELETE FROM user_directory") txn.execute("DELETE FROM user_directory_search") + txn.execute("DELETE FROM users_in_public_rooms") txn.execute("DELETE FROM users_who_share_public_rooms") txn.execute("DELETE FROM users_who_share_private_rooms") txn.call_after(self.get_user_in_directory.invalidate_all) diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index a16a2dc67b..0e0ac0a48b 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -121,6 +121,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.assertEqual( self._compress_shared(shares_private), set([(u1, u2, room), (u2, u1, room)]) ) + self.assertEqual(set(public_users), set([u1, u2])) # We get one search result when searching for user2 by user1. s = self.get_success(self.handler.search_users(u1, "user2", 10)) @@ -140,9 +141,11 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): # Check we have removed the values. shares_public = self.get_users_who_share_public_rooms() shares_private = self.get_users_who_share_private_rooms() + public_users = self.get_users_in_public_rooms() self.assertEqual(shares_public, []) self.assertEqual(self._compress_shared(shares_private), set()) + self.assertEqual(public_users, [u1]) # User1 now gets no search results for any of the other users. 
s = self.get_success(self.handler.search_users(u1, "user2", 10)) @@ -160,6 +163,15 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): r.add((i["user_id"], i["other_user_id"], i["room_id"])) return r + def get_users_in_public_rooms(self): + return self.get_success( + self.store._simple_select_list( + "users_in_public_rooms", + None, + ["user_id"], + ) + ) + def get_users_who_share_public_rooms(self): return self.get_success( self.store._simple_select_list( -- cgit 1.5.1 From 8da22e2b53614aa42776f9709a4265320e240765 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Mon, 11 Mar 2019 21:13:35 +1100 Subject: master startup --- synapse/app/homeserver.py | 1 + synapse/server.py | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'synapse') diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index e8b6cc3114..e0431608e8 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -376,6 +376,7 @@ def setup(config_options): logger.info("Database prepared in %s.", config.database_config['name']) hs.setup() + hs.setup_master() @defer.inlineCallbacks def do_acme(): diff --git a/synapse/server.py b/synapse/server.py index 72835e8c86..c992bbaa5f 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -185,6 +185,10 @@ class HomeServer(object): 'registration_handler', ] + REQUIRED_ON_MASTER_STARTUP = [ + "user_directory_handler", + ] + # This is overridden in derived application classes # (such as synapse.app.homeserver.SynapseHomeServer) and gives the class to be # instantiated during setup() for future return by get_datastore() @@ -221,6 +225,10 @@ class HomeServer(object): conn.commit() logger.info("Finished setting up.") + def setup_master(self): + for i in self.REQUIRED_ON_MASTER_STARTUP: + getattr(self, "get_" + i)() + def get_reactor(self): """ Fetch the Twisted reactor in use by this HomeServer. 
-- cgit 1.5.1 From 5ba8ceab4cd7062d9f1b23a19d43f8a9ef7c5d60 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 12 Mar 2019 00:35:31 +1100 Subject: fixes --- synapse/handlers/user_directory.py | 45 ++++-------------------------- synapse/storage/_base.py | 13 +++++++-- synapse/storage/user_directory.py | 52 +++++++++++++++++++++++++++++++---- tests/handlers/test_user_directory.py | 16 ++++++++--- 4 files changed, 74 insertions(+), 52 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index fc45123d0c..20a026e776 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -60,14 +60,16 @@ class UserDirectoryHandler(object): self.update_user_directory = hs.config.update_user_directory self.search_all_users = hs.config.user_directory_search_all_users + # If we're a worker, don't sleep when doing the initial room work, as it + # won't monopolise the master's CPU. + if hs.config.worker_app: + self.INITIAL_ROOM_SLEEP_MS = 0 + self.INITIAL_USER_SLEEP_MS = 0 + # When start up for the first time we need to populate the user_directory. # This is a set of user_id's we've inserted already self.initially_handled_users = set() - self.register_background_update_handler( - "users_in_public_rooms_initial", self._populate_users_in_public_rooms - ) - # The current position in the current_state_delta stream self.pos = None @@ -81,41 +83,6 @@ class UserDirectoryHandler(object): # we start populating the user directory self.clock.call_later(0, self.notify_new_event) - @defer.inlineCallbacks - def _populate_users_in_public_rooms(self, progress, batch_size): - """ - Populate the users_in_public_rooms table with the contents of the - users_who_share_public_rooms table. 
- """ - - def _fetch(txn): - sql = "SELECT DISTINCT other_user_id FROM users_who_share_public_rooms" - txn.execute(sql) - return txn.fetchall() - - users = yield self.store.runInteraction( - "populate_users_in_public_rooms_fetch", _fetch - ) - - if users: - - def _fill(txn): - self._simple_upsert_many_txn( - txn, - table="users_in_public_rooms", - key_names=["user_id"], - key_values=users, - value_names=(), - value_values=None, - ) - - users = yield self.store.runInteraction( - "populate_users_in_public_rooms_fill", _fill - ) - - yield self._end_background_update("users_in_public_rooms_initial") - defer.returnValue(1) - def search_users(self, user_id, search_term, limit): """Searches for users in directory diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index a0333d5309..7e3903859b 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -767,18 +767,25 @@ class SQLBaseStore(object): """ allvalues = {} allvalues.update(keyvalues) - allvalues.update(values) allvalues.update(insertion_values) + if not values: + latter = "NOTHING" + else: + allvalues.update(values) + latter = ( + "UPDATE SET " + ", ".join(k + "=EXCLUDED." + k for k in values) + ) + sql = ( "INSERT INTO %s (%s) VALUES (%s) " - "ON CONFLICT (%s) DO UPDATE SET %s" + "ON CONFLICT (%s) DO %s" ) % ( table, ", ".join(k for k in allvalues), ", ".join("?" for _ in allvalues), ", ".join(k for k in keyvalues), - ", ".join(k + "=EXCLUDED." 
+ k for k in values), + latter ) txn.execute(sql, list(allvalues.values())) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 8f40277b50..a15366a117 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -22,16 +22,57 @@ from twisted.internet import defer from synapse.api.constants import EventTypes, JoinRules from synapse.storage.engines import PostgresEngine, Sqlite3Engine +from synapse.storage.background_updates import BackgroundUpdateStore from synapse.storage.state import StateFilter from synapse.types import get_domain_from_id, get_localpart_from_id from synapse.util.caches.descriptors import cached, cachedInlineCallbacks -from ._base import SQLBaseStore - logger = logging.getLogger(__name__) -class UserDirectoryStore(SQLBaseStore): +class UserDirectoryStore(BackgroundUpdateStore): + def __init__(self, dbconn, hs): + super(UserDirectoryStore, self).__init__(dbconn, hs) + + self.register_background_update_handler( + "users_in_public_rooms_initial", self._populate_users_in_public_rooms + ) + + + @defer.inlineCallbacks + def _populate_users_in_public_rooms(self, progress, batch_size): + """ + Populate the users_in_public_rooms table with the contents of the + users_who_share_public_rooms table. 
+ """ + + def _fetch(txn): + sql = "SELECT DISTINCT other_user_id FROM users_who_share_public_rooms" + txn.execute(sql) + return txn.fetchall() + + users = yield self.runInteraction( + "populate_users_in_public_rooms_fetch", _fetch + ) + + if users: + def _fill(txn): + self._simple_upsert_many_txn( + txn, + table="users_in_public_rooms", + key_names=["user_id"], + key_values=users, + value_names=(), + value_values=None, + ) + + users = yield self.runInteraction( + "populate_users_in_public_rooms_fill", _fill + ) + + yield self._end_background_update("users_in_public_rooms_initial") + defer.returnValue(1) + @defer.inlineCallbacks def is_room_world_readable_or_publicly_joinable(self, room_id): """Check if the room is either world_readable or publically joinable @@ -353,8 +394,7 @@ class UserDirectoryStore(SQLBaseStore): txn, "users_in_public_rooms", keyvalues={"user_id": user_id}, - values={}, - desc="add_user_as_in_public_room", + values=None, ) for user_id, other_user_id in user_id_tuples: @@ -603,7 +643,7 @@ class UserDirectoryStore(SQLBaseStore): else: join_clause = """ LEFT JOIN ( - SELECT other_user_id AS user_id FROM users_who_share_public_rooms + SELECT user_id FROM users_in_public_rooms UNION SELECT other_user_id AS user_id FROM users_who_share_private_rooms WHERE user_id = ? diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index 0e0ac0a48b..7a78451a6d 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -116,12 +116,13 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): # Check we have populated the database correctly. 
shares_public = self.get_users_who_share_public_rooms() shares_private = self.get_users_who_share_private_rooms() + public_users = self.get_users_in_public_rooms() self.assertEqual(shares_public, []) self.assertEqual( self._compress_shared(shares_private), set([(u1, u2, room), (u2, u1, room)]) ) - self.assertEqual(set(public_users), set([u1, u2])) + self.assertEqual(public_users, []) # We get one search result when searching for user2 by user1. s = self.get_success(self.handler.search_users(u1, "user2", 10)) @@ -145,7 +146,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.assertEqual(shares_public, []) self.assertEqual(self._compress_shared(shares_private), set()) - self.assertEqual(public_users, [u1]) + self.assertEqual(public_users, []) # User1 now gets no search results for any of the other users. s = self.get_success(self.handler.search_users(u1, "user2", 10)) @@ -165,10 +166,10 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): def get_users_in_public_rooms(self): return self.get_success( - self.store._simple_select_list( + self.store._simple_select_onecol( "users_in_public_rooms", None, - ["user_id"], + "user_id", ) ) @@ -214,9 +215,12 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): shares_public = self.get_users_who_share_public_rooms() shares_private = self.get_users_who_share_private_rooms() + public_users = self.get_users_in_public_rooms() + # Nothing updated yet self.assertEqual(shares_private, []) self.assertEqual(shares_public, []) + self.assertEqual(public_users, []) # Reset the handled users caches self.handler.initially_handled_users = set() @@ -233,6 +237,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): shares_public = self.get_users_who_share_public_rooms() shares_private = self.get_users_who_share_private_rooms() + public_users = self.get_users_in_public_rooms() # User 1 and User 2 share public rooms self.assertEqual( @@ -245,6 +250,9 @@ class 
UserDirectoryTestCase(unittest.HomeserverTestCase): set([(u1, u3, private_room), (u3, u1, private_room)]), ) + # User 1 and 2 are in public rooms + self.assertEqual(set(public_users), set([u1, u2])) + def test_search_all_users(self): """ Search all users = True means that a user does not have to share a -- cgit 1.5.1 From 1b77bd69fbc95e51c31c3c1a2a648f2ecb9ccdb0 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 12 Mar 2019 00:39:12 +1100 Subject: pep8 --- synapse/storage/user_directory.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index a15366a117..5d402189e8 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -21,8 +21,8 @@ from six import iteritems from twisted.internet import defer from synapse.api.constants import EventTypes, JoinRules -from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.storage.background_updates import BackgroundUpdateStore +from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.storage.state import StateFilter from synapse.types import get_domain_from_id, get_localpart_from_id from synapse.util.caches.descriptors import cached, cachedInlineCallbacks @@ -38,7 +38,6 @@ class UserDirectoryStore(BackgroundUpdateStore): "users_in_public_rooms_initial", self._populate_users_in_public_rooms ) - @defer.inlineCallbacks def _populate_users_in_public_rooms(self, progress, batch_size): """ -- cgit 1.5.1 From 78a6b950b3258c3d3e11a0b4341d40ddc99748e2 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 12 Mar 2019 00:50:28 +1100 Subject: fix --- synapse/storage/user_directory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 5d402189e8..745e6f26ec 100644 --- a/synapse/storage/user_directory.py +++ 
b/synapse/storage/user_directory.py @@ -393,7 +393,7 @@ class UserDirectoryStore(BackgroundUpdateStore): txn, "users_in_public_rooms", keyvalues={"user_id": user_id}, - values=None, + values={}, ) for user_id, other_user_id in user_id_tuples: -- cgit 1.5.1 From 78c563b77ce141f61b13e0a00f16687a15704224 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 11 Mar 2019 14:11:10 +0000 Subject: Correctly log expected errors when fetching server keys --- synapse/crypto/keyring.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index 7474fd515f..0207cd989a 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -686,9 +686,9 @@ def _handle_key_deferred(verify_request): try: with PreserveLoggingContext(): _, key_id, verify_key = yield verify_request.deferred - except (IOError, RequestSendFailed) as e: + except KeyLookupError as e: logger.warn( - "Got IOError when downloading keys for %s: %s %s", + "Failed to download keys for %s: %s %s", server_name, type(e).__name__, str(e), ) raise SynapseError( -- cgit 1.5.1 From 290552fd836f4ae2dc1d893a7f72f7fff85365d3 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 11 Mar 2019 17:44:03 +0000 Subject: Make federation endpoints more tolerant of trailing slashes for some endpoints (#4793) Server side of a solution towards #3622. 
--- changelog.d/4793.feature | 1 + synapse/federation/transport/client.py | 2 +- synapse/federation/transport/server.py | 14 +++++++------- tests/handlers/test_typing.py | 6 +++--- 4 files changed, 12 insertions(+), 11 deletions(-) create mode 100644 changelog.d/4793.feature (limited to 'synapse') diff --git a/changelog.d/4793.feature b/changelog.d/4793.feature new file mode 100644 index 0000000000..90dba7d122 --- /dev/null +++ b/changelog.d/4793.feature @@ -0,0 +1 @@ +Synapse is now permissive about trailing slashes on some of its federation endpoints, allowing zero or more to be present. \ No newline at end of file diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 8e2be218e2..4e8919d657 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -167,7 +167,7 @@ class TransportLayerClient(object): # generated by the json_data_callback. json_data = transaction.get_dict() - path = _create_v1_path("/send/%s/", transaction.transaction_id) + path = _create_v1_path("/send/%s", transaction.transaction_id) response = yield self.client.put_json( transaction.destination, diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 96d680a5ad..efb6bdca48 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -312,7 +312,7 @@ class BaseFederationServlet(object): class FederationSendServlet(BaseFederationServlet): - PATH = "/send/(?P<transaction_id>[^/]*)/" + PATH = "/send/(?P<transaction_id>[^/]*)/?" def __init__(self, handler, server_name, **kwargs): super(FederationSendServlet, self).__init__( @@ -378,7 +378,7 @@ class FederationSendServlet(BaseFederationServlet): class FederationEventServlet(BaseFederationServlet): - PATH = "/event/(?P<event_id>[^/]*)/" + PATH = "/event/(?P<event_id>[^/]*)/?" # This is when someone asks for a data item for a given server data_id pair. 
def on_GET(self, origin, content, query, event_id): @@ -386,7 +386,7 @@ class FederationEventServlet(BaseFederationServlet): class FederationStateServlet(BaseFederationServlet): - PATH = "/state/(?P<context>[^/]*)/" + PATH = "/state/(?P<context>[^/]*)/?" # This is when someone asks for all data for a given context. def on_GET(self, origin, content, query, context): @@ -398,7 +398,7 @@ class FederationStateServlet(BaseFederationServlet): class FederationStateIdsServlet(BaseFederationServlet): - PATH = "/state_ids/(?P<room_id>[^/]*)/" + PATH = "/state_ids/(?P<room_id>[^/]*)/?" def on_GET(self, origin, content, query, room_id): return self.handler.on_state_ids_request( @@ -409,7 +409,7 @@ class FederationStateIdsServlet(BaseFederationServlet): class FederationBackfillServlet(BaseFederationServlet): - PATH = "/backfill/(?P<context>[^/]*)/" + PATH = "/backfill/(?P<context>[^/]*)/?" def on_GET(self, origin, content, query, context): versions = [x.decode('ascii') for x in query[b"v"]] @@ -1080,7 +1080,7 @@ class FederationGroupsCategoriesServlet(BaseFederationServlet): """Get all categories for a group """ PATH = ( - "/groups/(?P<group_id>[^/]*)/categories/" + "/groups/(?P<group_id>[^/]*)/categories/?" ) @defer.inlineCallbacks @@ -1150,7 +1150,7 @@ class FederationGroupsRolesServlet(BaseFederationServlet): """Get roles in a group """ PATH = ( - "/groups/(?P<group_id>[^/]*)/roles/" + "/groups/(?P<group_id>[^/]*)/roles/?" 
) @defer.inlineCallbacks diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index 13486930fb..b8e97390de 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -180,7 +180,7 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): put_json = self.hs.get_http_client().put_json put_json.assert_called_once_with( "farm", - path="/_matrix/federation/v1/send/1000000/", + path="/_matrix/federation/v1/send/1000000", data=_expect_edu_transaction( "m.typing", content={ @@ -201,7 +201,7 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): (request, channel) = self.make_request( "PUT", - "/_matrix/federation/v1/send/1000000/", + "/_matrix/federation/v1/send/1000000", _make_edu_transaction_json( "m.typing", content={ @@ -257,7 +257,7 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): put_json = self.hs.get_http_client().put_json put_json.assert_called_once_with( "farm", - path="/_matrix/federation/v1/send/1000000/", + path="/_matrix/federation/v1/send/1000000", data=_expect_edu_transaction( "m.typing", content={ -- cgit 1.5.1 From 8ea1b41a0e353da2d3ec3b7f5ee3bd240261c668 Mon Sep 17 00:00:00 2001 From: Aaron Raimist Date: Mon, 11 Mar 2019 13:21:52 -0500 Subject: Clarify what registration_shared_secret allows for (#2885) (#4844) * Clarify what registration_shared_secret allows for (#2885) Signed-off-by: Aaron Raimist * Add changelog Signed-off-by: Aaron Raimist --- INSTALL.md | 4 ++-- changelog.d/4844.misc | 1 + docs/sample_config.yaml | 4 ++-- synapse/config/registration.py | 4 ++-- 4 files changed, 7 insertions(+), 6 deletions(-) create mode 100644 changelog.d/4844.misc (limited to 'synapse') diff --git a/INSTALL.md b/INSTALL.md index 2993f3a9e2..5c67f14ed6 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -402,8 +402,8 @@ This process uses a setting `registration_shared_secret` in `homeserver.yaml`, which is shared between Synapse itself and the `register_new_matrix_user` script. 
It doesn't matter what it is (a random value is generated by `--generate-config`), but it should be kept secret, as -anyone with knowledge of it can register users on your server even if -`enable_registration` is `false`. +anyone with knowledge of it can register users, including admin accounts, +on your server even if `enable_registration` is `false`. ## Setting up a TURN server diff --git a/changelog.d/4844.misc b/changelog.d/4844.misc new file mode 100644 index 0000000000..eff6f1c43c --- /dev/null +++ b/changelog.d/4844.misc @@ -0,0 +1 @@ +Clarify what registration_shared_secret allows for. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index b62745dd6e..22d5e6b1d7 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -624,8 +624,8 @@ enable_registration: False # - medium: msisdn # pattern: '\+44' -# If set, allows registration by anyone who also has the shared -# secret, even if registration is otherwise disabled. +# If set, allows registration of standard or admin accounts by anyone who +# has the shared secret, even if registration is otherwise disabled. # # registration_shared_secret: diff --git a/synapse/config/registration.py b/synapse/config/registration.py index d34dc9e456..a123f25a68 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -92,8 +92,8 @@ class RegistrationConfig(Config): # - medium: msisdn # pattern: '\\+44' - # If set, allows registration by anyone who also has the shared - # secret, even if registration is otherwise disabled. + # If set, allows registration of standard or admin accounts by anyone who + # has the shared secret, even if registration is otherwise disabled. 
# %(registration_shared_secret)s -- cgit 1.5.1 From c980c7e31facdb33504051942857a0f67410f39a Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 12 Mar 2019 17:51:14 +1100 Subject: use the old method --- synapse/storage/user_directory.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 745e6f26ec..72a9071d03 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -641,12 +641,11 @@ class UserDirectoryStore(BackgroundUpdateStore): where_clause = "1=1" else: join_clause = """ + LEFT JOIN users_in_public_rooms AS p USING (user_id) LEFT JOIN ( - SELECT user_id FROM users_in_public_rooms - UNION SELECT other_user_id AS user_id FROM users_who_share_private_rooms WHERE user_id = ? - ) AS p USING (user_id) + ) AS s USING (user_id) """ join_args = (user_id,) where_clause = "p.user_id IS NOT NULL" -- cgit 1.5.1 From 8b618041efc230c87c74b912640b8e5727fcc539 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 12 Mar 2019 18:06:28 +1100 Subject: fixup --- synapse/storage/user_directory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 72a9071d03..4de552c1bb 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -648,7 +648,7 @@ class UserDirectoryStore(BackgroundUpdateStore): ) AS s USING (user_id) """ join_args = (user_id,) - where_clause = "p.user_id IS NOT NULL" + where_clause = "(s.user_id IS NOT NULL OR p.user_id IS NOT NULL)" if isinstance(self.database_engine, PostgresEngine): full_query, exact_query, prefix_query = _parse_query_postgres(search_term) -- cgit 1.5.1 From 10480c434881d9c38acc02c98ab4b85b98097870 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 12 Mar 2019 21:47:14 +1100 Subject: fixup --- synapse/handlers/user_directory.py | 117 +++++++++------ 
.../schema/delta/53/users_in_public_rooms.sql | 17 ++- synapse/storage/user_directory.py | 165 +++++++-------------- tests/handlers/test_user_directory.py | 36 +++-- 4 files changed, 159 insertions(+), 176 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 20a026e776..f9f7b8abd0 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -247,38 +247,58 @@ class UserDirectoryHandler(object): # We also batch up inserts/updates, but try to avoid too many at once. to_insert = set() count = 0 - for user_id in user_ids: - if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: - yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) - if not self.is_mine_id(user_id): - count += 1 - continue - - if self.store.get_if_app_services_interested_in_user(user_id): - count += 1 - continue + if is_public: + for user_id in user_ids: + if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: + yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) - for other_user_id in user_ids: - if user_id == other_user_id: + if self.store.get_if_app_services_interested_in_user(user_id): + count += 1 continue + to_insert.add(user_id) + if len(to_insert) > self.INITIAL_ROOM_BATCH_SIZE: + yield self.store.add_users_in_public_rooms(room_id, to_insert) + to_insert.clear() + + if to_insert: + yield self.store.add_users_in_public_rooms(room_id, to_insert) + to_insert.clear() + else: + + for user_id in user_ids: if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) - count += 1 - user_set = (user_id, other_user_id) - to_insert.add(user_set) + if not self.is_mine_id(user_id): + count += 1 + continue - if len(to_insert) > self.INITIAL_ROOM_BATCH_SIZE: - yield self.store.add_users_who_share_room( - room_id, not is_public, to_insert - ) - to_insert.clear() + if self.store.get_if_app_services_interested_in_user(user_id): + count += 1 + continue - if 
to_insert: - yield self.store.add_users_who_share_room(room_id, not is_public, to_insert) - to_insert.clear() + for other_user_id in user_ids: + if user_id == other_user_id: + continue + + if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: + yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) + count += 1 + + user_set = (user_id, other_user_id) + to_insert.add(user_set) + + if len(to_insert) > self.INITIAL_ROOM_BATCH_SIZE: + yield self.store.add_users_who_share_private_room( + room_id, not is_public, to_insert + ) + to_insert.clear() + + if to_insert: + yield self.store.add_users_who_share_private_room(room_id, to_insert) + to_insert.clear() @defer.inlineCallbacks def _handle_deltas(self, deltas): @@ -451,34 +471,37 @@ class UserDirectoryHandler(object): # Now we update users who share rooms with users. users_with_profile = yield self.state.get_current_user_in_room(room_id) - to_insert = set() + if is_public: + yield self.store.add_users_in_public_rooms(room_id, (user_id,)) + else: + to_insert = set() - # First, if they're our user then we need to update for every user - if self.is_mine_id(user_id): + # First, if they're our user then we need to update for every user + if self.is_mine_id(user_id): - is_appservice = self.store.get_if_app_services_interested_in_user(user_id) + is_appservice = self.store.get_if_app_services_interested_in_user(user_id) - # We don't care about appservice users. - if not is_appservice: - for other_user_id in users_with_profile: - if user_id == other_user_id: - continue + # We don't care about appservice users. 
+ if not is_appservice: + for other_user_id in users_with_profile: + if user_id == other_user_id: + continue - to_insert.add((user_id, other_user_id)) + to_insert.add((user_id, other_user_id)) - # Next we need to update for every local user in the room - for other_user_id in users_with_profile: - if user_id == other_user_id: - continue + # Next we need to update for every local user in the room + for other_user_id in users_with_profile: + if user_id == other_user_id: + continue - is_appservice = self.store.get_if_app_services_interested_in_user( - other_user_id - ) - if self.is_mine_id(other_user_id) and not is_appservice: - to_insert.add((other_user_id, user_id)) + is_appservice = self.store.get_if_app_services_interested_in_user( + other_user_id + ) + if self.is_mine_id(other_user_id) and not is_appservice: + to_insert.add((other_user_id, user_id)) - if to_insert: - yield self.store.add_users_who_share_room(room_id, not is_public, to_insert) + if to_insert: + yield self.store.add_users_who_share_private_room(room_id, to_insert) @defer.inlineCallbacks def _handle_remove_user(self, room_id, user_id): @@ -493,10 +516,10 @@ class UserDirectoryHandler(object): # Remove user from sharing tables yield self.store.remove_user_who_share_room(user_id, room_id) - # Are they still in a room with members? If not, remove them entirely. - users_in_room_with = yield self.store.get_users_who_share_room_from_dir(user_id) + # Are they still in any rooms? If not, remove them entirely. 
+ rooms_user_is_in = yield self.store.get_rooms_user_is_in(user_id) - if len(users_in_room_with) == 0: + if len(rooms_user_is_in) == 0: yield self.store.remove_from_user_dir(user_id) @defer.inlineCallbacks diff --git a/synapse/storage/schema/delta/53/users_in_public_rooms.sql b/synapse/storage/schema/delta/53/users_in_public_rooms.sql index bd57fd778b..40adc98387 100644 --- a/synapse/storage/schema/delta/53/users_in_public_rooms.sql +++ b/synapse/storage/schema/delta/53/users_in_public_rooms.sql @@ -16,13 +16,20 @@ -- We don't need the old version of this table. DROP TABLE IF EXISTS users_in_public_rooms; +-- Old version of users_in_public_rooms +DROP TABLE IF EXISTS users_who_share_public_rooms; + -- Track what users are in public rooms. CREATE TABLE IF NOT EXISTS users_in_public_rooms ( - user_id TEXT NOT NULL + user_id TEXT NOT NULL, + room_id TEXT NOT NULL ); -CREATE UNIQUE INDEX users_in_public_rooms_u_idx ON users_in_public_rooms(user_id); +CREATE UNIQUE INDEX users_in_public_rooms_u_idx ON users_in_public_rooms(user_id, room_id); + +-- Track what users are publicly visible +CREATE TABLE IF NOT EXISTS publicly_visible_users ( + user_id TEXT NOT NULL +); --- Fill the table. 
-INSERT INTO background_updates (update_name, progress_json) VALUES - ('users_in_public_rooms_initial', '{}'); +CREATE UNIQUE INDEX publicly_visible_users_u_idx ON publicly_visible_users(user_id); diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 4de552c1bb..af4260bc61 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -21,57 +21,15 @@ from six import iteritems from twisted.internet import defer from synapse.api.constants import EventTypes, JoinRules -from synapse.storage.background_updates import BackgroundUpdateStore from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.storage.state import StateFilter from synapse.types import get_domain_from_id, get_localpart_from_id -from synapse.util.caches.descriptors import cached, cachedInlineCallbacks +from synapse.util.caches.descriptors import cached logger = logging.getLogger(__name__) -class UserDirectoryStore(BackgroundUpdateStore): - def __init__(self, dbconn, hs): - super(UserDirectoryStore, self).__init__(dbconn, hs) - - self.register_background_update_handler( - "users_in_public_rooms_initial", self._populate_users_in_public_rooms - ) - - @defer.inlineCallbacks - def _populate_users_in_public_rooms(self, progress, batch_size): - """ - Populate the users_in_public_rooms table with the contents of the - users_who_share_public_rooms table. 
- """ - - def _fetch(txn): - sql = "SELECT DISTINCT other_user_id FROM users_who_share_public_rooms" - txn.execute(sql) - return txn.fetchall() - - users = yield self.runInteraction( - "populate_users_in_public_rooms_fetch", _fetch - ) - - if users: - def _fill(txn): - self._simple_upsert_many_txn( - txn, - table="users_in_public_rooms", - key_names=["user_id"], - key_values=users, - value_names=(), - value_values=None, - ) - - users = yield self.runInteraction( - "populate_users_in_public_rooms_fill", _fill - ) - - yield self._end_background_update("users_in_public_rooms_initial") - defer.returnValue(1) - +class UserDirectoryStore(object): @defer.inlineCallbacks def is_room_world_readable_or_publicly_joinable(self, room_id): """Check if the room is either world_readable or publically joinable @@ -282,17 +240,10 @@ class UserDirectoryStore(BackgroundUpdateStore): txn, table="user_directory_search", keyvalues={"user_id": user_id} ) self._simple_delete_txn( - txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} + txn, table="publicly_visible_users", keyvalues={"user_id": user_id} ) self._simple_delete_txn( - txn, - table="users_who_share_public_rooms", - keyvalues={"user_id": user_id}, - ) - self._simple_delete_txn( - txn, - table="users_who_share_public_rooms", - keyvalues={"other_user_id": user_id}, + txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} ) self._simple_delete_txn( txn, @@ -314,9 +265,9 @@ class UserDirectoryStore(BackgroundUpdateStore): in the given room_id """ user_ids_share_pub = yield self._simple_select_onecol( - table="users_who_share_public_rooms", + table="publicly_visible_users", keyvalues={"room_id": room_id}, - retcol="other_user_id", + retcol="user_id", desc="get_users_in_dir_due_to_room", ) @@ -354,26 +305,19 @@ class UserDirectoryStore(BackgroundUpdateStore): rows = yield self._execute("get_all_local_users", None, sql) defer.returnValue([name for name, in rows]) - def add_users_who_share_room(self, room_id, 
share_private, user_id_tuples): - """Insert entries into the users_who_share_*_rooms table. The first + def add_users_who_share_private_room(self, room_id, user_id_tuples): + """Insert entries into the users_who_share_private_rooms table. The first user should be a local user. Args: room_id (str) - share_private (bool): Is the room private user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs. """ def _add_users_who_share_room_txn(txn): - - if share_private: - tbl = "users_who_share_private_rooms" - else: - tbl = "users_who_share_public_rooms" - self._simple_upsert_many_txn( txn, - table=tbl, + table="users_who_share_private_rooms", key_names=["user_id", "other_user_id", "room_id"], key_values=[ (user_id, other_user_id, room_id) @@ -383,26 +327,44 @@ class UserDirectoryStore(BackgroundUpdateStore): value_values=None, ) - # If it's a public room, also update them in users_in_public_rooms. + return self.runInteraction( + "add_users_who_share_room", _add_users_who_share_room_txn + ) + + def add_users_in_public_rooms(self, room_id, user_ids): + """Insert entries into the users_who_share_private_rooms table. The first + user should be a local user. + + Args: + room_id (str) + user_ids (list[str]) + """ + + def _add_users_in_public_rooms_txn(txn): + + self._simple_upsert_many_txn( + txn, + table="users_in_public_rooms", + key_names=["user_id", "room_id"], + key_values=[(user_id, room_id) for user_id in user_ids], + value_names=(), + value_values=None, + ) + + # If it's a public room, also update them in publicly_visible_users. # We don't look before they're in the table before we do it, as it's # more efficient to simply have Postgres do that (one UPSERT vs one # SELECT and maybe one INSERT). 
- if not share_private: - for user_id in set([x[1] for x in user_id_tuples]): - self._simple_upsert_txn( - txn, - "users_in_public_rooms", - keyvalues={"user_id": user_id}, - values={}, - ) - - for user_id, other_user_id in user_id_tuples: - txn.call_after( - self.get_users_who_share_room_from_dir.invalidate, (user_id,) + for user_id in user_ids: + self._simple_upsert_txn( + txn, + "publicly_visible_users", + keyvalues={"user_id": user_id}, + values={}, ) return self.runInteraction( - "add_users_who_share_room", _add_users_who_share_room_txn + "add_users_in_public_rooms", _add_users_in_public_rooms_txn ) def remove_user_who_share_room(self, user_id, room_id): @@ -428,40 +390,32 @@ class UserDirectoryStore(BackgroundUpdateStore): ) self._simple_delete_txn( txn, - table="users_who_share_public_rooms", + table="users_in_public_rooms", keyvalues={"user_id": user_id, "room_id": room_id}, ) - self._simple_delete_txn( - txn, - table="users_who_share_public_rooms", - keyvalues={"other_user_id": user_id, "room_id": room_id}, - ) # Are the users still in a public room after we deleted them from this one? 
still_in_public = self._simple_select_one_onecol_txn( txn, - "users_who_share_public_rooms", - keyvalues={"other_user_id": user_id}, - retcol="other_user_id", + "users_in_public_rooms", + keyvalues={"user_id": user_id}, + retcol="user_id", allow_none=True, ) if still_in_public is None: self._simple_delete_txn( - txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} + txn, table="publicly_visible_users", keyvalues={"user_id": user_id} ) - txn.call_after( - self.get_users_who_share_room_from_dir.invalidate, (user_id,) - ) - return self.runInteraction( "remove_user_who_share_room", _remove_user_who_share_room_txn ) - @cachedInlineCallbacks(max_entries=500000, iterable=True) - def get_users_who_share_room_from_dir(self, user_id): - """Returns the set of users who share a room with `user_id` + @defer.inlineCallbacks + def get_rooms_user_is_in(self, user_id): + """ + Returns the rooms that a user is in. Args: user_id(str): Must be a local user @@ -472,23 +426,19 @@ class UserDirectoryStore(BackgroundUpdateStore): rows = yield self._simple_select_onecol( table="users_who_share_private_rooms", keyvalues={"user_id": user_id}, - retcol="other_user_id", - desc="get_users_who_share_room_with_user", + retcol="room_id", + desc="get_rooms_user_is_in", ) pub_rows = yield self._simple_select_onecol( - table="users_who_share_public_rooms", + table="users_in_public_rooms", keyvalues={"user_id": user_id}, - retcol="other_user_id", - desc="get_users_who_share_room_with_user", + retcol="room_id", + desc="get_rooms_user_is_in", ) users = set(pub_rows) users.update(rows) - - # Remove the user themselves from this list. 
- users.discard(user_id) - defer.returnValue(list(users)) @defer.inlineCallbacks @@ -525,10 +475,9 @@ class UserDirectoryStore(BackgroundUpdateStore): txn.execute("DELETE FROM user_directory") txn.execute("DELETE FROM user_directory_search") txn.execute("DELETE FROM users_in_public_rooms") - txn.execute("DELETE FROM users_who_share_public_rooms") + txn.execute("DELETE FROM publicly_visible_users") txn.execute("DELETE FROM users_who_share_private_rooms") txn.call_after(self.get_user_in_directory.invalidate_all) - txn.call_after(self.get_users_who_share_room_from_dir.invalidate_all) return self.runInteraction( "delete_all_from_user_dir", _delete_all_from_user_dir_txn @@ -641,7 +590,7 @@ class UserDirectoryStore(BackgroundUpdateStore): where_clause = "1=1" else: join_clause = """ - LEFT JOIN users_in_public_rooms AS p USING (user_id) + LEFT JOIN publicly_visible_users AS p USING (user_id) LEFT JOIN ( SELECT other_user_id AS user_id FROM users_who_share_private_rooms WHERE user_id = ? diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index 7a78451a6d..d8248def3f 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -114,11 +114,11 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.helper.join(room, user=u2, tok=u2_token) # Check we have populated the database correctly. - shares_public = self.get_users_who_share_public_rooms() shares_private = self.get_users_who_share_private_rooms() public_users = self.get_users_in_public_rooms() + visible_users = self.get_publicly_visible_users() - self.assertEqual(shares_public, []) + self.assertEqual(visible_users, []) self.assertEqual( self._compress_shared(shares_private), set([(u1, u2, room), (u2, u1, room)]) ) @@ -140,11 +140,11 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.helper.leave(room, user=u2, tok=u2_token) # Check we have removed the values. 
- shares_public = self.get_users_who_share_public_rooms() shares_private = self.get_users_who_share_private_rooms() public_users = self.get_users_in_public_rooms() + visible_users = self.get_publicly_visible_users() - self.assertEqual(shares_public, []) + self.assertEqual(visible_users, []) self.assertEqual(self._compress_shared(shares_private), set()) self.assertEqual(public_users, []) @@ -165,20 +165,24 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): return r def get_users_in_public_rooms(self): - return self.get_success( - self.store._simple_select_onecol( + r = self.get_success( + self.store._simple_select_list( "users_in_public_rooms", None, - "user_id", + ("user_id", "room_id"), ) ) + retval = [] + for i in r: + retval.append((i["user_id"], i["room_id"])) + return retval - def get_users_who_share_public_rooms(self): + def get_publicly_visible_users(self): return self.get_success( - self.store._simple_select_list( - "users_who_share_public_rooms", + self.store._simple_select_onecol( + "publicly_visible_users", None, - ["user_id", "other_user_id", "room_id"], + "user_id", ) ) @@ -213,13 +217,13 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.get_success(self.store.update_user_directory_stream_pos(None)) self.get_success(self.store.delete_all_from_user_dir()) - shares_public = self.get_users_who_share_public_rooms() shares_private = self.get_users_who_share_private_rooms() public_users = self.get_users_in_public_rooms() + visible_users = self.get_publicly_visible_users() # Nothing updated yet self.assertEqual(shares_private, []) - self.assertEqual(shares_public, []) + self.assertEqual(visible_users, []) self.assertEqual(public_users, []) # Reset the handled users caches @@ -235,13 +239,13 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.get_success(d) - shares_public = self.get_users_who_share_public_rooms() shares_private = self.get_users_who_share_private_rooms() public_users = self.get_users_in_public_rooms() + 
visible_users = self.get_publicly_visible_users() # User 1 and User 2 share public rooms self.assertEqual( - self._compress_shared(shares_public), set([(u1, u2, room), (u2, u1, room)]) + set(public_users), set([(u1, room), (u2, room)]) ) # User 1 and User 3 share private rooms @@ -251,7 +255,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): ) # User 1 and 2 are in public rooms - self.assertEqual(set(public_users), set([u1, u2])) + self.assertEqual(set(visible_users), set([u1, u2])) def test_search_all_users(self): """ -- cgit 1.5.1 From 6f5890b2fae4fad92b9448dfaf3ca6c37afc5720 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 12 Mar 2019 22:27:56 +1100 Subject: fixup --- synapse/storage/user_directory.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index af4260bc61..b848d9db00 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -21,6 +21,7 @@ from six import iteritems from twisted.internet import defer from synapse.api.constants import EventTypes, JoinRules +from synapse.storage._base import SQLBaseStore from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.storage.state import StateFilter from synapse.types import get_domain_from_id, get_localpart_from_id @@ -29,7 +30,7 @@ from synapse.util.caches.descriptors import cached logger = logging.getLogger(__name__) -class UserDirectoryStore(object): +class UserDirectoryStore(SQLBaseStore): @defer.inlineCallbacks def is_room_world_readable_or_publicly_joinable(self, room_id): """Check if the room is either world_readable or publically joinable -- cgit 1.5.1 From 81d9d1bee6bd67ebf7440d6b885210fe67dbe3d1 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 12 Mar 2019 22:28:48 +1100 Subject: fixup --- synapse/storage/user_directory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git 
a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index b848d9db00..0e6619222c 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -266,7 +266,7 @@ class UserDirectoryStore(SQLBaseStore): in the given room_id """ user_ids_share_pub = yield self._simple_select_onecol( - table="publicly_visible_users", + table="users_in_public_rooms", keyvalues={"room_id": room_id}, retcol="user_id", desc="get_users_in_dir_due_to_room", -- cgit 1.5.1 From 797b6a63fc5f8cb70d15ca0b98e871a57e712f0c Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Wed, 13 Mar 2019 01:17:51 +1100 Subject: fixup --- synapse/server.py | 5 ++ .../schema/delta/53/users_in_public_rooms.sql | 7 --- synapse/storage/user_directory.py | 59 ++++------------------ tests/handlers/test_user_directory.py | 21 +------- 4 files changed, 17 insertions(+), 75 deletions(-) (limited to 'synapse') diff --git a/synapse/server.py b/synapse/server.py index c992bbaa5f..b9549dd042 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -226,6 +226,11 @@ class HomeServer(object): logger.info("Finished setting up.") def setup_master(self): + """ + Some handlers have side effects on instantiation (like registering + background updates). This function causes them to be fetched, and + therefore instantiated, to run those side effects. 
+ """ for i in self.REQUIRED_ON_MASTER_STARTUP: getattr(self, "get_" + i)() diff --git a/synapse/storage/schema/delta/53/users_in_public_rooms.sql b/synapse/storage/schema/delta/53/users_in_public_rooms.sql index 40adc98387..f7827ca6d2 100644 --- a/synapse/storage/schema/delta/53/users_in_public_rooms.sql +++ b/synapse/storage/schema/delta/53/users_in_public_rooms.sql @@ -26,10 +26,3 @@ CREATE TABLE IF NOT EXISTS users_in_public_rooms ( ); CREATE UNIQUE INDEX users_in_public_rooms_u_idx ON users_in_public_rooms(user_id, room_id); - --- Track what users are publicly visible -CREATE TABLE IF NOT EXISTS publicly_visible_users ( - user_id TEXT NOT NULL -); - -CREATE UNIQUE INDEX publicly_visible_users_u_idx ON publicly_visible_users(user_id); diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 0e6619222c..8fd4fd50da 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -240,9 +240,6 @@ class UserDirectoryStore(SQLBaseStore): self._simple_delete_txn( txn, table="user_directory_search", keyvalues={"user_id": user_id} ) - self._simple_delete_txn( - txn, table="publicly_visible_users", keyvalues={"user_id": user_id} - ) self._simple_delete_txn( txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} ) @@ -352,18 +349,6 @@ class UserDirectoryStore(SQLBaseStore): value_values=None, ) - # If it's a public room, also update them in publicly_visible_users. - # We don't look before they're in the table before we do it, as it's - # more efficient to simply have Postgres do that (one UPSERT vs one - # SELECT and maybe one INSERT). 
- for user_id in user_ids: - self._simple_upsert_txn( - txn, - "publicly_visible_users", - keyvalues={"user_id": user_id}, - values={}, - ) - return self.runInteraction( "add_users_in_public_rooms", _add_users_in_public_rooms_txn ) @@ -395,20 +380,6 @@ class UserDirectoryStore(SQLBaseStore): keyvalues={"user_id": user_id, "room_id": room_id}, ) - # Are the users still in a public room after we deleted them from this one? - still_in_public = self._simple_select_one_onecol_txn( - txn, - "users_in_public_rooms", - keyvalues={"user_id": user_id}, - retcol="user_id", - allow_none=True, - ) - - if still_in_public is None: - self._simple_delete_txn( - txn, table="publicly_visible_users", keyvalues={"user_id": user_id} - ) - return self.runInteraction( "remove_user_who_share_room", _remove_user_who_share_room_txn ) @@ -476,7 +447,6 @@ class UserDirectoryStore(SQLBaseStore): txn.execute("DELETE FROM user_directory") txn.execute("DELETE FROM user_directory_search") txn.execute("DELETE FROM users_in_public_rooms") - txn.execute("DELETE FROM publicly_visible_users") txn.execute("DELETE FROM users_who_share_private_rooms") txn.call_after(self.get_user_in_directory.invalidate_all) @@ -583,22 +553,19 @@ class UserDirectoryStore(SQLBaseStore): """ if self.hs.config.user_directory_search_all_users: - # make s.user_id null to keep the ordering algorithm happy - join_clause = """ - CROSS JOIN (SELECT NULL as user_id) AS s - """ join_args = () where_clause = "1=1" else: - join_clause = """ - LEFT JOIN publicly_visible_users AS p USING (user_id) - LEFT JOIN ( - SELECT other_user_id AS user_id FROM users_who_share_private_rooms - WHERE user_id = ? - ) AS s USING (user_id) - """ join_args = (user_id,) - where_clause = "(s.user_id IS NOT NULL OR p.user_id IS NOT NULL)" + where_clause = """ + ( + EXISTS (select 1 from users_in_public_rooms WHERE user_id = t.user_id) + OR EXISTS ( + SELECT 1 FROM users_who_share_private_rooms + WHERE user_id = ? 
AND other_user_id = t.user_id + ) + ) + """ if isinstance(self.database_engine, PostgresEngine): full_query, exact_query, prefix_query = _parse_query_postgres(search_term) @@ -610,9 +577,8 @@ class UserDirectoryStore(SQLBaseStore): # search: (domain, _, display name, localpart) sql = """ SELECT d.user_id AS user_id, display_name, avatar_url - FROM user_directory_search + FROM user_directory_search as t INNER JOIN user_directory AS d USING (user_id) - %s WHERE %s AND vector @@ to_tsquery('english', ?) @@ -639,7 +605,6 @@ class UserDirectoryStore(SQLBaseStore): avatar_url IS NULL LIMIT ? """ % ( - join_clause, where_clause, ) args = join_args + (full_query, exact_query, prefix_query, limit + 1) @@ -648,9 +613,8 @@ class UserDirectoryStore(SQLBaseStore): sql = """ SELECT d.user_id AS user_id, display_name, avatar_url - FROM user_directory_search + FROM user_directory_search as t INNER JOIN user_directory AS d USING (user_id) - %s WHERE %s AND value MATCH ? @@ -660,7 +624,6 @@ class UserDirectoryStore(SQLBaseStore): avatar_url IS NULL LIMIT ? """ % ( - join_clause, where_clause, ) args = join_args + (search_query, limit + 1) diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index d8248def3f..114807efc1 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -116,9 +116,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): # Check we have populated the database correctly. shares_private = self.get_users_who_share_private_rooms() public_users = self.get_users_in_public_rooms() - visible_users = self.get_publicly_visible_users() - self.assertEqual(visible_users, []) self.assertEqual( self._compress_shared(shares_private), set([(u1, u2, room), (u2, u1, room)]) ) @@ -142,9 +140,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): # Check we have removed the values. 
shares_private = self.get_users_who_share_private_rooms() public_users = self.get_users_in_public_rooms() - visible_users = self.get_publicly_visible_users() - self.assertEqual(visible_users, []) self.assertEqual(self._compress_shared(shares_private), set()) self.assertEqual(public_users, []) @@ -177,15 +173,6 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): retval.append((i["user_id"], i["room_id"])) return retval - def get_publicly_visible_users(self): - return self.get_success( - self.store._simple_select_onecol( - "publicly_visible_users", - None, - "user_id", - ) - ) - def get_users_who_share_private_rooms(self): return self.get_success( self.store._simple_select_list( @@ -219,11 +206,9 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): shares_private = self.get_users_who_share_private_rooms() public_users = self.get_users_in_public_rooms() - visible_users = self.get_publicly_visible_users() # Nothing updated yet self.assertEqual(shares_private, []) - self.assertEqual(visible_users, []) self.assertEqual(public_users, []) # Reset the handled users caches @@ -241,9 +226,8 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): shares_private = self.get_users_who_share_private_rooms() public_users = self.get_users_in_public_rooms() - visible_users = self.get_publicly_visible_users() - # User 1 and User 2 share public rooms + # User 1 and User 2 are in the same public room self.assertEqual( set(public_users), set([(u1, room), (u2, room)]) ) @@ -254,9 +238,6 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): set([(u1, u3, private_room), (u3, u1, private_room)]), ) - # User 1 and 2 are in public rooms - self.assertEqual(set(visible_users), set([u1, u2])) - def test_search_all_users(self): """ Search all users = True means that a user does not have to share a -- cgit 1.5.1 From c0332d095f6116c1e8af2738bcc8f1fbe5b4432c Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Wed, 13 Mar 2019 01:30:54 +1100 Subject: fixup --- 
synapse/handlers/user_directory.py | 2 +- synapse/storage/user_directory.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index f9f7b8abd0..d92f8c529c 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -517,7 +517,7 @@ class UserDirectoryHandler(object): yield self.store.remove_user_who_share_room(user_id, room_id) # Are they still in any rooms? If not, remove them entirely. - rooms_user_is_in = yield self.store.get_rooms_user_is_in(user_id) + rooms_user_is_in = yield self.store.get_user_dir_rooms_user_is_in(user_id) if len(rooms_user_is_in) == 0: yield self.store.remove_from_user_dir(user_id) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 8fd4fd50da..1c00b956e5 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -385,7 +385,7 @@ class UserDirectoryStore(SQLBaseStore): ) @defer.inlineCallbacks - def get_rooms_user_is_in(self, user_id): + def get_user_dir_rooms_user_is_in(self, user_id): """ Returns the rooms that a user is in. -- cgit 1.5.1 From d42c81d724477803e6b0db6017281a3394a9cee5 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Tue, 12 Mar 2019 14:42:53 +0000 Subject: Transfer local user's push rules on room upgrade (#4838) Transfer push rules (notifications) on room upgrade --- changelog.d/4838.bugfix | 1 + synapse/handlers/room_member.py | 4 +++ synapse/storage/push_rule.py | 57 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+) create mode 100644 changelog.d/4838.bugfix (limited to 'synapse') diff --git a/changelog.d/4838.bugfix b/changelog.d/4838.bugfix new file mode 100644 index 0000000000..7f4fceabff --- /dev/null +++ b/changelog.d/4838.bugfix @@ -0,0 +1 @@ +Transfer a user's notification settings (push rules) on room upgrade. 
\ No newline at end of file diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 190ea2c7b1..aead9e4608 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -232,6 +232,10 @@ class RoomMemberHandler(object): self.copy_room_tags_and_direct_to_room( predecessor["room_id"], room_id, user_id, ) + # Move over old push rules + self.store.move_push_rules_from_room_to_room_for_user( + predecessor["room_id"], room_id, user_id, + ) elif event.membership == Membership.LEAVE: if prev_member_event_id: prev_member_event = yield self.store.get_event(prev_member_event_id) diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py index 6a5028961d..4b8438c3e9 100644 --- a/synapse/storage/push_rule.py +++ b/synapse/storage/push_rule.py @@ -185,6 +185,63 @@ class PushRulesWorkerStore(ApplicationServiceWorkerStore, defer.returnValue(results) + @defer.inlineCallbacks + def move_push_rule_from_room_to_room( + self, new_room_id, user_id, rule, + ): + """Move a single push rule from one room to another for a specific user. + + Args: + new_room_id (str): ID of the new room. + user_id (str): ID of user the push rule belongs to. + rule (Dict): A push rule. 
+ """ + # Create new rule id + rule_id_scope = '/'.join(rule["rule_id"].split('/')[:-1]) + new_rule_id = rule_id_scope + "/" + new_room_id + + # Change room id in each condition + for condition in rule.get("conditions", []): + if condition.get("key") == "room_id": + condition["pattern"] = new_room_id + + # Add the rule for the new room + yield self.add_push_rule( + user_id=user_id, + rule_id=new_rule_id, + priority_class=rule["priority_class"], + conditions=rule["conditions"], + actions=rule["actions"], + ) + + # Delete push rule for the old room + yield self.delete_push_rule(user_id, rule["rule_id"]) + + @defer.inlineCallbacks + def move_push_rules_from_room_to_room_for_user( + self, old_room_id, new_room_id, user_id, + ): + """Move all of the push rules from one room to another for a specific + user. + + Args: + old_room_id (str): ID of the old room. + new_room_id (str): ID of the new room. + user_id (str): ID of user to copy push rules for. + """ + # Retrieve push rules for this user + user_push_rules = yield self.get_push_rules_for_user(user_id) + + # Get rules relating to the old room, move them to the new room, then + # delete them from the old room + for rule in user_push_rules: + conditions = rule.get("conditions", []) + if any((c.get("key") == "room_id" and + c.get("pattern") == old_room_id) for c in conditions): + self.move_push_rule_from_room_to_room( + new_room_id, user_id, rule, + ) + @defer.inlineCallbacks def bulk_get_push_rules_for_room(self, event, context): state_group = context.state_group -- cgit 1.5.1 From 7998ca3a6616653fe8b76da3d7fd3c91d0d9597e Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Wed, 13 Mar 2019 15:26:29 +0000 Subject: Document using a certificate with a full chain (#4849) --- INSTALL.md | 8 ++++++-- changelog.d/4849.misc | 1 + docs/sample_config.yaml | 5 +++++ synapse/config/tls.py | 5 +++++ 4 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 
changelog.d/4849.misc (limited to 'synapse') diff --git a/INSTALL.md b/INSTALL.md index 76833e0f8c..de6893530d 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -375,9 +375,13 @@ To configure Synapse to expose an HTTPS port, you will need to edit * You will also need to uncomment the `tls_certificate_path` and `tls_private_key_path` lines under the `TLS` section. You can either point these settings at an existing certificate and key, or you can - enable Synapse's built-in ACME (Let's Encrypt) support. Instructions + enable Synapse's built-in ACME (Let's Encrypt) support. Instructions for having Synapse automatically provision and renew federation - certificates through ACME can be found at [ACME.md](docs/ACME.md). + certificates through ACME can be found at [ACME.md](docs/ACME.md). If you + are using your own certificate, be sure to use a `.pem` file that includes + the full certificate chain including any intermediate certificates (for + instance, if using certbot, use `fullchain.pem` as your certificate, not + `cert.pem`). For those of you upgrading your TLS certificate in readiness for Synapse 1.0, please take a look at `our guide `_. diff --git a/changelog.d/4849.misc b/changelog.d/4849.misc new file mode 100644 index 0000000000..f2cab20b44 --- /dev/null +++ b/changelog.d/4849.misc @@ -0,0 +1 @@ +Update install docs to explicitly state a full-chain (not just the top-level) TLS certificate must be provided to Synapse. This caused some people's Synapse ports to appear correct in a browser but still (rightfully so) upset the federation tester. \ No newline at end of file diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 22d5e6b1d7..5f2534e465 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -246,6 +246,11 @@ listeners: # See 'ACME support' below to enable auto-provisioning this certificate via # Let's Encrypt. 
# +# If supplying your own, be sure to use a `.pem` file that includes the +# full certificate chain including any intermediate certificates (for +# instance, if using certbot, use `fullchain.pem` as your certificate, +# not `cert.pem`). +# #tls_certificate_path: "CONFDIR/SERVERNAME.tls.crt" # PEM-encoded private key for TLS diff --git a/synapse/config/tls.py b/synapse/config/tls.py index 40045de7ac..f0014902da 100644 --- a/synapse/config/tls.py +++ b/synapse/config/tls.py @@ -181,6 +181,11 @@ class TlsConfig(Config): # See 'ACME support' below to enable auto-provisioning this certificate via # Let's Encrypt. # + # If supplying your own, be sure to use a `.pem` file that includes the + # full certificate chain including any intermediate certificates (for + # instance, if using certbot, use `fullchain.pem` as your certificate, + # not `cert.pem`). + # #tls_certificate_path: "%(tls_certificate_path)s" # PEM-encoded private key for TLS -- cgit 1.5.1 From eed7271b3b1e754e1b164da61d225ca4326cfa0a Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 12 Mar 2019 16:50:58 +0000 Subject: declare a ReadReceipt class I'm going to use this in queues and things, so it'll be useful to give it more of a structure. --- synapse/handlers/receipts.py | 46 +++++++++++++++++++++----------------------- synapse/types.py | 12 ++++++++++++ 2 files changed, 34 insertions(+), 24 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 1728089667..733e7c3752 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -16,7 +16,7 @@ import logging from twisted.internet import defer -from synapse.types import get_domain_from_id +from synapse.types import ReadReceipt, get_domain_from_id from ._base import BaseHandler @@ -42,13 +42,13 @@ class ReceiptsHandler(BaseHandler): """Called when we receive an EDU of type m.receipt from a remote HS. 
""" receipts = [ - { - "room_id": room_id, - "receipt_type": receipt_type, - "user_id": user_id, - "event_ids": user_values["event_ids"], - "data": user_values.get("data", {}), - } + ReadReceipt( + room_id=room_id, + receipt_type=receipt_type, + user_id=user_id, + event_ids=user_values["event_ids"], + data=user_values.get("data", {}), + ) for room_id, room_values in content.items() for receipt_type, users in room_values.items() for user_id, user_values in users.items() @@ -64,14 +64,12 @@ class ReceiptsHandler(BaseHandler): max_batch_id = None for receipt in receipts: - room_id = receipt["room_id"] - receipt_type = receipt["receipt_type"] - user_id = receipt["user_id"] - event_ids = receipt["event_ids"] - data = receipt["data"] - res = yield self.store.insert_receipt( - room_id, receipt_type, user_id, event_ids, data + receipt.room_id, + receipt.receipt_type, + receipt.user_id, + receipt.event_ids, + receipt.data, ) if not res: @@ -107,15 +105,15 @@ class ReceiptsHandler(BaseHandler): """Called when a client tells us a local user has read up to the given event_id in the room. """ - receipt = { - "room_id": room_id, - "receipt_type": receipt_type, - "user_id": user_id, - "event_ids": [event_id], - "data": { + receipt = ReadReceipt( + room_id=room_id, + receipt_type=receipt_type, + user_id=user_id, + event_ids=[event_id], + data={ "ts": int(self.clock.time_msec()), - } - } + }, + ) is_new = yield self._handle_new_receipts([receipt]) if not is_new: @@ -124,7 +122,7 @@ class ReceiptsHandler(BaseHandler): # Work out which remote servers should be poked and poke them. # TODO: optimise this to move some of the work to the workers. - data = receipt["data"] + data = receipt.data # XXX why does this not use state.get_current_hosts_in_room() ? 
users = yield self.state.get_current_user_in_room(room_id) diff --git a/synapse/types.py b/synapse/types.py index d8cb64addb..3de94b6335 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -16,6 +16,8 @@ import re import string from collections import namedtuple +import attr + from synapse.api.errors import SynapseError @@ -455,3 +457,13 @@ class ThirdPartyInstanceID( @classmethod def create(cls, appservice_id, network_id,): return cls(appservice_id=appservice_id, network_id=network_id) + + +@attr.s(slots=True) +class ReadReceipt(object): + """Information about a read-receipt""" + room_id = attr.ib() + receipt_type = attr.ib() + user_id = attr.ib() + event_ids = attr.ib() + data = attr.ib() -- cgit 1.5.1 From fdcad8eabdf20718d053255555fb27ac190613c4 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 13 Mar 2019 15:55:37 +0000 Subject: Move client receipt processing to federation sender worker. This is mostly a prerequisite for #4730, but also fits with the general theme of "move everything off the master that we possibly can". 
--- synapse/app/federation_sender.py | 30 ++++++++++++++++++++++++++ synapse/federation/send_queue.py | 9 ++++++++ synapse/federation/transaction_queue.py | 35 +++++++++++++++++++++++++++++++ synapse/handlers/receipts.py | 37 ++++----------------------------- 4 files changed, 78 insertions(+), 33 deletions(-) (limited to 'synapse') diff --git a/synapse/app/federation_sender.py b/synapse/app/federation_sender.py index a461442fdc..9711a7147c 100644 --- a/synapse/app/federation_sender.py +++ b/synapse/app/federation_sender.py @@ -28,6 +28,7 @@ from synapse.config.logger import setup_logging from synapse.federation import send_queue from synapse.http.site import SynapseSite from synapse.metrics import RegistryProxy +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.metrics.resource import METRICS_PREFIX, MetricsResource from synapse.replication.slave.storage.deviceinbox import SlavedDeviceInboxStore from synapse.replication.slave.storage.devices import SlavedDeviceStore @@ -37,8 +38,10 @@ from synapse.replication.slave.storage.receipts import SlavedReceiptsStore from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.replication.slave.storage.transactions import SlavedTransactionStore from synapse.replication.tcp.client import ReplicationClientHandler +from synapse.replication.tcp.streams import ReceiptsStream from synapse.server import HomeServer from synapse.storage.engines import create_engine +from synapse.types import ReadReceipt from synapse.util.async_helpers import Linearizer from synapse.util.httpresourcetree import create_resource_tree from synapse.util.logcontext import LoggingContext, run_in_background @@ -202,6 +205,7 @@ class FederationSenderHandler(object): """ def __init__(self, hs, replication_client): self.store = hs.get_datastore() + self._is_mine_id = hs.is_mine_id self.federation_sender = hs.get_federation_sender() self.replication_client = replication_client @@ 
-234,6 +238,32 @@ class FederationSenderHandler(object): elif stream_name == "events": self.federation_sender.notify_new_events(token) + # ... and when new receipts happen + elif stream_name == ReceiptsStream.NAME: + run_as_background_process( + "process_receipts_for_federation", self._on_new_receipts, rows, + ) + + @defer.inlineCallbacks + def _on_new_receipts(self, rows): + """ + Args: + rows (iterable[synapse.replication.tcp.streams.ReceiptsStreamRow]): + new receipts to be processed + """ + for receipt in rows: + # we only want to send on receipts for our own users + if not self._is_mine_id(receipt.user_id): + continue + receipt_info = ReadReceipt( + receipt.room_id, + receipt.receipt_type, + receipt.user_id, + [receipt.event_id], + receipt.data, + ) + yield self.federation_sender.send_read_receipt(receipt_info) + @defer.inlineCallbacks def update_token(self, token): try: diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py index b7d0b25781..bcb41da338 100644 --- a/synapse/federation/send_queue.py +++ b/synapse/federation/send_queue.py @@ -183,6 +183,15 @@ class FederationRemoteSendQueue(object): self.notifier.on_new_replication_data() + def send_read_receipt(self, receipt): + """As per TransactionQueue + + Args: + receipt (synapse.types.ReadReceipt): + """ + # nothing to do here: the replication listener will handle it. 
+ pass + def send_presence(self, states): """As per TransactionQueue diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index e5e42c647d..288cb5045c 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -290,6 +290,41 @@ class TransactionQueue(object): self._attempt_new_transaction(destination) + @defer.inlineCallbacks + def send_read_receipt(self, receipt): + """Send a RR to any other servers in the room + + Args: + receipt (synapse.types.ReadReceipt): receipt to be sent + """ + # Work out which remote servers should be poked and poke them. + domains = yield self.state.get_current_hosts_in_room(receipt.room_id) + domains = [d for d in domains if d != self.server_name] + if not domains: + return + + logger.debug("Sending receipt to: %r", domains) + + content = { + receipt.room_id: { + receipt.receipt_type: { + receipt.user_id: { + "event_ids": receipt.event_ids, + "data": receipt.data, + }, + }, + }, + } + key = (receipt.room_id, receipt.receipt_type, receipt.user_id) + + for domain in domains: + self.build_and_send_edu( + destination=domain, + edu_type="m.receipt", + content=content, + key=key, + ) + @logcontext.preserve_fn # the caller should not yield on this @defer.inlineCallbacks def send_presence(self, states): diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 733e7c3752..dd783ae134 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -16,9 +16,8 @@ import logging from twisted.internet import defer -from synapse.types import ReadReceipt, get_domain_from_id - -from ._base import BaseHandler +from synapse.handlers._base import BaseHandler +from synapse.types import ReadReceipt logger = logging.getLogger(__name__) @@ -87,7 +86,7 @@ class ReceiptsHandler(BaseHandler): # no new receipts defer.returnValue(False) - affected_room_ids = list(set([r["room_id"] for r in receipts])) + affected_room_ids = list(set([r.room_id 
for r in receipts])) self.notifier.on_new_event( "receipt_key", max_batch_id, rooms=affected_room_ids @@ -119,35 +118,7 @@ class ReceiptsHandler(BaseHandler): if not is_new: return - # Work out which remote servers should be poked and poke them. - - # TODO: optimise this to move some of the work to the workers. - data = receipt.data - - # XXX why does this not use state.get_current_hosts_in_room() ? - users = yield self.state.get_current_user_in_room(room_id) - remotedomains = set(get_domain_from_id(u) for u in users) - remotedomains = remotedomains.copy() - remotedomains.discard(self.server_name) - - logger.debug("Sending receipt to: %r", remotedomains) - - for domain in remotedomains: - self.federation.build_and_send_edu( - destination=domain, - edu_type="m.receipt", - content={ - room_id: { - receipt_type: { - user_id: { - "event_ids": [event_id], - "data": data, - } - } - }, - }, - key=(room_id, receipt_type, user_id), - ) + self.federation.send_read_receipt(receipt) @defer.inlineCallbacks def get_receipts_for_room(self, room_id, to_key): -- cgit 1.5.1 From 72bfaf746d17505df01dfa68b23ee43eb9f54144 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 13 Mar 2019 17:33:54 +0000 Subject: Allow passing --daemonize to workers --- synapse/config/_base.py | 8 +++++++- synapse/config/workers.py | 4 ++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/config/_base.py b/synapse/config/_base.py index c4d3087fa4..5613f38e4d 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -214,14 +214,20 @@ class Config(object): " Defaults to the directory containing the last config file", ) + obj = cls() + + obj.invoke_all("add_arguments", config_parser) + config_args = config_parser.parse_args(argv) config_files = find_config_files(search_paths=config_args.config_path) - obj = cls() obj.read_config_files( config_files, keys_directory=config_args.keys_directory, generate_keys=False ) + + obj.invoke_all("read_arguments", 
config_args) + return obj @classmethod diff --git a/synapse/config/workers.py b/synapse/config/workers.py index 80baf0ce0e..8dc013d0e8 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -57,3 +57,7 @@ class WorkerConfig(Config): bind_addresses.append(bind_address) elif not bind_addresses: bind_addresses.append('') + + def read_arguments(self, args): + if args.daemonize is not None: + self.worker_daemonize = args.daemonize -- cgit 1.5.1 From 5d89a526f1bd217148b3f82efd6ec156a78af894 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 12 Mar 2019 21:57:47 +0000 Subject: Factor per-destination stuff out of TransactionQueue This is easier than having to have a million fields keyed on destination. --- synapse/federation/transaction_queue.py | 314 ++++++++++++++++++-------------- 1 file changed, 182 insertions(+), 132 deletions(-) (limited to 'synapse') diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 288cb5045c..c1f6985ae4 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -27,6 +27,7 @@ from synapse.api.errors import ( HttpResponseException, RequestSendFailed, ) +from synapse.events import EventBase from synapse.handlers.presence import format_user_presence_state, get_interested_remotes from synapse.metrics import ( LaterGauge, @@ -36,6 +37,7 @@ from synapse.metrics import ( sent_transactions_counter, ) from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.storage import UserPresenceState from synapse.util import logcontext from synapse.util.metrics import measure_func from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter @@ -80,73 +82,47 @@ class TransactionQueue(object): self.store = hs.get_datastore() self.state = hs.get_state_handler() - self.transaction_actions = TransactionActions(self.store) - - self.transport_layer = hs.get_federation_transport_client() self.clock 
= hs.get_clock() self.is_mine_id = hs.is_mine_id - # Is a mapping from destinations -> deferreds. Used to keep track - # of which destinations have transactions in flight and when they are - # done - self.pending_transactions = {} + self._transaction_sender = TransactionSender(hs) + + # map from destination to PerDestinationQueue + self._per_destination_queues = {} # type: dict[str, PerDestinationQueue] LaterGauge( "synapse_federation_transaction_queue_pending_destinations", "", [], - lambda: len(self.pending_transactions), + lambda: sum( + 1 for d in self._per_destination_queues.values() + if d.transmission_loop_running + ), ) - # Is a mapping from destination -> list of - # tuple(pending pdus, deferred, order) - self.pending_pdus_by_dest = pdus = {} - # destination -> list of tuple(edu, deferred) - self.pending_edus_by_dest = edus = {} - # Map of user_id -> UserPresenceState for all the pending presence # to be sent out by user_id. Entries here get processed and put in # pending_presence_by_dest self.pending_presence = {} - # Map of destination -> user_id -> UserPresenceState of pending presence - # to be sent to each destinations - self.pending_presence_by_dest = presence = {} - - # Pending EDUs by their "key". Keyed EDUs are EDUs that get clobbered - # based on their key (e.g. typing events by room_id) - # Map of destination -> (edu_type, key) -> Edu - self.pending_edus_keyed_by_dest = edus_keyed = {} - LaterGauge( "synapse_federation_transaction_queue_pending_pdus", "", [], - lambda: sum(map(len, pdus.values())), + lambda: sum( + d.pending_pdu_count() for d in self._per_destination_queues.values() + ), ) LaterGauge( "synapse_federation_transaction_queue_pending_edus", "", [], - lambda: ( - sum(map(len, edus.values())) - + sum(map(len, presence.values())) - + sum(map(len, edus_keyed.values())) + lambda: sum( + d.pending_edu_count() for d in self._per_destination_queues.values() ), ) - # destination -> stream_id of last successfully sent to-device message. 
- # NB: may be a long or an int. - self.last_device_stream_id_by_dest = {} - - # destination -> stream_id of last successfully sent device list - # update. - self.last_device_list_stream_id_by_dest = {} - - # HACK to get unique tx id - self._next_txn_id = int(self.clock.time_msec()) - self._order = 1 self._is_processing = False @@ -154,6 +130,13 @@ class TransactionQueue(object): self._processing_pending_presence = False + def _get_per_destination_queue(self, destination): + queue = self._per_destination_queues.get(destination) + if not queue: + queue = PerDestinationQueue(self.hs, self._transaction_sender, destination) + self._per_destination_queues[destination] = queue + return queue + def notify_new_events(self, current_id): """This gets called when we have some new events we might want to send out to other servers. @@ -284,11 +267,7 @@ class TransactionQueue(object): sent_pdus_destination_dist_count.inc() for destination in destinations: - self.pending_pdus_by_dest.setdefault(destination, []).append( - (pdu, order) - ) - - self._attempt_new_transaction(destination) + self._get_per_destination_queue(destination).send_pdu(pdu, order) @defer.inlineCallbacks def send_read_receipt(self, receipt): @@ -387,14 +366,7 @@ class TransactionQueue(object): for destination in destinations: if destination == self.server_name: continue - - self.pending_presence_by_dest.setdefault( - destination, {} - ).update({ - state.user_id: state for state in states - }) - - self._attempt_new_transaction(destination) + self._get_per_destination_queue(destination).send_presence(states) def build_and_send_edu(self, destination, edu_type, content, key=None): """Construct an Edu object, and queue it for sending @@ -425,73 +397,136 @@ class TransactionQueue(object): edu (Edu): edu to send key (Any|None): clobbering key for this edu """ + queue = self._get_per_destination_queue(edu.destination) if key: - self.pending_edus_keyed_by_dest.setdefault( - edu.destination, {} - )[(edu.edu_type, key)] = 
edu + queue.send_keyed_edu(edu, key) else: - self.pending_edus_by_dest.setdefault(edu.destination, []).append(edu) - - self._attempt_new_transaction(edu.destination) + queue.send_edu(edu) def send_device_messages(self, destination): if destination == self.server_name: logger.info("Not sending device update to ourselves") return - self._attempt_new_transaction(destination) + self._get_per_destination_queue(destination).attempt_new_transaction() def get_current_token(self): return 0 - def _attempt_new_transaction(self, destination): + +class PerDestinationQueue(object): + """ + Manages the per-destination transmission queues. + """ + def __init__(self, hs, transaction_sender, destination): + self._server_name = hs.hostname + self._clock = hs.get_clock() + self._store = hs.get_datastore() + self._transaction_sender = transaction_sender + + self._destination = destination + self.transmission_loop_running = False + + # a list of tuples of (pending pdu, order) + self._pending_pdus = [] # type: list[tuple[EventBase, int]] + self._pending_edus = [] # type: list[Edu] + + # Pending EDUs by their "key". Keyed EDUs are EDUs that get clobbered + # based on their key (e.g. typing events by room_id) + # Map of (edu_type, key) -> Edu + self._pending_edus_keyed = {} # type: dict[tuple[str, str], Edu] + + # Map of user_id -> UserPresenceState of pending presence to be sent to this + # destination + self._pending_presence = {} # type: dict[str, UserPresenceState] + + # stream_id of last successfully sent to-device message. + # NB: may be a long or an int. + self._last_device_stream_id = 0 + + # stream_id of last successfully sent device list update. 
+ self._last_device_list_stream_id = 0 + + def pending_pdu_count(self): + return len(self._pending_pdus) + + def pending_edu_count(self): + return ( + len(self._pending_edus) + + len(self._pending_presence) + + len(self._pending_edus_keyed) + ) + + def send_pdu(self, pdu, order): + """Add a PDU to the queue, and start the transmission loop if neccessary + + Args: + pdu (EventBase): pdu to send + order (int): + """ + self._pending_pdus.append((pdu, order)) + self.attempt_new_transaction() + + def send_presence(self, states): + """Add presence updates to the queue. Start the transmission loop if neccessary. + + Args: + states (iterable[UserPresenceState]): presence to send + """ + self._pending_presence.update({ + state.user_id: state for state in states + }) + self.attempt_new_transaction() + + def send_keyed_edu(self, edu, key): + self._pending_edus_keyed[(edu.edu_type, key)] = edu + self.attempt_new_transaction() + + def send_edu(self, edu): + self._pending_edus.append(edu) + self.attempt_new_transaction() + + def attempt_new_transaction(self): """Try to start a new transaction to this destination If there is already a transaction in progress to this destination, returns immediately. Otherwise kicks off the process of sending a transaction in the background. - - Args: - destination (str): - - Returns: - None """ # list of (pending_pdu, deferred, order) - if destination in self.pending_transactions: - # XXX: pending_transactions can get stuck on by a never-ending - # request at which point pending_pdus_by_dest just keeps growing. + if self.transmission_loop_running: + # XXX: this can get stuck on by a never-ending + # request at which point pending_pdus just keeps growing. 
# we need application-layer timeouts of some flavour of these # requests logger.debug( "TX [%s] Transaction already in progress", - destination + self._destination ) return - logger.debug("TX [%s] Starting transaction loop", destination) + logger.debug("TX [%s] Starting transaction loop", self._destination) run_as_background_process( "federation_transaction_transmission_loop", self._transaction_transmission_loop, - destination, ) @defer.inlineCallbacks - def _transaction_transmission_loop(self, destination): + def _transaction_transmission_loop(self): pending_pdus = [] try: - self.pending_transactions[destination] = 1 + self.transmission_loop_running = True # This will throw if we wouldn't retry. We do this here so we fail # quickly, but we will later check this again in the http client, # hence why we throw the result away. - yield get_retry_limiter(destination, self.clock, self.store) + yield get_retry_limiter(self._destination, self._clock, self._store) pending_pdus = [] while True: device_message_edus, device_stream_id, dev_list_id = ( - yield self._get_new_device_messages(destination) + yield self._get_new_device_messages() ) # BEGIN CRITICAL SECTION @@ -501,39 +536,38 @@ class TransactionQueue(object): # where we decide if we actually have any pending messages) is # atomic - otherwise new PDUs or EDUs might arrive in the # meantime, but not get sent because we hold the - # pending_transactions flag. + # transmission_loop_running flag. 
- pending_pdus = self.pending_pdus_by_dest.pop(destination, []) + pending_pdus = self._pending_pdus # We can only include at most 50 PDUs per transactions - pending_pdus, leftover_pdus = pending_pdus[:50], pending_pdus[50:] - if leftover_pdus: - self.pending_pdus_by_dest[destination] = leftover_pdus + pending_pdus, self._pending_pdus = pending_pdus[:50], pending_pdus[50:] - pending_edus = self.pending_edus_by_dest.pop(destination, []) + pending_edus = self._pending_edus # We can only include at most 100 EDUs per transactions - pending_edus, leftover_edus = pending_edus[:100], pending_edus[100:] - if leftover_edus: - self.pending_edus_by_dest[destination] = leftover_edus - - pending_presence = self.pending_presence_by_dest.pop(destination, {}) + pending_edus, self._pending_edus = pending_edus[:100], pending_edus[100:] pending_edus.extend( - self.pending_edus_keyed_by_dest.pop(destination, {}).values() + self._pending_edus_keyed.values() ) + self._pending_edus_keyed = {} + pending_edus.extend(device_message_edus) + + pending_presence = self._pending_presence + self._pending_presence = {} if pending_presence: pending_edus.append( Edu( - origin=self.server_name, - destination=destination, + origin=self._server_name, + destination=self._destination, edu_type="m.presence", content={ "push": [ format_user_presence_state( - presence, self.clock.time_msec() + presence, self._clock.time_msec() ) for presence in pending_presence.values() ] @@ -543,19 +577,17 @@ class TransactionQueue(object): if pending_pdus: logger.debug("TX [%s] len(pending_pdus_by_dest[dest]) = %d", - destination, len(pending_pdus)) + self._destination, len(pending_pdus)) if not pending_pdus and not pending_edus: - logger.debug("TX [%s] Nothing to send", destination) - self.last_device_stream_id_by_dest[destination] = ( - device_stream_id - ) + logger.debug("TX [%s] Nothing to send", self._destination) + self._last_device_stream_id = device_stream_id return # END CRITICAL SECTION - success = yield 
self._send_new_transaction( - destination, pending_pdus, pending_edus, + success = yield self._transaction_sender.send_new_transaction( + self._destination, pending_pdus, pending_edus ) if success: sent_transactions_counter.inc() @@ -565,23 +597,25 @@ class TransactionQueue(object): # Remove the acknowledged device messages from the database # Only bother if we actually sent some device messages if device_message_edus: - yield self.store.delete_device_msgs_for_remote( - destination, device_stream_id + yield self._store.delete_device_msgs_for_remote( + self._destination, device_stream_id + ) + logger.info( + "Marking as sent %r %r", self._destination, dev_list_id ) - logger.info("Marking as sent %r %r", destination, dev_list_id) - yield self.store.mark_as_sent_devices_by_remote( - destination, dev_list_id + yield self._store.mark_as_sent_devices_by_remote( + self._destination, dev_list_id ) - self.last_device_stream_id_by_dest[destination] = device_stream_id - self.last_device_list_stream_id_by_dest[destination] = dev_list_id + self._last_device_stream_id = device_stream_id + self._last_device_list_stream_id = dev_list_id else: break except NotRetryingDestination as e: logger.debug( "TX [%s] not ready for retry yet (next retry at %s) - " "dropping transaction for now", - destination, + self._destination, datetime.datetime.fromtimestamp( (e.retry_last_ts + e.retry_interval) / 1000.0 ), @@ -591,51 +625,51 @@ class TransactionQueue(object): except HttpResponseException as e: logger.warning( "TX [%s] Received %d response to transaction: %s", - destination, e.code, e, + self._destination, e.code, e, ) except RequestSendFailed as e: - logger.warning("TX [%s] Failed to send transaction: %s", destination, e) + logger.warning("TX [%s] Failed to send transaction: %s", self._destination, e) for p, _ in pending_pdus: logger.info("Failed to send event %s to %s", p.event_id, - destination) + self._destination) except Exception: logger.exception( "TX [%s] Failed to send 
transaction", - destination, + self._destination, ) for p, _ in pending_pdus: logger.info("Failed to send event %s to %s", p.event_id, - destination) + self._destination) finally: - # We want to be *very* sure we delete this after we stop processing - self.pending_transactions.pop(destination, None) + # We want to be *very* sure we clear this after we stop processing + self.transmission_loop_running = False @defer.inlineCallbacks - def _get_new_device_messages(self, destination): - last_device_stream_id = self.last_device_stream_id_by_dest.get(destination, 0) - to_device_stream_id = self.store.get_to_device_stream_token() - contents, stream_id = yield self.store.get_new_device_msgs_for_remote( - destination, last_device_stream_id, to_device_stream_id + def _get_new_device_messages(self): + last_device_stream_id = self._last_device_stream_id + to_device_stream_id = self._store.get_to_device_stream_token() + contents, stream_id = yield self._store.get_new_device_msgs_for_remote( + self._destination, last_device_stream_id, to_device_stream_id ) edus = [ Edu( - origin=self.server_name, - destination=destination, + origin=self._server_name, + destination=self._destination, edu_type="m.direct_to_device", content=content, ) for content in contents ] - last_device_list = self.last_device_list_stream_id_by_dest.get(destination, 0) - now_stream_id, results = yield self.store.get_devices_by_remote( - destination, last_device_list + last_device_list = self._last_device_list_stream_id + now_stream_id, results = yield self._store.get_devices_by_remote( + self._destination, last_device_list ) edus.extend( Edu( - origin=self.server_name, - destination=destination, + origin=self._server_name, + destination=self._destination, edu_type="m.device_list_update", content=content, ) @@ -643,9 +677,25 @@ class TransactionQueue(object): ) defer.returnValue((edus, stream_id, now_stream_id)) + +class TransactionSender(object): + """Helper class which handles building and sending transactions 
+ + shared between PerDestinationQueue objects + """ + def __init__(self, hs): + self._server_name = hs.hostname + self._clock = hs.get_clock() + self._store = hs.get_datastore() + self._transaction_actions = TransactionActions(self._store) + self._transport_layer = hs.get_federation_transport_client() + + # HACK to get unique tx id + self._next_txn_id = int(self._clock.time_msec()) + @measure_func("_send_new_transaction") @defer.inlineCallbacks - def _send_new_transaction(self, destination, pending_pdus, pending_edus): + def send_new_transaction(self, destination, pending_pdus, pending_edus): # Sort based on the order field pending_pdus.sort(key=lambda t: t[1]) @@ -669,9 +719,9 @@ class TransactionQueue(object): logger.debug("TX [%s] Persisting transaction...", destination) transaction = Transaction.create_new( - origin_server_ts=int(self.clock.time_msec()), + origin_server_ts=int(self._clock.time_msec()), transaction_id=txn_id, - origin=self.server_name, + origin=self._server_name, destination=destination, pdus=pdus, edus=edus, @@ -679,7 +729,7 @@ class TransactionQueue(object): self._next_txn_id += 1 - yield self.transaction_actions.prepare_to_send(transaction) + yield self._transaction_actions.prepare_to_send(transaction) logger.debug("TX [%s] Persisted transaction", destination) logger.info( @@ -697,7 +747,7 @@ class TransactionQueue(object): # keys work def json_data_cb(): data = transaction.get_dict() - now = int(self.clock.time_msec()) + now = int(self._clock.time_msec()) if "pdus" in data: for p in data["pdus"]: if "age_ts" in p: @@ -707,7 +757,7 @@ class TransactionQueue(object): return data try: - response = yield self.transport_layer.send_transaction( + response = yield self._transport_layer.send_transaction( transaction, json_data_cb ) code = 200 @@ -727,7 +777,7 @@ class TransactionQueue(object): destination, txn_id, code ) - yield self.transaction_actions.delivered( + yield self._transaction_actions.delivered( transaction, code, response ) -- cgit 
1.5.1 From 02e23b36bc7cb892734699aed28c71616cf7fe79 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 13 Mar 2019 20:02:56 +0000 Subject: Rename and move the classes --- synapse/federation/send_queue.py | 14 +- synapse/federation/sender/__init__.py | 388 ++++++++++ synapse/federation/sender/per_destination_queue.py | 318 ++++++++ synapse/federation/sender/transaction_manager.py | 147 ++++ synapse/federation/transaction_queue.py | 801 --------------------- synapse/server.py | 4 +- synapse/server.pyi | 3 +- 7 files changed, 864 insertions(+), 811 deletions(-) create mode 100644 synapse/federation/sender/__init__.py create mode 100644 synapse/federation/sender/per_destination_queue.py create mode 100644 synapse/federation/sender/transaction_manager.py delete mode 100644 synapse/federation/transaction_queue.py (limited to 'synapse') diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py index bcb41da338..04d04a4457 100644 --- a/synapse/federation/send_queue.py +++ b/synapse/federation/send_queue.py @@ -46,7 +46,7 @@ logger = logging.getLogger(__name__) class FederationRemoteSendQueue(object): - """A drop in replacement for TransactionQueue""" + """A drop in replacement for FederationSender""" def __init__(self, hs): self.server_name = hs.hostname @@ -154,13 +154,13 @@ class FederationRemoteSendQueue(object): del self.device_messages[key] def notify_new_events(self, current_id): - """As per TransactionQueue""" + """As per FederationSender""" # We don't need to replicate this as it gets sent down a different # stream. 
pass def build_and_send_edu(self, destination, edu_type, content, key=None): - """As per TransactionQueue""" + """As per FederationSender""" if destination == self.server_name: logger.info("Not sending EDU to ourselves") return @@ -184,7 +184,7 @@ class FederationRemoteSendQueue(object): self.notifier.on_new_replication_data() def send_read_receipt(self, receipt): - """As per TransactionQueue + """As per FederationSender Args: receipt (synapse.types.ReadReceipt): @@ -193,7 +193,7 @@ class FederationRemoteSendQueue(object): pass def send_presence(self, states): - """As per TransactionQueue + """As per FederationSender Args: states (list(UserPresenceState)) @@ -210,7 +210,7 @@ class FederationRemoteSendQueue(object): self.notifier.on_new_replication_data() def send_device_messages(self, destination): - """As per TransactionQueue""" + """As per FederationSender""" pos = self._next_pos() self.device_messages[pos] = destination self.notifier.on_new_replication_data() @@ -448,7 +448,7 @@ def process_rows_for_federation(transaction_queue, rows): transaction queue ready for sending to the relevant homeservers. Args: - transaction_queue (TransactionQueue) + transaction_queue (FederationSender) rows (list(synapse.replication.tcp.streams.FederationStreamRow)) """ diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py new file mode 100644 index 0000000000..1bcc353d18 --- /dev/null +++ b/synapse/federation/sender/__init__.py @@ -0,0 +1,388 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from six import itervalues + +from prometheus_client import Counter + +from twisted.internet import defer + +import synapse.metrics +from synapse.federation.sender.per_destination_queue import PerDestinationQueue +from synapse.federation.sender.transaction_manager import TransactionManager +from synapse.federation.units import Edu +from synapse.handlers.presence import get_interested_remotes +from synapse.metrics import ( + LaterGauge, + event_processing_loop_counter, + event_processing_loop_room_count, + events_processed_counter, +) +from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.util import logcontext +from synapse.util.metrics import measure_func + +logger = logging.getLogger(__name__) + +sent_pdus_destination_dist_count = Counter( + "synapse_federation_client_sent_pdu_destinations:count", + "Number of PDUs queued for sending to one or more destinations", +) + +sent_pdus_destination_dist_total = Counter( + "synapse_federation_client_sent_pdu_destinations:total", "" + "Total number of PDUs queued for sending across all destinations", +) + + +class FederationSender(object): + def __init__(self, hs): + self.hs = hs + self.server_name = hs.hostname + + self.store = hs.get_datastore() + self.state = hs.get_state_handler() + + self.clock = hs.get_clock() + self.is_mine_id = hs.is_mine_id + + self._transaction_manager = TransactionManager(hs) + + # map from destination to PerDestinationQueue + self._per_destination_queues = {} # type: dict[str, PerDestinationQueue] + + LaterGauge( + "synapse_federation_transaction_queue_pending_destinations", + "", + [], + lambda: sum( + 1 for d in self._per_destination_queues.values() + if d.transmission_loop_running + ), + ) + + # Map of user_id -> UserPresenceState for all the pending presence + # to be sent out by user_id. 
Entries here get processed and put in + # pending_presence_by_dest + self.pending_presence = {} + + LaterGauge( + "synapse_federation_transaction_queue_pending_pdus", + "", + [], + lambda: sum( + d.pending_pdu_count() for d in self._per_destination_queues.values() + ), + ) + LaterGauge( + "synapse_federation_transaction_queue_pending_edus", + "", + [], + lambda: sum( + d.pending_edu_count() for d in self._per_destination_queues.values() + ), + ) + + self._order = 1 + + self._is_processing = False + self._last_poked_id = -1 + + self._processing_pending_presence = False + + def _get_per_destination_queue(self, destination): + queue = self._per_destination_queues.get(destination) + if not queue: + queue = PerDestinationQueue(self.hs, self._transaction_manager, destination) + self._per_destination_queues[destination] = queue + return queue + + def notify_new_events(self, current_id): + """This gets called when we have some new events we might want to + send out to other servers. + """ + self._last_poked_id = max(current_id, self._last_poked_id) + + if self._is_processing: + return + + # fire off a processing loop in the background + run_as_background_process( + "process_event_queue_for_federation", + self._process_event_queue_loop, + ) + + @defer.inlineCallbacks + def _process_event_queue_loop(self): + try: + self._is_processing = True + while True: + last_token = yield self.store.get_federation_out_pos("events") + next_token, events = yield self.store.get_all_new_events_stream( + last_token, self._last_poked_id, limit=100, + ) + + logger.debug("Handling %s -> %s", last_token, next_token) + + if not events and next_token >= self._last_poked_id: + break + + @defer.inlineCallbacks + def handle_event(event): + # Only send events for this server. + send_on_behalf_of = event.internal_metadata.get_send_on_behalf_of() + is_mine = self.is_mine_id(event.sender) + if not is_mine and send_on_behalf_of is None: + return + + try: + # Get the state from before the event. 
+ # We need to make sure that this is the state from before + # the event and not from after it. + # Otherwise if the last member on a server in a room is + # banned then it won't receive the event because it won't + # be in the room after the ban. + destinations = yield self.state.get_current_hosts_in_room( + event.room_id, latest_event_ids=event.prev_event_ids(), + ) + except Exception: + logger.exception( + "Failed to calculate hosts in room for event: %s", + event.event_id, + ) + return + + destinations = set(destinations) + + if send_on_behalf_of is not None: + # If we are sending the event on behalf of another server + # then it already has the event and there is no reason to + # send the event to it. + destinations.discard(send_on_behalf_of) + + logger.debug("Sending %s to %r", event, destinations) + + self._send_pdu(event, destinations) + + @defer.inlineCallbacks + def handle_room_events(events): + for event in events: + yield handle_event(event) + + events_by_room = {} + for event in events: + events_by_room.setdefault(event.room_id, []).append(event) + + yield logcontext.make_deferred_yieldable(defer.gatherResults( + [ + logcontext.run_in_background(handle_room_events, evs) + for evs in itervalues(events_by_room) + ], + consumeErrors=True + )) + + yield self.store.update_federation_out_pos( + "events", next_token + ) + + if events: + now = self.clock.time_msec() + ts = yield self.store.get_received_ts(events[-1].event_id) + + synapse.metrics.event_processing_lag.labels( + "federation_sender").set(now - ts) + synapse.metrics.event_processing_last_ts.labels( + "federation_sender").set(ts) + + events_processed_counter.inc(len(events)) + + event_processing_loop_room_count.labels( + "federation_sender" + ).inc(len(events_by_room)) + + event_processing_loop_counter.labels("federation_sender").inc() + + synapse.metrics.event_processing_positions.labels( + "federation_sender").set(next_token) + + finally: + self._is_processing = False + + def _send_pdu(self, pdu, 
destinations): + # We loop through all destinations to see whether we already have + # a transaction in progress. If we do, stick it in the pending_pdus + # table and we'll get back to it later. + + order = self._order + self._order += 1 + + destinations = set(destinations) + destinations.discard(self.server_name) + logger.debug("Sending to: %s", str(destinations)) + + if not destinations: + return + + sent_pdus_destination_dist_total.inc(len(destinations)) + sent_pdus_destination_dist_count.inc() + + for destination in destinations: + self._get_per_destination_queue(destination).send_pdu(pdu, order) + + @defer.inlineCallbacks + def send_read_receipt(self, receipt): + """Send a RR to any other servers in the room + + Args: + receipt (synapse.types.ReadReceipt): receipt to be sent + """ + # Work out which remote servers should be poked and poke them. + domains = yield self.state.get_current_hosts_in_room(receipt.room_id) + domains = [d for d in domains if d != self.server_name] + if not domains: + return + + logger.debug("Sending receipt to: %r", domains) + + content = { + receipt.room_id: { + receipt.receipt_type: { + receipt.user_id: { + "event_ids": receipt.event_ids, + "data": receipt.data, + }, + }, + }, + } + key = (receipt.room_id, receipt.receipt_type, receipt.user_id) + + for domain in domains: + self.build_and_send_edu( + destination=domain, + edu_type="m.receipt", + content=content, + key=key, + ) + + @logcontext.preserve_fn # the caller should not yield on this + @defer.inlineCallbacks + def send_presence(self, states): + """Send the new presence states to the appropriate destinations. + + This actually queues up the presence states ready for sending and + triggers a background task to process them and send out the transactions. + + Args: + states (list(UserPresenceState)) + """ + if not self.hs.config.use_presence: + # No-op if presence is disabled. 
+ return + + # First we queue up the new presence by user ID, so multiple presence + # updates in quick successtion are correctly handled + # We only want to send presence for our own users, so lets always just + # filter here just in case. + self.pending_presence.update({ + state.user_id: state for state in states + if self.is_mine_id(state.user_id) + }) + + # We then handle the new pending presence in batches, first figuring + # out the destinations we need to send each state to and then poking it + # to attempt a new transaction. We linearize this so that we don't + # accidentally mess up the ordering and send multiple presence updates + # in the wrong order + if self._processing_pending_presence: + return + + self._processing_pending_presence = True + try: + while True: + states_map = self.pending_presence + self.pending_presence = {} + + if not states_map: + break + + yield self._process_presence_inner(list(states_map.values())) + except Exception: + logger.exception("Error sending presence states to servers") + finally: + self._processing_pending_presence = False + + @measure_func("txnqueue._process_presence") + @defer.inlineCallbacks + def _process_presence_inner(self, states): + """Given a list of states populate self.pending_presence_by_dest and + poke to send a new transaction to each destination + + Args: + states (list(UserPresenceState)) + """ + hosts_and_states = yield get_interested_remotes(self.store, states, self.state) + + for destinations, states in hosts_and_states: + for destination in destinations: + if destination == self.server_name: + continue + self._get_per_destination_queue(destination).send_presence(states) + + def build_and_send_edu(self, destination, edu_type, content, key=None): + """Construct an Edu object, and queue it for sending + + Args: + destination (str): name of server to send to + edu_type (str): type of EDU to send + content (dict): content of EDU + key (Any|None): clobbering key for this edu + """ + if destination == 
self.server_name: + logger.info("Not sending EDU to ourselves") + return + + edu = Edu( + origin=self.server_name, + destination=destination, + edu_type=edu_type, + content=content, + ) + + self.send_edu(edu, key) + + def send_edu(self, edu, key): + """Queue an EDU for sending + + Args: + edu (Edu): edu to send + key (Any|None): clobbering key for this edu + """ + queue = self._get_per_destination_queue(edu.destination) + if key: + queue.send_keyed_edu(edu, key) + else: + queue.send_edu(edu) + + def send_device_messages(self, destination): + if destination == self.server_name: + logger.info("Not sending device update to ourselves") + return + + self._get_per_destination_queue(destination).attempt_new_transaction() + + def get_current_token(self): + return 0 diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py new file mode 100644 index 0000000000..385039add4 --- /dev/null +++ b/synapse/federation/sender/per_destination_queue.py @@ -0,0 +1,318 @@ +# -*- coding: utf-8 -*- +# Copyright 2014-2016 OpenMarket Ltd +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import datetime +import logging + +from prometheus_client import Counter + +from twisted.internet import defer + +from synapse.api.errors import ( + FederationDeniedError, + HttpResponseException, + RequestSendFailed, +) +from synapse.events import EventBase +from synapse.federation.units import Edu +from synapse.handlers.presence import format_user_presence_state +from synapse.metrics import sent_transactions_counter +from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.storage import UserPresenceState +from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter + +logger = logging.getLogger(__name__) + + +sent_edus_counter = Counter( + "synapse_federation_client_sent_edus", + "Total number of EDUs successfully sent", +) + +sent_edus_by_type = Counter( + "synapse_federation_client_sent_edus_by_type", + "Number of sent EDUs successfully sent, by event type", + ["type"], +) + + +class PerDestinationQueue(object): + """ + Manages the per-destination transmission queues. + + Args: + hs (synapse.HomeServer): + transaction_sender (TransactionManager): + destination (str): the server_name of the destination that we are managing + transmission for. + """ + def __init__(self, hs, transaction_manager, destination): + self._server_name = hs.hostname + self._clock = hs.get_clock() + self._store = hs.get_datastore() + self._transaction_manager = transaction_manager + + self._destination = destination + self.transmission_loop_running = False + + # a list of tuples of (pending pdu, order) + self._pending_pdus = [] # type: list[tuple[EventBase, int]] + self._pending_edus = [] # type: list[Edu] + + # Pending EDUs by their "key". Keyed EDUs are EDUs that get clobbered + # based on their key (e.g. 
typing events by room_id) + # Map of (edu_type, key) -> Edu + self._pending_edus_keyed = {} # type: dict[tuple[str, str], Edu] + + # Map of user_id -> UserPresenceState of pending presence to be sent to this + # destination + self._pending_presence = {} # type: dict[str, UserPresenceState] + + # stream_id of last successfully sent to-device message. + # NB: may be a long or an int. + self._last_device_stream_id = 0 + + # stream_id of last successfully sent device list update. + self._last_device_list_stream_id = 0 + + def pending_pdu_count(self): + return len(self._pending_pdus) + + def pending_edu_count(self): + return ( + len(self._pending_edus) + + len(self._pending_presence) + + len(self._pending_edus_keyed) + ) + + def send_pdu(self, pdu, order): + """Add a PDU to the queue, and start the transmission loop if neccessary + + Args: + pdu (EventBase): pdu to send + order (int): + """ + self._pending_pdus.append((pdu, order)) + self.attempt_new_transaction() + + def send_presence(self, states): + """Add presence updates to the queue. Start the transmission loop if neccessary. + + Args: + states (iterable[UserPresenceState]): presence to send + """ + self._pending_presence.update({ + state.user_id: state for state in states + }) + self.attempt_new_transaction() + + def send_keyed_edu(self, edu, key): + self._pending_edus_keyed[(edu.edu_type, key)] = edu + self.attempt_new_transaction() + + def send_edu(self, edu): + self._pending_edus.append(edu) + self.attempt_new_transaction() + + def attempt_new_transaction(self): + """Try to start a new transaction to this destination + + If there is already a transaction in progress to this destination, + returns immediately. Otherwise kicks off the process of sending a + transaction in the background. + """ + # list of (pending_pdu, deferred, order) + if self.transmission_loop_running: + # XXX: this can get stuck on by a never-ending + # request at which point pending_pdus just keeps growing. 
+ # we need application-layer timeouts of some flavour of these + # requests + logger.debug( + "TX [%s] Transaction already in progress", + self._destination + ) + return + + logger.debug("TX [%s] Starting transaction loop", self._destination) + + run_as_background_process( + "federation_transaction_transmission_loop", + self._transaction_transmission_loop, + ) + + @defer.inlineCallbacks + def _transaction_transmission_loop(self): + pending_pdus = [] + try: + self.transmission_loop_running = True + + # This will throw if we wouldn't retry. We do this here so we fail + # quickly, but we will later check this again in the http client, + # hence why we throw the result away. + yield get_retry_limiter(self._destination, self._clock, self._store) + + pending_pdus = [] + while True: + device_message_edus, device_stream_id, dev_list_id = ( + yield self._get_new_device_messages() + ) + + # BEGIN CRITICAL SECTION + # + # In order to avoid a race condition, we need to make sure that + # the following code (from popping the queues up to the point + # where we decide if we actually have any pending messages) is + # atomic - otherwise new PDUs or EDUs might arrive in the + # meantime, but not get sent because we hold the + # transmission_loop_running flag. 
+ + pending_pdus = self._pending_pdus + + # We can only include at most 50 PDUs per transactions + pending_pdus, self._pending_pdus = pending_pdus[:50], pending_pdus[50:] + + pending_edus = self._pending_edus + + # We can only include at most 100 EDUs per transactions + pending_edus, self._pending_edus = pending_edus[:100], pending_edus[100:] + + pending_edus.extend( + self._pending_edus_keyed.values() + ) + + self._pending_edus_keyed = {} + + pending_edus.extend(device_message_edus) + + pending_presence = self._pending_presence + self._pending_presence = {} + if pending_presence: + pending_edus.append( + Edu( + origin=self._server_name, + destination=self._destination, + edu_type="m.presence", + content={ + "push": [ + format_user_presence_state( + presence, self._clock.time_msec() + ) + for presence in pending_presence.values() + ] + }, + ) + ) + + if pending_pdus: + logger.debug("TX [%s] len(pending_pdus_by_dest[dest]) = %d", + self._destination, len(pending_pdus)) + + if not pending_pdus and not pending_edus: + logger.debug("TX [%s] Nothing to send", self._destination) + self._last_device_stream_id = device_stream_id + return + + # END CRITICAL SECTION + + success = yield self._transaction_manager.send_new_transaction( + self._destination, pending_pdus, pending_edus + ) + if success: + sent_transactions_counter.inc() + sent_edus_counter.inc(len(pending_edus)) + for edu in pending_edus: + sent_edus_by_type.labels(edu.edu_type).inc() + # Remove the acknowledged device messages from the database + # Only bother if we actually sent some device messages + if device_message_edus: + yield self._store.delete_device_msgs_for_remote( + self._destination, device_stream_id + ) + logger.info( + "Marking as sent %r %r", self._destination, dev_list_id + ) + yield self._store.mark_as_sent_devices_by_remote( + self._destination, dev_list_id + ) + + self._last_device_stream_id = device_stream_id + self._last_device_list_stream_id = dev_list_id + else: + break + except 
NotRetryingDestination as e: + logger.debug( + "TX [%s] not ready for retry yet (next retry at %s) - " + "dropping transaction for now", + self._destination, + datetime.datetime.fromtimestamp( + (e.retry_last_ts + e.retry_interval) / 1000.0 + ), + ) + except FederationDeniedError as e: + logger.info(e) + except HttpResponseException as e: + logger.warning( + "TX [%s] Received %d response to transaction: %s", + self._destination, e.code, e, + ) + except RequestSendFailed as e: + logger.warning("TX [%s] Failed to send transaction: %s", self._destination, e) + + for p, _ in pending_pdus: + logger.info("Failed to send event %s to %s", p.event_id, + self._destination) + except Exception: + logger.exception( + "TX [%s] Failed to send transaction", + self._destination, + ) + for p, _ in pending_pdus: + logger.info("Failed to send event %s to %s", p.event_id, + self._destination) + finally: + # We want to be *very* sure we clear this after we stop processing + self.transmission_loop_running = False + + @defer.inlineCallbacks + def _get_new_device_messages(self): + last_device_stream_id = self._last_device_stream_id + to_device_stream_id = self._store.get_to_device_stream_token() + contents, stream_id = yield self._store.get_new_device_msgs_for_remote( + self._destination, last_device_stream_id, to_device_stream_id + ) + edus = [ + Edu( + origin=self._server_name, + destination=self._destination, + edu_type="m.direct_to_device", + content=content, + ) + for content in contents + ] + + last_device_list = self._last_device_list_stream_id + now_stream_id, results = yield self._store.get_devices_by_remote( + self._destination, last_device_list + ) + edus.extend( + Edu( + origin=self._server_name, + destination=self._destination, + edu_type="m.device_list_update", + content=content, + ) + for content in results + ) + defer.returnValue((edus, stream_id, now_stream_id)) diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py 
new file mode 100644 index 0000000000..35e6b8ff5b --- /dev/null +++ b/synapse/federation/sender/transaction_manager.py @@ -0,0 +1,147 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging + +from twisted.internet import defer + +from synapse.api.errors import HttpResponseException +from synapse.federation.persistence import TransactionActions +from synapse.federation.units import Transaction +from synapse.util.metrics import measure_func + +logger = logging.getLogger(__name__) + + +class TransactionManager(object): + """Helper class which handles building and sending transactions + + shared between PerDestinationQueue objects + """ + def __init__(self, hs): + self._server_name = hs.hostname + self.clock = hs.get_clock() # nb must be called this for @measure_func + self._store = hs.get_datastore() + self._transaction_actions = TransactionActions(self._store) + self._transport_layer = hs.get_federation_transport_client() + + # HACK to get unique tx id + self._next_txn_id = int(self.clock.time_msec()) + + @measure_func("_send_new_transaction") + @defer.inlineCallbacks + def send_new_transaction(self, destination, pending_pdus, pending_edus): + + # Sort based on the order field + pending_pdus.sort(key=lambda t: t[1]) + pdus = [x[0] for x in pending_pdus] + edus = pending_edus + + success = True + + logger.debug("TX [%s] _attempt_new_transaction", destination) + + txn_id = str(self._next_txn_id) + + 
logger.debug( + "TX [%s] {%s} Attempting new transaction" + " (pdus: %d, edus: %d)", + destination, txn_id, + len(pdus), + len(edus), + ) + + logger.debug("TX [%s] Persisting transaction...", destination) + + transaction = Transaction.create_new( + origin_server_ts=int(self.clock.time_msec()), + transaction_id=txn_id, + origin=self._server_name, + destination=destination, + pdus=pdus, + edus=edus, + ) + + self._next_txn_id += 1 + + yield self._transaction_actions.prepare_to_send(transaction) + + logger.debug("TX [%s] Persisted transaction", destination) + logger.info( + "TX [%s] {%s} Sending transaction [%s]," + " (PDUs: %d, EDUs: %d)", + destination, txn_id, + transaction.transaction_id, + len(pdus), + len(edus), + ) + + # Actually send the transaction + + # FIXME (erikj): This is a bit of a hack to make the Pdu age + # keys work + def json_data_cb(): + data = transaction.get_dict() + now = int(self.clock.time_msec()) + if "pdus" in data: + for p in data["pdus"]: + if "age_ts" in p: + unsigned = p.setdefault("unsigned", {}) + unsigned["age"] = now - int(p["age_ts"]) + del p["age_ts"] + return data + + try: + response = yield self._transport_layer.send_transaction( + transaction, json_data_cb + ) + code = 200 + except HttpResponseException as e: + code = e.code + response = e.response + + if e.code in (401, 404, 429) or 500 <= e.code: + logger.info( + "TX [%s] {%s} got %d response", + destination, txn_id, code + ) + raise e + + logger.info( + "TX [%s] {%s} got %d response", + destination, txn_id, code + ) + + yield self._transaction_actions.delivered( + transaction, code, response + ) + + logger.debug("TX [%s] {%s} Marked as delivered", destination, txn_id) + + if code == 200: + for e_id, r in response.get("pdus", {}).items(): + if "error" in r: + logger.warn( + "TX [%s] {%s} Remote returned error for %s: %s", + destination, txn_id, e_id, r, + ) + else: + for p in pdus: + logger.warn( + "TX [%s] {%s} Failed to send event %s", + destination, txn_id, p.event_id, + ) 
+ success = False + + defer.returnValue(success) diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py deleted file mode 100644 index c1f6985ae4..0000000000 --- a/synapse/federation/transaction_queue.py +++ /dev/null @@ -1,801 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2014-2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import datetime -import logging - -from six import itervalues - -from prometheus_client import Counter - -from twisted.internet import defer - -import synapse.metrics -from synapse.api.errors import ( - FederationDeniedError, - HttpResponseException, - RequestSendFailed, -) -from synapse.events import EventBase -from synapse.handlers.presence import format_user_presence_state, get_interested_remotes -from synapse.metrics import ( - LaterGauge, - event_processing_loop_counter, - event_processing_loop_room_count, - events_processed_counter, - sent_transactions_counter, -) -from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.storage import UserPresenceState -from synapse.util import logcontext -from synapse.util.metrics import measure_func -from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter - -from .persistence import TransactionActions -from .units import Edu, Transaction - -logger = logging.getLogger(__name__) - -sent_pdus_destination_dist_count = Counter( - "synapse_federation_client_sent_pdu_destinations:count", - 
"Number of PDUs queued for sending to one or more destinations", -) - -sent_pdus_destination_dist_total = Counter( - "synapse_federation_client_sent_pdu_destinations:total", "" - "Total number of PDUs queued for sending across all destinations", -) - -sent_edus_counter = Counter( - "synapse_federation_client_sent_edus", - "Total number of EDUs successfully sent", -) - -sent_edus_by_type = Counter( - "synapse_federation_client_sent_edus_by_type", - "Number of sent EDUs successfully sent, by event type", - ["type"], -) - - -class TransactionQueue(object): - """This class makes sure we only have one transaction in flight at - a time for a given destination. - - It batches pending PDUs into single transactions. - """ - - def __init__(self, hs): - self.hs = hs - self.server_name = hs.hostname - - self.store = hs.get_datastore() - self.state = hs.get_state_handler() - - self.clock = hs.get_clock() - self.is_mine_id = hs.is_mine_id - - self._transaction_sender = TransactionSender(hs) - - # map from destination to PerDestinationQueue - self._per_destination_queues = {} # type: dict[str, PerDestinationQueue] - - LaterGauge( - "synapse_federation_transaction_queue_pending_destinations", - "", - [], - lambda: sum( - 1 for d in self._per_destination_queues.values() - if d.transmission_loop_running - ), - ) - - # Map of user_id -> UserPresenceState for all the pending presence - # to be sent out by user_id. 
Entries here get processed and put in - # pending_presence_by_dest - self.pending_presence = {} - - LaterGauge( - "synapse_federation_transaction_queue_pending_pdus", - "", - [], - lambda: sum( - d.pending_pdu_count() for d in self._per_destination_queues.values() - ), - ) - LaterGauge( - "synapse_federation_transaction_queue_pending_edus", - "", - [], - lambda: sum( - d.pending_edu_count() for d in self._per_destination_queues.values() - ), - ) - - self._order = 1 - - self._is_processing = False - self._last_poked_id = -1 - - self._processing_pending_presence = False - - def _get_per_destination_queue(self, destination): - queue = self._per_destination_queues.get(destination) - if not queue: - queue = PerDestinationQueue(self.hs, self._transaction_sender, destination) - self._per_destination_queues[destination] = queue - return queue - - def notify_new_events(self, current_id): - """This gets called when we have some new events we might want to - send out to other servers. - """ - self._last_poked_id = max(current_id, self._last_poked_id) - - if self._is_processing: - return - - # fire off a processing loop in the background - run_as_background_process( - "process_event_queue_for_federation", - self._process_event_queue_loop, - ) - - @defer.inlineCallbacks - def _process_event_queue_loop(self): - try: - self._is_processing = True - while True: - last_token = yield self.store.get_federation_out_pos("events") - next_token, events = yield self.store.get_all_new_events_stream( - last_token, self._last_poked_id, limit=100, - ) - - logger.debug("Handling %s -> %s", last_token, next_token) - - if not events and next_token >= self._last_poked_id: - break - - @defer.inlineCallbacks - def handle_event(event): - # Only send events for this server. - send_on_behalf_of = event.internal_metadata.get_send_on_behalf_of() - is_mine = self.is_mine_id(event.sender) - if not is_mine and send_on_behalf_of is None: - return - - try: - # Get the state from before the event. 
- # We need to make sure that this is the state from before - # the event and not from after it. - # Otherwise if the last member on a server in a room is - # banned then it won't receive the event because it won't - # be in the room after the ban. - destinations = yield self.state.get_current_hosts_in_room( - event.room_id, latest_event_ids=event.prev_event_ids(), - ) - except Exception: - logger.exception( - "Failed to calculate hosts in room for event: %s", - event.event_id, - ) - return - - destinations = set(destinations) - - if send_on_behalf_of is not None: - # If we are sending the event on behalf of another server - # then it already has the event and there is no reason to - # send the event to it. - destinations.discard(send_on_behalf_of) - - logger.debug("Sending %s to %r", event, destinations) - - self._send_pdu(event, destinations) - - @defer.inlineCallbacks - def handle_room_events(events): - for event in events: - yield handle_event(event) - - events_by_room = {} - for event in events: - events_by_room.setdefault(event.room_id, []).append(event) - - yield logcontext.make_deferred_yieldable(defer.gatherResults( - [ - logcontext.run_in_background(handle_room_events, evs) - for evs in itervalues(events_by_room) - ], - consumeErrors=True - )) - - yield self.store.update_federation_out_pos( - "events", next_token - ) - - if events: - now = self.clock.time_msec() - ts = yield self.store.get_received_ts(events[-1].event_id) - - synapse.metrics.event_processing_lag.labels( - "federation_sender").set(now - ts) - synapse.metrics.event_processing_last_ts.labels( - "federation_sender").set(ts) - - events_processed_counter.inc(len(events)) - - event_processing_loop_room_count.labels( - "federation_sender" - ).inc(len(events_by_room)) - - event_processing_loop_counter.labels("federation_sender").inc() - - synapse.metrics.event_processing_positions.labels( - "federation_sender").set(next_token) - - finally: - self._is_processing = False - - def _send_pdu(self, pdu, 
destinations): - # We loop through all destinations to see whether we already have - # a transaction in progress. If we do, stick it in the pending_pdus - # table and we'll get back to it later. - - order = self._order - self._order += 1 - - destinations = set(destinations) - destinations.discard(self.server_name) - logger.debug("Sending to: %s", str(destinations)) - - if not destinations: - return - - sent_pdus_destination_dist_total.inc(len(destinations)) - sent_pdus_destination_dist_count.inc() - - for destination in destinations: - self._get_per_destination_queue(destination).send_pdu(pdu, order) - - @defer.inlineCallbacks - def send_read_receipt(self, receipt): - """Send a RR to any other servers in the room - - Args: - receipt (synapse.types.ReadReceipt): receipt to be sent - """ - # Work out which remote servers should be poked and poke them. - domains = yield self.state.get_current_hosts_in_room(receipt.room_id) - domains = [d for d in domains if d != self.server_name] - if not domains: - return - - logger.debug("Sending receipt to: %r", domains) - - content = { - receipt.room_id: { - receipt.receipt_type: { - receipt.user_id: { - "event_ids": receipt.event_ids, - "data": receipt.data, - }, - }, - }, - } - key = (receipt.room_id, receipt.receipt_type, receipt.user_id) - - for domain in domains: - self.build_and_send_edu( - destination=domain, - edu_type="m.receipt", - content=content, - key=key, - ) - - @logcontext.preserve_fn # the caller should not yield on this - @defer.inlineCallbacks - def send_presence(self, states): - """Send the new presence states to the appropriate destinations. - - This actually queues up the presence states ready for sending and - triggers a background task to process them and send out the transactions. - - Args: - states (list(UserPresenceState)) - """ - if not self.hs.config.use_presence: - # No-op if presence is disabled. 
- return - - # First we queue up the new presence by user ID, so multiple presence - # updates in quick successtion are correctly handled - # We only want to send presence for our own users, so lets always just - # filter here just in case. - self.pending_presence.update({ - state.user_id: state for state in states - if self.is_mine_id(state.user_id) - }) - - # We then handle the new pending presence in batches, first figuring - # out the destinations we need to send each state to and then poking it - # to attempt a new transaction. We linearize this so that we don't - # accidentally mess up the ordering and send multiple presence updates - # in the wrong order - if self._processing_pending_presence: - return - - self._processing_pending_presence = True - try: - while True: - states_map = self.pending_presence - self.pending_presence = {} - - if not states_map: - break - - yield self._process_presence_inner(list(states_map.values())) - except Exception: - logger.exception("Error sending presence states to servers") - finally: - self._processing_pending_presence = False - - @measure_func("txnqueue._process_presence") - @defer.inlineCallbacks - def _process_presence_inner(self, states): - """Given a list of states populate self.pending_presence_by_dest and - poke to send a new transaction to each destination - - Args: - states (list(UserPresenceState)) - """ - hosts_and_states = yield get_interested_remotes(self.store, states, self.state) - - for destinations, states in hosts_and_states: - for destination in destinations: - if destination == self.server_name: - continue - self._get_per_destination_queue(destination).send_presence(states) - - def build_and_send_edu(self, destination, edu_type, content, key=None): - """Construct an Edu object, and queue it for sending - - Args: - destination (str): name of server to send to - edu_type (str): type of EDU to send - content (dict): content of EDU - key (Any|None): clobbering key for this edu - """ - if destination == 
self.server_name: - logger.info("Not sending EDU to ourselves") - return - - edu = Edu( - origin=self.server_name, - destination=destination, - edu_type=edu_type, - content=content, - ) - - self.send_edu(edu, key) - - def send_edu(self, edu, key): - """Queue an EDU for sending - - Args: - edu (Edu): edu to send - key (Any|None): clobbering key for this edu - """ - queue = self._get_per_destination_queue(edu.destination) - if key: - queue.send_keyed_edu(edu, key) - else: - queue.send_edu(edu) - - def send_device_messages(self, destination): - if destination == self.server_name: - logger.info("Not sending device update to ourselves") - return - - self._get_per_destination_queue(destination).attempt_new_transaction() - - def get_current_token(self): - return 0 - - -class PerDestinationQueue(object): - """ - Manages the per-destination transmission queues. - """ - def __init__(self, hs, transaction_sender, destination): - self._server_name = hs.hostname - self._clock = hs.get_clock() - self._store = hs.get_datastore() - self._transaction_sender = transaction_sender - - self._destination = destination - self.transmission_loop_running = False - - # a list of tuples of (pending pdu, order) - self._pending_pdus = [] # type: list[tuple[EventBase, int]] - self._pending_edus = [] # type: list[Edu] - - # Pending EDUs by their "key". Keyed EDUs are EDUs that get clobbered - # based on their key (e.g. typing events by room_id) - # Map of (edu_type, key) -> Edu - self._pending_edus_keyed = {} # type: dict[tuple[str, str], Edu] - - # Map of user_id -> UserPresenceState of pending presence to be sent to this - # destination - self._pending_presence = {} # type: dict[str, UserPresenceState] - - # stream_id of last successfully sent to-device message. - # NB: may be a long or an int. - self._last_device_stream_id = 0 - - # stream_id of last successfully sent device list update. 
- self._last_device_list_stream_id = 0 - - def pending_pdu_count(self): - return len(self._pending_pdus) - - def pending_edu_count(self): - return ( - len(self._pending_edus) - + len(self._pending_presence) - + len(self._pending_edus_keyed) - ) - - def send_pdu(self, pdu, order): - """Add a PDU to the queue, and start the transmission loop if neccessary - - Args: - pdu (EventBase): pdu to send - order (int): - """ - self._pending_pdus.append((pdu, order)) - self.attempt_new_transaction() - - def send_presence(self, states): - """Add presence updates to the queue. Start the transmission loop if neccessary. - - Args: - states (iterable[UserPresenceState]): presence to send - """ - self._pending_presence.update({ - state.user_id: state for state in states - }) - self.attempt_new_transaction() - - def send_keyed_edu(self, edu, key): - self._pending_edus_keyed[(edu.edu_type, key)] = edu - self.attempt_new_transaction() - - def send_edu(self, edu): - self._pending_edus.append(edu) - self.attempt_new_transaction() - - def attempt_new_transaction(self): - """Try to start a new transaction to this destination - - If there is already a transaction in progress to this destination, - returns immediately. Otherwise kicks off the process of sending a - transaction in the background. - """ - # list of (pending_pdu, deferred, order) - if self.transmission_loop_running: - # XXX: this can get stuck on by a never-ending - # request at which point pending_pdus just keeps growing. 
- # we need application-layer timeouts of some flavour of these - # requests - logger.debug( - "TX [%s] Transaction already in progress", - self._destination - ) - return - - logger.debug("TX [%s] Starting transaction loop", self._destination) - - run_as_background_process( - "federation_transaction_transmission_loop", - self._transaction_transmission_loop, - ) - - @defer.inlineCallbacks - def _transaction_transmission_loop(self): - pending_pdus = [] - try: - self.transmission_loop_running = True - - # This will throw if we wouldn't retry. We do this here so we fail - # quickly, but we will later check this again in the http client, - # hence why we throw the result away. - yield get_retry_limiter(self._destination, self._clock, self._store) - - pending_pdus = [] - while True: - device_message_edus, device_stream_id, dev_list_id = ( - yield self._get_new_device_messages() - ) - - # BEGIN CRITICAL SECTION - # - # In order to avoid a race condition, we need to make sure that - # the following code (from popping the queues up to the point - # where we decide if we actually have any pending messages) is - # atomic - otherwise new PDUs or EDUs might arrive in the - # meantime, but not get sent because we hold the - # transmission_loop_running flag. 
- - pending_pdus = self._pending_pdus - - # We can only include at most 50 PDUs per transactions - pending_pdus, self._pending_pdus = pending_pdus[:50], pending_pdus[50:] - - pending_edus = self._pending_edus - - # We can only include at most 100 EDUs per transactions - pending_edus, self._pending_edus = pending_edus[:100], pending_edus[100:] - - pending_edus.extend( - self._pending_edus_keyed.values() - ) - - self._pending_edus_keyed = {} - - pending_edus.extend(device_message_edus) - - pending_presence = self._pending_presence - self._pending_presence = {} - if pending_presence: - pending_edus.append( - Edu( - origin=self._server_name, - destination=self._destination, - edu_type="m.presence", - content={ - "push": [ - format_user_presence_state( - presence, self._clock.time_msec() - ) - for presence in pending_presence.values() - ] - }, - ) - ) - - if pending_pdus: - logger.debug("TX [%s] len(pending_pdus_by_dest[dest]) = %d", - self._destination, len(pending_pdus)) - - if not pending_pdus and not pending_edus: - logger.debug("TX [%s] Nothing to send", self._destination) - self._last_device_stream_id = device_stream_id - return - - # END CRITICAL SECTION - - success = yield self._transaction_sender.send_new_transaction( - self._destination, pending_pdus, pending_edus - ) - if success: - sent_transactions_counter.inc() - sent_edus_counter.inc(len(pending_edus)) - for edu in pending_edus: - sent_edus_by_type.labels(edu.edu_type).inc() - # Remove the acknowledged device messages from the database - # Only bother if we actually sent some device messages - if device_message_edus: - yield self._store.delete_device_msgs_for_remote( - self._destination, device_stream_id - ) - logger.info( - "Marking as sent %r %r", self._destination, dev_list_id - ) - yield self._store.mark_as_sent_devices_by_remote( - self._destination, dev_list_id - ) - - self._last_device_stream_id = device_stream_id - self._last_device_list_stream_id = dev_list_id - else: - break - except 
NotRetryingDestination as e: - logger.debug( - "TX [%s] not ready for retry yet (next retry at %s) - " - "dropping transaction for now", - self._destination, - datetime.datetime.fromtimestamp( - (e.retry_last_ts + e.retry_interval) / 1000.0 - ), - ) - except FederationDeniedError as e: - logger.info(e) - except HttpResponseException as e: - logger.warning( - "TX [%s] Received %d response to transaction: %s", - self._destination, e.code, e, - ) - except RequestSendFailed as e: - logger.warning("TX [%s] Failed to send transaction: %s", self._destination, e) - - for p, _ in pending_pdus: - logger.info("Failed to send event %s to %s", p.event_id, - self._destination) - except Exception: - logger.exception( - "TX [%s] Failed to send transaction", - self._destination, - ) - for p, _ in pending_pdus: - logger.info("Failed to send event %s to %s", p.event_id, - self._destination) - finally: - # We want to be *very* sure we clear this after we stop processing - self.transmission_loop_running = False - - @defer.inlineCallbacks - def _get_new_device_messages(self): - last_device_stream_id = self._last_device_stream_id - to_device_stream_id = self._store.get_to_device_stream_token() - contents, stream_id = yield self._store.get_new_device_msgs_for_remote( - self._destination, last_device_stream_id, to_device_stream_id - ) - edus = [ - Edu( - origin=self._server_name, - destination=self._destination, - edu_type="m.direct_to_device", - content=content, - ) - for content in contents - ] - - last_device_list = self._last_device_list_stream_id - now_stream_id, results = yield self._store.get_devices_by_remote( - self._destination, last_device_list - ) - edus.extend( - Edu( - origin=self._server_name, - destination=self._destination, - edu_type="m.device_list_update", - content=content, - ) - for content in results - ) - defer.returnValue((edus, stream_id, now_stream_id)) - - -class TransactionSender(object): - """Helper class which handles building and sending transactions - - 
shared between PerDestinationQueue objects - """ - def __init__(self, hs): - self._server_name = hs.hostname - self._clock = hs.get_clock() - self._store = hs.get_datastore() - self._transaction_actions = TransactionActions(self._store) - self._transport_layer = hs.get_federation_transport_client() - - # HACK to get unique tx id - self._next_txn_id = int(self._clock.time_msec()) - - @measure_func("_send_new_transaction") - @defer.inlineCallbacks - def send_new_transaction(self, destination, pending_pdus, pending_edus): - - # Sort based on the order field - pending_pdus.sort(key=lambda t: t[1]) - pdus = [x[0] for x in pending_pdus] - edus = pending_edus - - success = True - - logger.debug("TX [%s] _attempt_new_transaction", destination) - - txn_id = str(self._next_txn_id) - - logger.debug( - "TX [%s] {%s} Attempting new transaction" - " (pdus: %d, edus: %d)", - destination, txn_id, - len(pdus), - len(edus), - ) - - logger.debug("TX [%s] Persisting transaction...", destination) - - transaction = Transaction.create_new( - origin_server_ts=int(self._clock.time_msec()), - transaction_id=txn_id, - origin=self._server_name, - destination=destination, - pdus=pdus, - edus=edus, - ) - - self._next_txn_id += 1 - - yield self._transaction_actions.prepare_to_send(transaction) - - logger.debug("TX [%s] Persisted transaction", destination) - logger.info( - "TX [%s] {%s} Sending transaction [%s]," - " (PDUs: %d, EDUs: %d)", - destination, txn_id, - transaction.transaction_id, - len(pdus), - len(edus), - ) - - # Actually send the transaction - - # FIXME (erikj): This is a bit of a hack to make the Pdu age - # keys work - def json_data_cb(): - data = transaction.get_dict() - now = int(self._clock.time_msec()) - if "pdus" in data: - for p in data["pdus"]: - if "age_ts" in p: - unsigned = p.setdefault("unsigned", {}) - unsigned["age"] = now - int(p["age_ts"]) - del p["age_ts"] - return data - - try: - response = yield self._transport_layer.send_transaction( - transaction, json_data_cb 
- ) - code = 200 - except HttpResponseException as e: - code = e.code - response = e.response - - if e.code in (401, 404, 429) or 500 <= e.code: - logger.info( - "TX [%s] {%s} got %d response", - destination, txn_id, code - ) - raise e - - logger.info( - "TX [%s] {%s} got %d response", - destination, txn_id, code - ) - - yield self._transaction_actions.delivered( - transaction, code, response - ) - - logger.debug("TX [%s] {%s} Marked as delivered", destination, txn_id) - - if code == 200: - for e_id, r in response.get("pdus", {}).items(): - if "error" in r: - logger.warn( - "TX [%s] {%s} Remote returned error for %s: %s", - destination, txn_id, e_id, r, - ) - else: - for p in pdus: - logger.warn( - "TX [%s] {%s} Failed to send event %s", - destination, txn_id, p.event_id, - ) - success = False - - defer.returnValue(success) diff --git a/synapse/server.py b/synapse/server.py index 72835e8c86..ee1506f2ad 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -42,7 +42,7 @@ from synapse.federation.federation_server import ( ReplicationFederationHandlerRegistry, ) from synapse.federation.send_queue import FederationRemoteSendQueue -from synapse.federation.transaction_queue import TransactionQueue +from synapse.federation.sender import FederationSender from synapse.federation.transport.client import TransportLayerClient from synapse.groups.attestations import GroupAttestationSigning, GroupAttestionRenewer from synapse.groups.groups_server import GroupsServerHandler @@ -421,7 +421,7 @@ class HomeServer(object): def build_federation_sender(self): if self.should_send_federation(): - return TransactionQueue(self) + return FederationSender(self) elif not self.config.worker_app: return FederationRemoteSendQueue(self) else: diff --git a/synapse/server.pyi b/synapse/server.pyi index fb8df56cd5..3ba3a967c2 100644 --- a/synapse/server.pyi +++ b/synapse/server.pyi @@ -1,5 +1,6 @@ import synapse.api.auth import synapse.config.homeserver +import synapse.federation.sender import 
synapse.federation.transaction_queue import synapse.federation.transport.client import synapse.handlers @@ -62,7 +63,7 @@ class HomeServer(object): def get_set_password_handler(self) -> synapse.handlers.set_password.SetPasswordHandler: pass - def get_federation_sender(self) -> synapse.federation.transaction_queue.TransactionQueue: + def get_federation_sender(self) -> synapse.federation.sender.FederationSender: pass def get_federation_transport_client(self) -> synapse.federation.transport.client.TransportLayerClient: -- cgit 1.5.1 From 9ad448c1e56dc548d8ee22494deabe802637b58a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 14 Mar 2019 13:32:14 +0000 Subject: Correctly handle all command line options --- synapse/app/_base.py | 18 ++++++++++++------ synapse/app/homeserver.py | 16 +++++++--------- synapse/config/workers.py | 24 +++++++++++++++++++++++- 3 files changed, 42 insertions(+), 16 deletions(-) (limited to 'synapse') diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 32e8b8a3f5..d4c6c4c8e2 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -63,12 +63,13 @@ def start_worker_reactor(appname, config): start_reactor( appname, - config.soft_file_limit, - config.gc_thresholds, - config.worker_pid_file, - config.worker_daemonize, - config.worker_cpu_affinity, - logger, + soft_file_limit=config.soft_file_limit, + gc_thresholds=config.gc_thresholds, + pid_file=config.worker_pid_file, + daemonize=config.worker_daemonize, + cpu_affinity=config.worker_cpu_affinity, + print_pidfile=config.print_pidfile, + logger=logger, ) @@ -79,6 +80,7 @@ def start_reactor( pid_file, daemonize, cpu_affinity, + print_pidfile, logger, ): """ Run the reactor in the main process @@ -93,6 +95,7 @@ def start_reactor( pid_file (str): name of pid file to write to if daemonize is True daemonize (bool): true to run the reactor in a background process cpu_affinity (int|None): cpu affinity mask + print_pidfile (bool): whether to print the pid file, if daemonize is True 
logger (logging.Logger): logger instance to pass to Daemonize """ @@ -124,6 +127,9 @@ def start_reactor( reactor.run() if daemonize: + if print_pidfile: + print(pid_file) + daemon = Daemonize( app=appname, pid=pid_file, diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index e8b6cc3114..ce89d48bf7 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -636,17 +636,15 @@ def run(hs): # be quite busy the first few minutes clock.call_later(5 * 60, start_phone_stats_home) - if hs.config.daemonize and hs.config.print_pidfile: - print(hs.config.pid_file) - _base.start_reactor( "synapse-homeserver", - hs.config.soft_file_limit, - hs.config.gc_thresholds, - hs.config.pid_file, - hs.config.daemonize, - hs.config.cpu_affinity, - logger, + soft_file_limit=hs.config.soft_file_limit, + gc_thresholds=hs.config.gc_thresholds, + pid_file=hs.config.pid_file, + daemonize=hs.config.daemonize, + cpu_affinity=hs.config.cpu_affinity, + print_pidfile=hs.config.print_pidfile, + logger=logger, ) diff --git a/synapse/config/workers.py b/synapse/config/workers.py index 8dc013d0e8..bfbd8b6c91 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -28,7 +28,7 @@ class WorkerConfig(Config): if self.worker_app == "synapse.app.homeserver": self.worker_app = None - self.worker_listeners = config.get("worker_listeners") + self.worker_listeners = config.get("worker_listeners", []) self.worker_daemonize = config.get("worker_daemonize") self.worker_pid_file = config.get("worker_pid_file") self.worker_log_file = config.get("worker_log_file") @@ -48,6 +48,17 @@ class WorkerConfig(Config): self.worker_main_http_uri = config.get("worker_main_http_uri", None) self.worker_cpu_affinity = config.get("worker_cpu_affinity") + # This option is really only here to support `--manhole` command line + # argument. 
+ manhole = config.get("worker_manhole") + if manhole: + self.worker_listeners.append({ + "port": manhole, + "bind_addresses": ["127.0.0.1"], + "type": "manhole", + "tls": False, + }) + if self.worker_listeners: for listener in self.worker_listeners: bind_address = listener.pop("bind_address", None) @@ -59,5 +70,16 @@ class WorkerConfig(Config): bind_addresses.append('') def read_arguments(self, args): + # We support a bunch of command line arguments that override options in + # the config. A lot of these options have a worker_* prefix when running + # on workers so we also have to override them when command line options + # are specified. + if args.daemonize is not None: self.worker_daemonize = args.daemonize + if args.log_config is not None: + self.worker_log_config = args.log_config + if args.log_file is not None: + self.worker_log_file = args.log_file + if args.manhole is not None: + self.worker_manhole = args.worker_manhole -- cgit 1.5.1 From 271cb1998bab21a3c2a62f72d3179c6af835ad70 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 14 Mar 2019 14:30:54 +0000 Subject: Revert "Make federation endpoints more tolerant of trailing slashes for some endpoints (#4793)" This reverts commit 290552fd836f4ae2dc1d893a7f72f7fff85365d3. --- changelog.d/4793.feature | 1 - synapse/federation/transport/client.py | 2 +- synapse/federation/transport/server.py | 14 +++++++------- tests/handlers/test_typing.py | 6 +++--- 4 files changed, 11 insertions(+), 12 deletions(-) delete mode 100644 changelog.d/4793.feature (limited to 'synapse') diff --git a/changelog.d/4793.feature b/changelog.d/4793.feature deleted file mode 100644 index 90dba7d122..0000000000 --- a/changelog.d/4793.feature +++ /dev/null @@ -1 +0,0 @@ -Synapse is now permissive about trailing slashes on some of its federation endpoints, allowing zero or more to be present. 
\ No newline at end of file diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 4e8919d657..8e2be218e2 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -167,7 +167,7 @@ class TransportLayerClient(object): # generated by the json_data_callback. json_data = transaction.get_dict() - path = _create_v1_path("/send/%s", transaction.transaction_id) + path = _create_v1_path("/send/%s/", transaction.transaction_id) response = yield self.client.put_json( transaction.destination, diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index efb6bdca48..96d680a5ad 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -312,7 +312,7 @@ class BaseFederationServlet(object): class FederationSendServlet(BaseFederationServlet): - PATH = "/send/(?P<transaction_id>[^/]*)/?" + PATH = "/send/(?P<transaction_id>[^/]*)/" def __init__(self, handler, server_name, **kwargs): super(FederationSendServlet, self).__init__( @@ -378,7 +378,7 @@ class FederationSendServlet(BaseFederationServlet): class FederationEventServlet(BaseFederationServlet): - PATH = "/event/(?P<event_id>[^/]*)/?" + PATH = "/event/(?P<event_id>[^/]*)/" # This is when someone asks for a data item for a given server data_id pair. def on_GET(self, origin, content, query, event_id): @@ -386,7 +386,7 @@ class FederationEventServlet(BaseFederationServlet): class FederationStateServlet(BaseFederationServlet): - PATH = "/state/(?P<context>[^/]*)/?" + PATH = "/state/(?P<context>[^/]*)/" # This is when someone asks for all data for a given context. def on_GET(self, origin, content, query, context): @@ -398,7 +398,7 @@ class FederationStateServlet(BaseFederationServlet): class FederationStateIdsServlet(BaseFederationServlet): - PATH = "/state_ids/(?P<room_id>[^/]*)/?"
+ PATH = "/state_ids/(?P<room_id>[^/]*)/" def on_GET(self, origin, content, query, room_id): return self.handler.on_state_ids_request( @@ -409,7 +409,7 @@ class FederationStateIdsServlet(BaseFederationServlet): class FederationBackfillServlet(BaseFederationServlet): - PATH = "/backfill/(?P<context>[^/]*)/?" + PATH = "/backfill/(?P<context>[^/]*)/" def on_GET(self, origin, content, query, context): versions = [x.decode('ascii') for x in query[b"v"]] @@ -1080,7 +1080,7 @@ class FederationGroupsCategoriesServlet(BaseFederationServlet): """Get all categories for a group """ PATH = ( - "/groups/(?P<group_id>[^/]*)/categories/?" + "/groups/(?P<group_id>[^/]*)/categories/" ) @defer.inlineCallbacks @@ -1150,7 +1150,7 @@ class FederationGroupsRolesServlet(BaseFederationServlet): """Get roles in a group """ PATH = ( - "/groups/(?P<group_id>[^/]*)/roles/?" + "/groups/(?P<group_id>[^/]*)/roles/" ) @defer.inlineCallbacks diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index b8e97390de..13486930fb 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -180,7 +180,7 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): put_json = self.hs.get_http_client().put_json put_json.assert_called_once_with( "farm", - path="/_matrix/federation/v1/send/1000000", + path="/_matrix/federation/v1/send/1000000/", data=_expect_edu_transaction( "m.typing", content={ @@ -201,7 +201,7 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): (request, channel) = self.make_request( "PUT", - "/_matrix/federation/v1/send/1000000", + "/_matrix/federation/v1/send/1000000/", _make_edu_transaction_json( "m.typing", content={ @@ -257,7 +257,7 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): put_json = self.hs.get_http_client().put_json put_json.assert_called_once_with( "farm", - path="/_matrix/federation/v1/send/1000000", + path="/_matrix/federation/v1/send/1000000/", data=_expect_edu_transaction( "m.typing", content={ -- cgit 1.5.1 From 899e523d6d92dfbc17dce81eb36f63053e447a97 Mon
Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 15 Mar 2019 17:46:16 +0000 Subject: Add ratelimiting on login (#4821) Add two ratelimiters on login (per-IP address and per-userID). --- changelog.d/4821.feature | 1 + docs/sample_config.yaml | 39 ++++++--- synapse/api/ratelimiting.py | 12 +++ synapse/config/ratelimiting.py | 58 +++++++++----- synapse/handlers/auth.py | 36 +++++++++ synapse/handlers/register.py | 4 +- synapse/rest/client/v1/login.py | 10 +++ synapse/rest/client/v2_alpha/register.py | 4 +- tests/rest/client/v1/test_login.py | 118 ++++++++++++++++++++++++++++ tests/rest/client/v2_alpha/test_register.py | 6 +- tests/utils.py | 8 +- 11 files changed, 259 insertions(+), 37 deletions(-) create mode 100644 changelog.d/4821.feature create mode 100644 tests/rest/client/v1/test_login.py (limited to 'synapse') diff --git a/changelog.d/4821.feature b/changelog.d/4821.feature new file mode 100644 index 0000000000..61d4eb8d60 --- /dev/null +++ b/changelog.d/4821.feature @@ -0,0 +1 @@ +Add configurable rate limiting to the /login endpoint. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 5f2534e465..b3df272c54 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -379,6 +379,34 @@ rc_messages_per_second: 0.2 # rc_message_burst_count: 10.0 +# Ratelimiting settings for registration and login. +# +# Each ratelimiting configuration is made of two parameters: +# - per_second: number of requests a client can send per second. +# - burst_count: number of requests a client can send before being throttled. +# +# Synapse currently uses the following configurations: +# - one for registration that ratelimits registration requests based on the +# client's IP address. +# - one for login that ratelimits login requests based on the client's IP +# address. +# - one for login that ratelimits login requests based on the account the +# client is attempting to log into. +# +# The defaults are as shown below. 
+# +#rc_registration: +# per_second: 0.17 +# burst_count: 3 +# +#rc_login: +# address: +# per_second: 0.17 +# burst_count: 3 +# account: +# per_second: 0.17 +# burst_count: 3 + # The federation window size in milliseconds # federation_rc_window_size: 1000 @@ -403,17 +431,6 @@ federation_rc_reject_limit: 50 # federation_rc_concurrent: 3 -# Number of registration requests a client can send per second. -# Defaults to 1/minute (0.17). -# -#rc_registration_requests_per_second: 0.17 - -# Number of registration requests a client can send before being -# throttled. -# Defaults to 3. -# -#rc_registration_request_burst_count: 3.0 - # Directory where uploaded images and attachments are stored. diff --git a/synapse/api/ratelimiting.py b/synapse/api/ratelimiting.py index ad68079eeb..296c4a1c17 100644 --- a/synapse/api/ratelimiting.py +++ b/synapse/api/ratelimiting.py @@ -14,6 +14,8 @@ import collections +from synapse.api.errors import LimitExceededError + class Ratelimiter(object): """ @@ -82,3 +84,13 @@ class Ratelimiter(object): break else: del self.message_counts[key] + + def ratelimit(self, key, time_now_s, rate_hz, burst_count, update=True): + allowed, time_allowed = self.can_do_action( + key, time_now_s, rate_hz, burst_count, update + ) + + if not allowed: + raise LimitExceededError( + retry_after_ms=int(1000 * (time_allowed - time_now_s)), + ) diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index 093042fdb9..649f018356 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -15,25 +15,30 @@ from ._base import Config +class RateLimitConfig(object): + def __init__(self, config): + self.per_second = config.get("per_second", 0.17) + self.burst_count = config.get("burst_count", 3.0) + + class RatelimitConfig(Config): def read_config(self, config): self.rc_messages_per_second = config["rc_messages_per_second"] self.rc_message_burst_count = config["rc_message_burst_count"] + self.rc_registration = 
RateLimitConfig(config.get("rc_registration", {})) + + rc_login_config = config.get("rc_login", {}) + self.rc_login_address = RateLimitConfig(rc_login_config.get("address", {})) + self.rc_login_account = RateLimitConfig(rc_login_config.get("account", {})) + self.federation_rc_window_size = config["federation_rc_window_size"] self.federation_rc_sleep_limit = config["federation_rc_sleep_limit"] self.federation_rc_sleep_delay = config["federation_rc_sleep_delay"] self.federation_rc_reject_limit = config["federation_rc_reject_limit"] self.federation_rc_concurrent = config["federation_rc_concurrent"] - self.rc_registration_requests_per_second = config.get( - "rc_registration_requests_per_second", 0.17, - ) - self.rc_registration_request_burst_count = config.get( - "rc_registration_request_burst_count", 3, - ) - def default_config(self, **kwargs): return """\ ## Ratelimiting ## @@ -46,6 +51,34 @@ class RatelimitConfig(Config): # rc_message_burst_count: 10.0 + # Ratelimiting settings for registration and login. + # + # Each ratelimiting configuration is made of two parameters: + # - per_second: number of requests a client can send per second. + # - burst_count: number of requests a client can send before being throttled. + # + # Synapse currently uses the following configurations: + # - one for registration that ratelimits registration requests based on the + # client's IP address. + # - one for login that ratelimits login requests based on the client's IP + # address. + # - one for login that ratelimits login requests based on the account the + # client is attempting to log into. + # + # The defaults are as shown below. 
+ # + #rc_registration: + # per_second: 0.17 + # burst_count: 3 + # + #rc_login: + # address: + # per_second: 0.17 + # burst_count: 3 + # account: + # per_second: 0.17 + # burst_count: 3 + # The federation window size in milliseconds # federation_rc_window_size: 1000 @@ -69,15 +102,4 @@ class RatelimitConfig(Config): # single server # federation_rc_concurrent: 3 - - # Number of registration requests a client can send per second. - # Defaults to 1/minute (0.17). - # - #rc_registration_requests_per_second: 0.17 - - # Number of registration requests a client can send before being - # throttled. - # Defaults to 3. - # - #rc_registration_request_burst_count: 3.0 """ diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 2abd9af94f..74f3790f25 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -35,6 +35,7 @@ from synapse.api.errors import ( StoreError, SynapseError, ) +from synapse.api.ratelimiting import Ratelimiter from synapse.module_api import ModuleApi from synapse.types import UserID from synapse.util import logcontext @@ -99,6 +100,10 @@ class AuthHandler(BaseHandler): login_types.append(t) self._supported_login_types = login_types + self._account_ratelimiter = Ratelimiter() + + self._clock = self.hs.get_clock() + @defer.inlineCallbacks def validate_user_via_ui_auth(self, requester, request_body, clientip): """ @@ -568,7 +573,12 @@ class AuthHandler(BaseHandler): Returns: defer.Deferred: (unicode) canonical_user_id, or None if zero or multiple matches + + Raises: + LimitExceededError if the ratelimiter's login requests count for this + user is too high too proceed. """ + self.ratelimit_login_per_account(user_id) res = yield self._find_user_id_and_pwd_hash(user_id) if res is not None: defer.returnValue(res[0]) @@ -634,6 +644,8 @@ class AuthHandler(BaseHandler): StoreError if there was a problem accessing the database SynapseError if there was a problem with the request LoginError if there was an authentication problem. 
+ LimitExceededError if the ratelimiter's login requests count for this + user is too high too proceed. """ if username.startswith('@'): @@ -643,6 +655,8 @@ class AuthHandler(BaseHandler): username, self.hs.hostname ).to_string() + self.ratelimit_login_per_account(qualified_user_id) + login_type = login_submission.get("type") known_login_type = False @@ -735,6 +749,10 @@ class AuthHandler(BaseHandler): password (unicode): the provided password Returns: (unicode) the canonical_user_id, or None if unknown user / bad password + + Raises: + LimitExceededError if the ratelimiter's login requests count for this + user is too high too proceed. """ lookupres = yield self._find_user_id_and_pwd_hash(user_id) if not lookupres: @@ -763,6 +781,7 @@ class AuthHandler(BaseHandler): auth_api.validate_macaroon(macaroon, "login", True, user_id) except Exception: raise AuthError(403, "Invalid token", errcode=Codes.FORBIDDEN) + self.ratelimit_login_per_account(user_id) yield self.auth.check_auth_blocking(user_id) defer.returnValue(user_id) @@ -934,6 +953,23 @@ class AuthHandler(BaseHandler): else: return defer.succeed(False) + def ratelimit_login_per_account(self, user_id): + """Checks whether the process must be stopped because of ratelimiting. + + Args: + user_id (unicode): complete @user:id + + Raises: + LimitExceededError if the ratelimiter's login requests count for this + user is too high too proceed. 
+ """ + self._account_ratelimiter.ratelimit( + user_id.lower(), time_now_s=self._clock.time(), + rate_hz=self.hs.config.rc_login_account.per_second, + burst_count=self.hs.config.rc_login_account.burst_count, + update=True, + ) + @attr.s class MacaroonGenerator(object): diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 03130edc54..0ec16b1d2e 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -629,8 +629,8 @@ class RegistrationHandler(BaseHandler): allowed, time_allowed = self.ratelimiter.can_do_action( address, time_now_s=time_now, - rate_hz=self.hs.config.rc_registration_requests_per_second, - burst_count=self.hs.config.rc_registration_request_burst_count, + rate_hz=self.hs.config.rc_registration.per_second, + burst_count=self.hs.config.rc_registration.burst_count, ) if not allowed: diff --git a/synapse/rest/client/v1/login.py b/synapse/rest/client/v1/login.py index 6121c5b6df..8d56effbb8 100644 --- a/synapse/rest/client/v1/login.py +++ b/synapse/rest/client/v1/login.py @@ -22,6 +22,7 @@ from twisted.internet import defer from twisted.web.client import PartialDownloadError from synapse.api.errors import Codes, LoginError, SynapseError +from synapse.api.ratelimiting import Ratelimiter from synapse.http.server import finish_request from synapse.http.servlet import ( RestServlet, @@ -97,6 +98,7 @@ class LoginRestServlet(ClientV1RestServlet): self.registration_handler = hs.get_registration_handler() self.handlers = hs.get_handlers() self._well_known_builder = WellKnownBuilder(hs) + self._address_ratelimiter = Ratelimiter() def on_GET(self, request): flows = [] @@ -129,6 +131,13 @@ class LoginRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_POST(self, request): + self._address_ratelimiter.ratelimit( + request.getClientIP(), time_now_s=self.hs.clock.time(), + rate_hz=self.hs.config.rc_login_address.per_second, + burst_count=self.hs.config.rc_login_address.burst_count, + update=True, + ) + 
login_submission = parse_json_object_from_request(request) try: if self.jwt_enabled and (login_submission["type"] == @@ -285,6 +294,7 @@ class LoginRestServlet(ClientV1RestServlet): raise LoginError(401, "Invalid JWT", errcode=Codes.UNAUTHORIZED) user_id = UserID(user, self.hs.hostname).to_string() + auth_handler = self.auth_handler registered_user_id = yield auth_handler.check_user_exists(user_id) if registered_user_id: diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 6f34029431..6d235262c8 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -210,8 +210,8 @@ class RegisterRestServlet(RestServlet): allowed, time_allowed = self.ratelimiter.can_do_action( client_addr, time_now_s=time_now, - rate_hz=self.hs.config.rc_registration_requests_per_second, - burst_count=self.hs.config.rc_registration_request_burst_count, + rate_hz=self.hs.config.rc_registration.per_second, + burst_count=self.hs.config.rc_registration.burst_count, update=False, ) diff --git a/tests/rest/client/v1/test_login.py b/tests/rest/client/v1/test_login.py new file mode 100644 index 0000000000..4035f76cca --- /dev/null +++ b/tests/rest/client/v1/test_login.py @@ -0,0 +1,118 @@ +import json + +from synapse.rest.client.v1 import admin, login + +from tests import unittest + +LOGIN_URL = b"/_matrix/client/r0/login" + + +class LoginRestServletTestCase(unittest.HomeserverTestCase): + + servlets = [ + admin.register_servlets, + login.register_servlets, + ] + + def make_homeserver(self, reactor, clock): + + self.hs = self.setup_test_homeserver() + self.hs.config.enable_registration = True + self.hs.config.registrations_require_3pid = [] + self.hs.config.auto_join_rooms = [] + self.hs.config.enable_registration_captcha = False + + return self.hs + + def test_POST_ratelimiting_per_address(self): + self.hs.config.rc_login_address.burst_count = 5 + self.hs.config.rc_login_address.per_second = 0.17 + + # 
Create different users so we're sure not to be bothered by the per-user + # ratelimiter. + for i in range(0, 6): + self.register_user("kermit" + str(i), "monkey") + + for i in range(0, 6): + params = { + "type": "m.login.password", + "identifier": { + "type": "m.id.user", + "user": "kermit" + str(i), + }, + "password": "monkey", + } + request_data = json.dumps(params) + request, channel = self.make_request(b"POST", LOGIN_URL, request_data) + self.render(request) + + if i == 5: + self.assertEquals(channel.result["code"], b"429", channel.result) + retry_after_ms = int(channel.json_body["retry_after_ms"]) + else: + self.assertEquals(channel.result["code"], b"200", channel.result) + + # Since we're ratelimiting at 1 request/min, retry_after_ms should be lower + # than 1min. + self.assertTrue(retry_after_ms < 6000) + + self.reactor.advance(retry_after_ms / 1000.) + + params = { + "type": "m.login.password", + "identifier": { + "type": "m.id.user", + "user": "kermit" + str(i), + }, + "password": "monkey", + } + request_data = json.dumps(params) + request, channel = self.make_request(b"POST", LOGIN_URL, params) + self.render(request) + + self.assertEquals(channel.result["code"], b"200", channel.result) + + def test_POST_ratelimiting_per_account(self): + self.hs.config.rc_login_account.burst_count = 5 + self.hs.config.rc_login_account.per_second = 0.17 + + self.register_user("kermit", "monkey") + + for i in range(0, 6): + params = { + "type": "m.login.password", + "identifier": { + "type": "m.id.user", + "user": "kermit", + }, + "password": "monkey", + } + request_data = json.dumps(params) + request, channel = self.make_request(b"POST", LOGIN_URL, request_data) + self.render(request) + + if i == 5: + self.assertEquals(channel.result["code"], b"429", channel.result) + retry_after_ms = int(channel.json_body["retry_after_ms"]) + else: + self.assertEquals(channel.result["code"], b"200", channel.result) + + # Since we're ratelimiting at 1 request/min, retry_after_ms should be 
lower + # than 1min. + self.assertTrue(retry_after_ms < 6000) + + self.reactor.advance(retry_after_ms / 1000.) + + params = { + "type": "m.login.password", + "identifier": { + "type": "m.id.user", + "user": "kermit", + }, + "password": "monkey", + } + request_data = json.dumps(params) + request, channel = self.make_request(b"POST", LOGIN_URL, params) + self.render(request) + + self.assertEquals(channel.result["code"], b"200", channel.result) diff --git a/tests/rest/client/v2_alpha/test_register.py b/tests/rest/client/v2_alpha/test_register.py index 3600434858..8fb525d3bf 100644 --- a/tests/rest/client/v2_alpha/test_register.py +++ b/tests/rest/client/v2_alpha/test_register.py @@ -132,7 +132,8 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): self.assertEquals(channel.json_body["error"], "Guest access is disabled") def test_POST_ratelimiting_guest(self): - self.hs.config.rc_registration_request_burst_count = 5 + self.hs.config.rc_registration.burst_count = 5 + self.hs.config.rc_registration.per_second = 0.17 for i in range(0, 6): url = self.url + b"?kind=guest" @@ -153,7 +154,8 @@ class RegisterRestServletTestCase(unittest.HomeserverTestCase): self.assertEquals(channel.result["code"], b"200", channel.result) def test_POST_ratelimiting(self): - self.hs.config.rc_registration_request_burst_count = 5 + self.hs.config.rc_registration.burst_count = 5 + self.hs.config.rc_registration.per_second = 0.17 for i in range(0, 6): params = { diff --git a/tests/utils.py b/tests/utils.py index 03b5a05b22..a412736492 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -151,8 +151,12 @@ def default_config(name): config.admin_contact = None config.rc_messages_per_second = 10000 config.rc_message_burst_count = 10000 - config.rc_registration_request_burst_count = 3.0 - config.rc_registration_requests_per_second = 0.17 + config.rc_registration.per_second = 10000 + config.rc_registration.burst_count = 10000 + config.rc_login_address.per_second = 10000 + 
config.rc_login_address.burst_count = 10000 + config.rc_login_account.per_second = 10000 + config.rc_login_account.burst_count = 10000 config.saml2_enabled = False config.public_baseurl = None config.default_identity_server = None -- cgit 1.5.1 From 651ad8bc96d360500e7f5953d05ef418b51acc86 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 18 Mar 2019 12:57:20 +0000 Subject: Add ratelimiting on failed login attempts (#4865) --- changelog.d/4865.feature | 1 + docs/sample_config.yaml | 6 +++++ synapse/config/ratelimiting.py | 9 ++++++++ synapse/handlers/auth.py | 28 +++++++++++++++++++----- tests/rest/client/v1/test_login.py | 45 ++++++++++++++++++++++++++++++++++++++ tests/utils.py | 2 ++ 6 files changed, 86 insertions(+), 5 deletions(-) create mode 100644 changelog.d/4865.feature (limited to 'synapse') diff --git a/changelog.d/4865.feature b/changelog.d/4865.feature new file mode 100644 index 0000000000..61d4eb8d60 --- /dev/null +++ b/changelog.d/4865.feature @@ -0,0 +1 @@ +Add configurable rate limiting to the /login endpoint. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index b3df272c54..84e2cc97f9 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -392,6 +392,9 @@ rc_message_burst_count: 10.0 # address. # - one for login that ratelimits login requests based on the account the # client is attempting to log into. +# - one for login that ratelimits login requests based on the account the +# client is attempting to log into, based on the amount of failed login +# attempts for this account. # # The defaults are as shown below. 
# @@ -406,6 +409,9 @@ rc_message_burst_count: 10.0 # account: # per_second: 0.17 # burst_count: 3 +# failed_attempts: +# per_second: 0.17 +# burst_count: 3 # The federation window size in milliseconds # diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index 649f018356..7e6cc5d0ea 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -32,6 +32,9 @@ class RatelimitConfig(Config): rc_login_config = config.get("rc_login", {}) self.rc_login_address = RateLimitConfig(rc_login_config.get("address", {})) self.rc_login_account = RateLimitConfig(rc_login_config.get("account", {})) + self.rc_login_failed_attempts = RateLimitConfig( + rc_login_config.get("failed_attempts", {}), + ) self.federation_rc_window_size = config["federation_rc_window_size"] self.federation_rc_sleep_limit = config["federation_rc_sleep_limit"] @@ -64,6 +67,9 @@ class RatelimitConfig(Config): # address. # - one for login that ratelimits login requests based on the account the # client is attempting to log into. + # - one for login that ratelimits login requests based on the account the + # client is attempting to log into, based on the amount of failed login + # attempts for this account. # # The defaults are as shown below. 
# @@ -78,6 +84,9 @@ class RatelimitConfig(Config): # account: # per_second: 0.17 # burst_count: 3 + # failed_attempts: + # per_second: 0.17 + # burst_count: 3 # The federation window size in milliseconds # diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 74f3790f25..caad9ae2dd 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -101,6 +101,7 @@ class AuthHandler(BaseHandler): self._supported_login_types = login_types self._account_ratelimiter = Ratelimiter() + self._failed_attempts_ratelimiter = Ratelimiter() self._clock = self.hs.get_clock() @@ -729,9 +730,16 @@ class AuthHandler(BaseHandler): if not known_login_type: raise SynapseError(400, "Unknown login type %s" % login_type) - # unknown username or invalid password. We raise a 403 here, but note - # that if we're doing user-interactive login, it turns all LoginErrors - # into a 401 anyway. + # unknown username or invalid password. + self._failed_attempts_ratelimiter.ratelimit( + qualified_user_id.lower(), time_now_s=self._clock.time(), + rate_hz=self.hs.config.rc_login_failed_attempts.per_second, + burst_count=self.hs.config.rc_login_failed_attempts.burst_count, + update=True, + ) + + # We raise a 403 here, but note that if we're doing user-interactive + # login, it turns all LoginErrors into a 401 anyway. raise LoginError( 403, "Invalid password", errcode=Codes.FORBIDDEN @@ -956,13 +964,23 @@ class AuthHandler(BaseHandler): def ratelimit_login_per_account(self, user_id): """Checks whether the process must be stopped because of ratelimiting. + Checks against two ratelimiters: the generic one for login attempts per + account and the one specific to failed attempts. + Args: user_id (unicode): complete @user:id Raises: - LimitExceededError if the ratelimiter's login requests count for this - user is too high too proceed. + LimitExceededError if one of the ratelimiters' login requests count + for this user is too high too proceed. 
""" + self._failed_attempts_ratelimiter.ratelimit( + user_id.lower(), time_now_s=self._clock.time(), + rate_hz=self.hs.config.rc_login_failed_attempts.per_second, + burst_count=self.hs.config.rc_login_failed_attempts.burst_count, + update=False, + ) + self._account_ratelimiter.ratelimit( user_id.lower(), time_now_s=self._clock.time(), rate_hz=self.hs.config.rc_login_account.per_second, diff --git a/tests/rest/client/v1/test_login.py b/tests/rest/client/v1/test_login.py index 4035f76cca..86312f1096 100644 --- a/tests/rest/client/v1/test_login.py +++ b/tests/rest/client/v1/test_login.py @@ -116,3 +116,48 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase): self.render(request) self.assertEquals(channel.result["code"], b"200", channel.result) + + def test_POST_ratelimiting_per_account_failed_attempts(self): + self.hs.config.rc_login_failed_attempts.burst_count = 5 + self.hs.config.rc_login_failed_attempts.per_second = 0.17 + + self.register_user("kermit", "monkey") + + for i in range(0, 6): + params = { + "type": "m.login.password", + "identifier": { + "type": "m.id.user", + "user": "kermit", + }, + "password": "notamonkey", + } + request_data = json.dumps(params) + request, channel = self.make_request(b"POST", LOGIN_URL, request_data) + self.render(request) + + if i == 5: + self.assertEquals(channel.result["code"], b"429", channel.result) + retry_after_ms = int(channel.json_body["retry_after_ms"]) + else: + self.assertEquals(channel.result["code"], b"403", channel.result) + + # Since we're ratelimiting at 1 request/min, retry_after_ms should be lower + # than 1min. + self.assertTrue(retry_after_ms < 6000) + + self.reactor.advance(retry_after_ms / 1000.) 
+ + params = { + "type": "m.login.password", + "identifier": { + "type": "m.id.user", + "user": "kermit", + }, + "password": "notamonkey", + } + request_data = json.dumps(params) + request, channel = self.make_request(b"POST", LOGIN_URL, params) + self.render(request) + + self.assertEquals(channel.result["code"], b"403", channel.result) diff --git a/tests/utils.py b/tests/utils.py index a412736492..b58b674aa4 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -157,6 +157,8 @@ def default_config(name): config.rc_login_address.burst_count = 10000 config.rc_login_account.per_second = 10000 config.rc_login_account.burst_count = 10000 + config.rc_login_failed_attempts.per_second = 10000 + config.rc_login_failed_attempts.burst_count = 10000 config.saml2_enabled = False config.public_baseurl = None config.default_identity_server = None -- cgit 1.5.1 From 282c97327f150a37d53f90ab6207bc1f98e70da3 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 19 Mar 2019 04:50:24 +1100 Subject: Migrate the user directory initial population to a background task (#4864) --- changelog.d/4864.feature | 1 + synapse/handlers/user_directory.py | 173 +--------- synapse/storage/background_updates.py | 8 +- .../storage/schema/delta/53/user_dir_populate.sql | 30 ++ synapse/storage/user_directory.py | 370 +++++++++++++++------ tests/handlers/test_user_directory.py | 109 ++++-- tests/storage/test_user_directory.py | 11 +- tests/unittest.py | 4 +- 8 files changed, 405 insertions(+), 301 deletions(-) create mode 100644 changelog.d/4864.feature create mode 100644 synapse/storage/schema/delta/53/user_dir_populate.sql (limited to 'synapse') diff --git a/changelog.d/4864.feature b/changelog.d/4864.feature new file mode 100644 index 0000000000..57927f2620 --- /dev/null +++ b/changelog.d/4864.feature @@ -0,0 +1 @@ +The user directory has been rewritten to make it faster, with less chance of falling behind on a large server. 
\ No newline at end of file diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index d92f8c529c..7dc0e236e7 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -38,18 +38,8 @@ class UserDirectoryHandler(object): world_readable or publically joinable room. We keep a database table up to date by streaming changes of the current state and recalculating whether users should be in the directory or not when necessary. - - For each user in the directory we also store a room_id which is public and that the - user is joined to. This allows us to ignore history_visibility and join_rules changes - for that user in all other public rooms, as we know they'll still be in at least - one public room. """ - INITIAL_ROOM_SLEEP_MS = 50 - INITIAL_ROOM_SLEEP_COUNT = 100 - INITIAL_ROOM_BATCH_SIZE = 100 - INITIAL_USER_SLEEP_MS = 10 - def __init__(self, hs): self.store = hs.get_datastore() self.state = hs.get_state_handler() @@ -59,17 +49,6 @@ class UserDirectoryHandler(object): self.is_mine_id = hs.is_mine_id self.update_user_directory = hs.config.update_user_directory self.search_all_users = hs.config.user_directory_search_all_users - - # If we're a worker, don't sleep when doing the initial room work, as it - # won't monopolise the master's CPU. - if hs.config.worker_app: - self.INITIAL_ROOM_SLEEP_MS = 0 - self.INITIAL_USER_SLEEP_MS = 0 - - # When start up for the first time we need to populate the user_directory. - # This is a set of user_id's we've inserted already - self.initially_handled_users = set() - # The current position in the current_state_delta stream self.pos = None @@ -132,7 +111,7 @@ class UserDirectoryHandler(object): # Support users are for diagnostics and should not appear in the user directory. 
if not is_support: yield self.store.update_profile_in_user_dir( - user_id, profile.display_name, profile.avatar_url, None + user_id, profile.display_name, profile.avatar_url ) @defer.inlineCallbacks @@ -149,10 +128,9 @@ class UserDirectoryHandler(object): if self.pos is None: self.pos = yield self.store.get_user_directory_stream_pos() - # If still None then we need to do the initial fill of directory + # If still None then the initial background update hasn't happened yet if self.pos is None: - yield self._do_initial_spam() - self.pos = yield self.store.get_user_directory_stream_pos() + defer.returnValue(None) # Loop round handling deltas until we're up to date while True: @@ -173,133 +151,6 @@ class UserDirectoryHandler(object): yield self.store.update_user_directory_stream_pos(self.pos) - @defer.inlineCallbacks - def _do_initial_spam(self): - """Populates the user_directory from the current state of the DB, used - when synapse first starts with user_directory support - """ - new_pos = yield self.store.get_max_stream_id_in_current_state_deltas() - - # Delete any existing entries just in case there are any - yield self.store.delete_all_from_user_dir() - - # We process by going through each existing room at a time. - room_ids = yield self.store.get_all_rooms() - - logger.info("Doing initial update of user directory. %d rooms", len(room_ids)) - num_processed_rooms = 0 - - for room_id in room_ids: - logger.info("Handling room %d/%d", num_processed_rooms + 1, len(room_ids)) - yield self._handle_initial_room(room_id) - num_processed_rooms += 1 - yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) - - logger.info("Processed all rooms.") - - if self.search_all_users: - num_processed_users = 0 - user_ids = yield self.store.get_all_local_users() - logger.info( - "Doing initial update of user directory. 
%d users", len(user_ids) - ) - for user_id in user_ids: - # We add profiles for all users even if they don't match the - # include pattern, just in case we want to change it in future - logger.info( - "Handling user %d/%d", num_processed_users + 1, len(user_ids) - ) - yield self._handle_local_user(user_id) - num_processed_users += 1 - yield self.clock.sleep(self.INITIAL_USER_SLEEP_MS / 1000.0) - - logger.info("Processed all users") - - self.initially_handled_users = None - - yield self.store.update_user_directory_stream_pos(new_pos) - - @defer.inlineCallbacks - def _handle_initial_room(self, room_id): - """ - Called when we initially fill out user_directory one room at a time - """ - is_in_room = yield self.store.is_host_joined(room_id, self.server_name) - if not is_in_room: - return - - is_public = yield self.store.is_room_world_readable_or_publicly_joinable( - room_id - ) - - users_with_profile = yield self.state.get_current_user_in_room(room_id) - user_ids = set(users_with_profile) - unhandled_users = user_ids - self.initially_handled_users - - yield self.store.add_profiles_to_user_dir( - {user_id: users_with_profile[user_id] for user_id in unhandled_users} - ) - - self.initially_handled_users |= unhandled_users - - # We now go and figure out the new users who share rooms with user entries - # We sleep aggressively here as otherwise it can starve resources. - # We also batch up inserts/updates, but try to avoid too many at once. 
- to_insert = set() - count = 0 - - if is_public: - for user_id in user_ids: - if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: - yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) - - if self.store.get_if_app_services_interested_in_user(user_id): - count += 1 - continue - - to_insert.add(user_id) - if len(to_insert) > self.INITIAL_ROOM_BATCH_SIZE: - yield self.store.add_users_in_public_rooms(room_id, to_insert) - to_insert.clear() - - if to_insert: - yield self.store.add_users_in_public_rooms(room_id, to_insert) - to_insert.clear() - else: - - for user_id in user_ids: - if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: - yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) - - if not self.is_mine_id(user_id): - count += 1 - continue - - if self.store.get_if_app_services_interested_in_user(user_id): - count += 1 - continue - - for other_user_id in user_ids: - if user_id == other_user_id: - continue - - if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: - yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) - count += 1 - - user_set = (user_id, other_user_id) - to_insert.add(user_set) - - if len(to_insert) > self.INITIAL_ROOM_BATCH_SIZE: - yield self.store.add_users_who_share_private_room( - room_id, not is_public, to_insert - ) - to_insert.clear() - - if to_insert: - yield self.store.add_users_who_share_private_room(room_id, to_insert) - to_insert.clear() - @defer.inlineCallbacks def _handle_deltas(self, deltas): """Called with the state deltas to process @@ -449,7 +300,9 @@ class UserDirectoryHandler(object): row = yield self.store.get_user_in_directory(user_id) if not row: - yield self.store.add_profiles_to_user_dir({user_id: profile}) + yield self.store.update_profile_in_user_dir( + user_id, profile.display_name, profile.avatar_url + ) @defer.inlineCallbacks def _handle_new_user(self, room_id, user_id, profile): @@ -461,9 +314,9 @@ class UserDirectoryHandler(object): """ logger.debug("Adding new user to dir, %r", user_id) - row = yield 
self.store.get_user_in_directory(user_id) - if not row: - yield self.store.add_profiles_to_user_dir({user_id: profile}) + yield self.store.update_profile_in_user_dir( + user_id, profile.display_name, profile.avatar_url + ) is_public = yield self.store.is_room_world_readable_or_publicly_joinable( room_id @@ -479,7 +332,9 @@ class UserDirectoryHandler(object): # First, if they're our user then we need to update for every user if self.is_mine_id(user_id): - is_appservice = self.store.get_if_app_services_interested_in_user(user_id) + is_appservice = self.store.get_if_app_services_interested_in_user( + user_id + ) # We don't care about appservice users. if not is_appservice: @@ -546,9 +401,7 @@ class UserDirectoryHandler(object): new_avatar = event.content.get("avatar_url") if prev_name != new_name or prev_avatar != new_avatar: - yield self.store.update_profile_in_user_dir( - user_id, new_name, new_avatar, room_id - ) + yield self.store.update_profile_in_user_dir(user_id, new_name, new_avatar) @defer.inlineCallbacks def _get_key_change(self, prev_event_id, event_id, key_name, public_value): diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 60cdc884e6..a2f8c23a65 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -52,7 +52,9 @@ class BackgroundUpdatePerformance(object): Returns: A duration in ms as a float """ - if self.total_item_count == 0: + if self.avg_duration_ms == 0: + return 0 + elif self.total_item_count == 0: return None else: # Use the exponential moving average so that we can adapt to @@ -64,7 +66,9 @@ class BackgroundUpdatePerformance(object): Returns: A duration in ms as a float """ - if self.total_item_count == 0: + if self.total_duration_ms == 0: + return 0 + elif self.total_item_count == 0: return None else: return float(self.total_item_count) / float(self.total_duration_ms) diff --git a/synapse/storage/schema/delta/53/user_dir_populate.sql 
b/synapse/storage/schema/delta/53/user_dir_populate.sql new file mode 100644 index 0000000000..955b8fdbd6 --- /dev/null +++ b/synapse/storage/schema/delta/53/user_dir_populate.sql @@ -0,0 +1,30 @@ +/* Copyright 2019 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Set up staging tables +INSERT INTO background_updates (update_name, progress_json) VALUES + ('populate_user_directory_createtables', '{}'); + +-- Run through each room and update the user directory according to who is in it +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_user_directory_process_rooms', '{}', 'populate_user_directory_createtables'); + +-- Insert all users, if search_all_users is on +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_user_directory_process_users', '{}', 'populate_user_directory_rooms'); + +-- Clean up staging tables +INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES + ('populate_user_directory_cleanup', '{}', 'populate_user_directory_process_users'); diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 1c00b956e5..4ee653210f 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -16,12 +16,10 @@ import logging import re -from six import iteritems - from twisted.internet import defer from synapse.api.constants import EventTypes, JoinRules -from 
synapse.storage._base import SQLBaseStore +from synapse.storage.background_updates import BackgroundUpdateStore from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.storage.state import StateFilter from synapse.types import get_domain_from_id, get_localpart_from_id @@ -30,7 +28,276 @@ from synapse.util.caches.descriptors import cached logger = logging.getLogger(__name__) -class UserDirectoryStore(SQLBaseStore): +TEMP_TABLE = "_temp_populate_user_directory" + + +class UserDirectoryStore(BackgroundUpdateStore): + def __init__(self, db_conn, hs): + super(UserDirectoryStore, self).__init__(db_conn, hs) + + self.server_name = hs.hostname + + self.register_background_update_handler( + "populate_user_directory_createtables", + self._populate_user_directory_createtables, + ) + self.register_background_update_handler( + "populate_user_directory_process_rooms", + self._populate_user_directory_process_rooms, + ) + self.register_background_update_handler( + "populate_user_directory_process_users", + self._populate_user_directory_process_users, + ) + self.register_background_update_handler( + "populate_user_directory_cleanup", self._populate_user_directory_cleanup + ) + + @defer.inlineCallbacks + def _populate_user_directory_createtables(self, progress, batch_size): + + # Get all the rooms that we want to process. 
+ def _make_staging_area(txn): + sql = ( + "CREATE TABLE IF NOT EXISTS " + + TEMP_TABLE + + "_rooms(room_id TEXT NOT NULL, events BIGINT NOT NULL)" + ) + txn.execute(sql) + + sql = ( + "CREATE TABLE IF NOT EXISTS " + + TEMP_TABLE + + "_position(position TEXT NOT NULL)" + ) + txn.execute(sql) + + # Get rooms we want to process from the database + sql = """ + SELECT room_id, count(*) FROM current_state_events + GROUP BY room_id + """ + txn.execute(sql) + rooms = [{"room_id": x[0], "events": x[1]} for x in txn.fetchall()] + self._simple_insert_many_txn(txn, TEMP_TABLE + "_rooms", rooms) + del rooms + + # If search all users is on, get all the users we want to add. + if self.hs.config.user_directory_search_all_users: + sql = ( + "CREATE TABLE IF NOT EXISTS " + + TEMP_TABLE + + "_users(user_id TEXT NOT NULL)" + ) + txn.execute(sql) + + txn.execute("SELECT name FROM users") + users = [{"user_id": x[0]} for x in txn.fetchall()] + + self._simple_insert_many_txn(txn, TEMP_TABLE + "_users", users) + + new_pos = yield self.get_max_stream_id_in_current_state_deltas() + yield self.runInteraction( + "populate_user_directory_temp_build", _make_staging_area + ) + yield self._simple_insert(TEMP_TABLE + "_position", {"position": new_pos}) + + yield self._end_background_update("populate_user_directory_createtables") + defer.returnValue(1) + + @defer.inlineCallbacks + def _populate_user_directory_cleanup(self, progress, batch_size): + """ + Update the user directory stream position, then clean up the old tables. 
+ """ + position = yield self._simple_select_one_onecol( + TEMP_TABLE + "_position", None, "position" + ) + yield self.update_user_directory_stream_pos(position) + + def _delete_staging_area(txn): + txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_rooms") + txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_users") + txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_position") + + yield self.runInteraction( + "populate_user_directory_cleanup", _delete_staging_area + ) + + yield self._end_background_update("populate_user_directory_cleanup") + defer.returnValue(1) + + @defer.inlineCallbacks + def _populate_user_directory_process_rooms(self, progress, batch_size): + + state = self.hs.get_state_handler() + + # If we don't have progress filed, delete everything. + if not progress: + yield self.delete_all_from_user_dir() + + def _get_next_batch(txn): + sql = """ + SELECT room_id FROM %s + ORDER BY events DESC + LIMIT %s + """ % ( + TEMP_TABLE + "_rooms", + str(batch_size), + ) + txn.execute(sql) + rooms_to_work_on = txn.fetchall() + + if not rooms_to_work_on: + return None + + rooms_to_work_on = [x[0] for x in rooms_to_work_on] + + # Get how many are left to process, so we can give status on how + # far we are in processing + txn.execute("SELECT COUNT(*) FROM " + TEMP_TABLE + "_rooms") + progress["remaining"] = txn.fetchone()[0] + + return rooms_to_work_on + + rooms_to_work_on = yield self.runInteraction( + "populate_user_directory_temp_read", _get_next_batch + ) + + # No more rooms -- complete the transaction. 
+ if not rooms_to_work_on: + yield self._end_background_update("populate_user_directory_process_rooms") + defer.returnValue(1) + + logger.info( + "Processing the next %d rooms of %d remaining" + % (len(rooms_to_work_on), progress["remaining"]) + ) + + for room_id in rooms_to_work_on: + is_in_room = yield self.is_host_joined(room_id, self.server_name) + + if is_in_room: + is_public = yield self.is_room_world_readable_or_publicly_joinable( + room_id + ) + + users_with_profile = yield state.get_current_user_in_room(room_id) + user_ids = set(users_with_profile) + + # Update each user in the user directory. + for user_id, profile in users_with_profile.items(): + yield self.update_profile_in_user_dir( + user_id, profile.display_name, profile.avatar_url + ) + + to_insert = set() + + if is_public: + for user_id in user_ids: + if self.get_if_app_services_interested_in_user(user_id): + continue + + to_insert.add(user_id) + + if to_insert: + yield self.add_users_in_public_rooms(room_id, to_insert) + to_insert.clear() + else: + for user_id in user_ids: + if not self.hs.is_mine_id(user_id): + continue + + if self.get_if_app_services_interested_in_user(user_id): + continue + + for other_user_id in user_ids: + if user_id == other_user_id: + continue + + user_set = (user_id, other_user_id) + to_insert.add(user_set) + + if to_insert: + yield self.add_users_who_share_private_room(room_id, to_insert) + to_insert.clear() + + # We've finished a room. Delete it from the table. + yield self._simple_delete_one(TEMP_TABLE + "_rooms", {"room_id": room_id}) + # Update the remaining counter. 
+ progress["remaining"] -= 1 + yield self.runInteraction( + "populate_user_directory", + self._background_update_progress_txn, + "populate_user_directory_process_rooms", + progress, + ) + + defer.returnValue(len(rooms_to_work_on)) + + @defer.inlineCallbacks + def _populate_user_directory_process_users(self, progress, batch_size): + """ + If search_all_users is enabled, add all of the users to the user directory. + """ + if not self.hs.config.user_directory_search_all_users: + yield self._end_background_update("populate_user_directory_process_users") + defer.returnValue(1) + + def _get_next_batch(txn): + sql = "SELECT user_id FROM %s LIMIT %s" % ( + TEMP_TABLE + "_users", + str(batch_size), + ) + txn.execute(sql) + users_to_work_on = txn.fetchall() + + if not users_to_work_on: + return None + + users_to_work_on = [x[0] for x in users_to_work_on] + + # Get how many are left to process, so we can give status on how + # far we are in processing + sql = "SELECT COUNT(*) FROM " + TEMP_TABLE + "_users" + txn.execute(sql) + progress["remaining"] = txn.fetchone()[0] + + return users_to_work_on + + users_to_work_on = yield self.runInteraction( + "populate_user_directory_temp_read", _get_next_batch + ) + + # No more users -- complete the transaction. + if not users_to_work_on: + yield self._end_background_update("populate_user_directory_process_users") + defer.returnValue(1) + + logger.info( + "Processing the next %d users of %d remaining" + % (len(users_to_work_on), progress["remaining"]) + ) + + for user_id in users_to_work_on: + profile = yield self.get_profileinfo(get_localpart_from_id(user_id)) + yield self.update_profile_in_user_dir( + user_id, profile.display_name, profile.avatar_url + ) + + # We've finished processing a user. Delete it from the table. + yield self._simple_delete_one(TEMP_TABLE + "_users", {"user_id": user_id}) + # Update the remaining counter. 
+ progress["remaining"] -= 1 + yield self.runInteraction( + "populate_user_directory", + self._background_update_progress_txn, + "populate_user_directory_process_users", + progress, + ) + + defer.returnValue(len(users_to_work_on)) + @defer.inlineCallbacks def is_room_world_readable_or_publicly_joinable(self, room_id): """Check if the room is either world_readable or publically joinable @@ -62,89 +329,16 @@ class UserDirectoryStore(SQLBaseStore): defer.returnValue(False) - def add_profiles_to_user_dir(self, users_with_profile): - """Add profiles to the user directory - - Args: - users_with_profile (dict): Users to add to directory in the form of - mapping of user_id -> ProfileInfo + def update_profile_in_user_dir(self, user_id, display_name, avatar_url): + """ + Update or add a user's profile in the user directory. """ - if isinstance(self.database_engine, PostgresEngine): - # We weight the loclpart most highly, then display name and finally - # server name - sql = """ - INSERT INTO user_directory_search(user_id, vector) - VALUES (?, - setweight(to_tsvector('english', ?), 'A') - || setweight(to_tsvector('english', ?), 'D') - || setweight(to_tsvector('english', COALESCE(?, '')), 'B') - ) - """ - args = ( - ( - user_id, - get_localpart_from_id(user_id), - get_domain_from_id(user_id), - profile.display_name, - ) - for user_id, profile in iteritems(users_with_profile) - ) - elif isinstance(self.database_engine, Sqlite3Engine): - sql = """ - INSERT INTO user_directory_search(user_id, value) - VALUES (?,?) - """ - args = tuple( - ( - user_id, - "%s %s" % (user_id, p.display_name) if p.display_name else user_id, - ) - for user_id, p in iteritems(users_with_profile) - ) - else: - # This should be unreachable. 
- raise Exception("Unrecognized database engine") - - def _add_profiles_to_user_dir_txn(txn): - txn.executemany(sql, args) - self._simple_insert_many_txn( - txn, - table="user_directory", - values=[ - { - "user_id": user_id, - "room_id": None, - "display_name": profile.display_name, - "avatar_url": profile.avatar_url, - } - for user_id, profile in iteritems(users_with_profile) - ], - ) - for user_id in users_with_profile: - txn.call_after(self.get_user_in_directory.invalidate, (user_id,)) - - return self.runInteraction( - "add_profiles_to_user_dir", _add_profiles_to_user_dir_txn - ) - - @defer.inlineCallbacks - def update_user_in_user_dir(self, user_id, room_id): - yield self._simple_update_one( - table="user_directory", - keyvalues={"user_id": user_id}, - updatevalues={"room_id": room_id}, - desc="update_user_in_user_dir", - ) - self.get_user_in_directory.invalidate((user_id,)) - - def update_profile_in_user_dir(self, user_id, display_name, avatar_url, room_id): def _update_profile_in_user_dir_txn(txn): new_entry = self._simple_upsert_txn( txn, table="user_directory", keyvalues={"user_id": user_id}, - insertion_values={"room_id": room_id}, values={"display_name": display_name, "avatar_url": avatar_url}, lock=False, # We're only inserter ) @@ -281,18 +475,6 @@ class UserDirectoryStore(SQLBaseStore): defer.returnValue(user_ids) - @defer.inlineCallbacks - def get_all_rooms(self): - """Get all room_ids we've ever known about, in ascending order of "size" - """ - sql = """ - SELECT room_id FROM current_state_events - GROUP BY room_id - ORDER BY count(*) ASC - """ - rows = yield self._execute("get_all_rooms", None, sql) - defer.returnValue([room_id for room_id, in rows]) - @defer.inlineCallbacks def get_all_local_users(self): """Get all local users @@ -553,8 +735,8 @@ class UserDirectoryStore(SQLBaseStore): """ if self.hs.config.user_directory_search_all_users: - join_args = () - where_clause = "1=1" + join_args = (user_id,) + where_clause = "user_id != ?" 
else: join_args = (user_id,) where_clause = """ diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index 114807efc1..aefe11ac28 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -163,9 +163,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): def get_users_in_public_rooms(self): r = self.get_success( self.store._simple_select_list( - "users_in_public_rooms", - None, - ("user_id", "room_id"), + "users_in_public_rooms", None, ("user_id", "room_id") ) ) retval = [] @@ -182,6 +180,53 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): ) ) + def _add_background_updates(self): + """ + Add the background updates we need to run. + """ + # Ugh, have to reset this flag + self.store._all_done = False + + self.get_success( + self.store._simple_insert( + "background_updates", + { + "update_name": "populate_user_directory_createtables", + "progress_json": "{}", + }, + ) + ) + self.get_success( + self.store._simple_insert( + "background_updates", + { + "update_name": "populate_user_directory_process_rooms", + "progress_json": "{}", + "depends_on": "populate_user_directory_createtables", + }, + ) + ) + self.get_success( + self.store._simple_insert( + "background_updates", + { + "update_name": "populate_user_directory_process_users", + "progress_json": "{}", + "depends_on": "populate_user_directory_process_rooms", + }, + ) + ) + self.get_success( + self.store._simple_insert( + "background_updates", + { + "update_name": "populate_user_directory_cleanup", + "progress_json": "{}", + "depends_on": "populate_user_directory_process_users", + }, + ) + ) + def test_initial(self): """ The user directory's initial handler correctly updates the search tables. 
@@ -211,26 +256,17 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.assertEqual(shares_private, []) self.assertEqual(public_users, []) - # Reset the handled users caches - self.handler.initially_handled_users = set() + # Do the initial population of the user directory via the background update + self._add_background_updates() - # Do the initial population - d = self.handler._do_initial_spam() - - # This takes a while, so pump it a bunch of times to get through the - # sleep delays - for i in range(10): - self.pump(1) - - self.get_success(d) + while not self.get_success(self.store.has_completed_background_updates()): + self.get_success(self.store.do_next_background_update(100), by=0.1) shares_private = self.get_users_who_share_private_rooms() public_users = self.get_users_in_public_rooms() # User 1 and User 2 are in the same public room - self.assertEqual( - set(public_users), set([(u1, room), (u2, room)]) - ) + self.assertEqual(set(public_users), set([(u1, room), (u2, room)])) # User 1 and User 3 share private rooms self.assertEqual( @@ -238,7 +274,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): set([(u1, u3, private_room), (u3, u1, private_room)]), ) - def test_search_all_users(self): + def test_initial_share_all_users(self): """ Search all users = True means that a user does not have to share a private room with the searching user or be in a public room to be search @@ -248,33 +284,36 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.hs.config.user_directory_search_all_users = True u1 = self.register_user("user1", "pass") - u1_token = self.login(u1, "pass") - u2 = self.register_user("user2", "pass") - u2_token = self.login(u2, "pass") + self.register_user("user2", "pass") u3 = self.register_user("user3", "pass") - # User 1 and User 2 join a room. User 3 never does. 
- room = self.helper.create_room_as(u1, is_public=True, tok=u1_token) - self.helper.invite(room, src=u1, targ=u2, tok=u1_token) - self.helper.join(room, user=u2, tok=u2_token) - + # Wipe the user dir self.get_success(self.store.update_user_directory_stream_pos(None)) self.get_success(self.store.delete_all_from_user_dir()) - # Reset the handled users caches - self.handler.initially_handled_users = set() + # Do the initial population of the user directory via the background update + self._add_background_updates() - # Do the initial population - d = self.handler._do_initial_spam() + while not self.get_success(self.store.has_completed_background_updates()): + self.get_success(self.store.do_next_background_update(100), by=0.1) - # This takes a while, so pump it a bunch of times to get through the - # sleep delays - for i in range(10): - self.pump(1) + shares_private = self.get_users_who_share_private_rooms() + public_users = self.get_users_in_public_rooms() - self.get_success(d) + # No users share rooms + self.assertEqual(public_users, []) + self.assertEqual(self._compress_shared(shares_private), set([])) # Despite not sharing a room, search_all_users means we get a search # result. s = self.get_success(self.handler.search_users(u1, u3, 10)) self.assertEqual(len(s["results"]), 1) + + # We can find the other two users + s = self.get_success(self.handler.search_users(u1, "user", 10)) + self.assertEqual(len(s["results"]), 2) + + # Registering a user and then searching for them works. 
+ u4 = self.register_user("user4", "pass") + s = self.get_success(self.handler.search_users(u1, u4, 10)) + self.assertEqual(len(s["results"]), 1) diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py index 512d76e7a3..fd3361404f 100644 --- a/tests/storage/test_user_directory.py +++ b/tests/storage/test_user_directory.py @@ -16,7 +16,6 @@ from twisted.internet import defer from synapse.storage import UserDirectoryStore -from synapse.storage.roommember import ProfileInfo from tests import unittest from tests.utils import setup_test_homeserver @@ -34,13 +33,9 @@ class UserDirectoryStoreTestCase(unittest.TestCase): # alice and bob are both in !room_id. bobby is not but shares # a homeserver with alice. - yield self.store.add_profiles_to_user_dir( - { - ALICE: ProfileInfo(None, "alice"), - BOB: ProfileInfo(None, "bob"), - BOBBY: ProfileInfo(None, "bobby"), - }, - ) + yield self.store.update_profile_in_user_dir(ALICE, "alice", None) + yield self.store.update_profile_in_user_dir(BOB, "bob", None) + yield self.store.update_profile_in_user_dir(BOBBY, "bobby", None) yield self.store.add_users_in_public_rooms( "!room:id", (ALICE, BOB) ) diff --git a/tests/unittest.py b/tests/unittest.py index ef31321bc8..7772a47078 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -330,10 +330,10 @@ class HomeserverTestCase(TestCase): """ self.reactor.pump([by] * 100) - def get_success(self, d): + def get_success(self, d, by=0.0): if not isinstance(d, Deferred): return d - self.pump() + self.pump(by=by) return self.successResultOf(d) def register_user(self, username, password, admin=False): -- cgit 1.5.1 From fd463b4f5db3fb164505bc2b2300c6a5f73c73e4 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 19 Mar 2019 10:06:40 +0000 Subject: Comment out most options in the generated config. (#4863) Make it so that most options in the config are optional, and commented out in the generated config. 
The reasons this is a good thing are as follows: * If we decide that we should change the default for an option, we can do so, and only those admins that have deliberately chosen to override that option will be stuck on the old setting. * It moves us towards a point where we can get rid of the super-surprising feature of synapse where the default settings for the config come from the generated yaml. * It makes setting up a test config for unit testing an order of magnitude easier (see forthcoming PR). * It makes the generated config more consistent, and hopefully easier for users to understand. --- changelog.d/4863.misc | 1 + docs/sample_config.yaml | 153 +++++++++++++++++++++-------------------- synapse/config/api.py | 12 ++-- synapse/config/appservice.py | 10 +-- synapse/config/captcha.py | 23 ++++--- synapse/config/database.py | 3 +- synapse/config/groups.py | 4 +- synapse/config/key.py | 30 ++++---- synapse/config/metrics.py | 4 +- synapse/config/password.py | 15 ++-- synapse/config/ratelimiting.py | 28 ++++---- synapse/config/registration.py | 25 ++++--- synapse/config/repository.py | 77 +++++++++++++-------- synapse/config/saml2_config.py | 2 +- synapse/config/server.py | 6 +- synapse/config/voip.py | 8 ++- 16 files changed, 230 insertions(+), 171 deletions(-) create mode 100644 changelog.d/4863.misc (limited to 'synapse') diff --git a/changelog.d/4863.misc b/changelog.d/4863.misc new file mode 100644 index 0000000000..bfe03cbedc --- /dev/null +++ b/changelog.d/4863.misc @@ -0,0 +1 @@ +Comment out most options in the generated config. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 84e2cc97f9..f9886a900d 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -63,11 +63,11 @@ pid_file: DATADIR/homeserver.pid # Zero is used to indicate synapse should set the soft limit to the # hard limit. # -soft_file_limit: 0 +#soft_file_limit: 0 # Set to false to disable presence tracking on this homeserver. 
# -use_presence: true +#use_presence: false # The GC threshold parameters to pass to `gc.set_threshold`, if defined # @@ -359,7 +359,8 @@ database: database: "DATADIR/homeserver.db" # Number of events to cache in memory. -event_cache_size: "10K" +# +#event_cache_size: 10K ## Logging ## @@ -373,11 +374,11 @@ log_config: "CONFDIR/SERVERNAME.log.config" # Number of messages a client can send per second # -rc_messages_per_second: 0.2 +#rc_messages_per_second: 0.2 # Number of message a client can send before being throttled # -rc_message_burst_count: 10.0 +#rc_message_burst_count: 10.0 # Ratelimiting settings for registration and login. # @@ -415,27 +416,27 @@ rc_message_burst_count: 10.0 # The federation window size in milliseconds # -federation_rc_window_size: 1000 +#federation_rc_window_size: 1000 # The number of federation requests from a single server in a window # before the server will delay processing the request. # -federation_rc_sleep_limit: 10 +#federation_rc_sleep_limit: 10 # The duration in milliseconds to delay processing events from # remote servers by if they go over the sleep limit. # -federation_rc_sleep_delay: 500 +#federation_rc_sleep_delay: 500 # The maximum number of concurrent federation requests allowed # from a single server # -federation_rc_reject_limit: 50 +#federation_rc_reject_limit: 50 # The number of federation requests to concurrently process from a # single server # -federation_rc_concurrent: 3 +#federation_rc_concurrent: 3 @@ -464,11 +465,11 @@ uploads_path: "DATADIR/uploads" # The largest allowed upload size in bytes # -max_upload_size: "10M" +#max_upload_size: 10M # Maximum number of pixels that will be thumbnailed # -max_image_pixels: "32M" +#max_image_pixels: 32M # Whether to generate new thumbnails on the fly to precisely match # the resolution requested by the client. If true then whenever @@ -476,32 +477,32 @@ max_image_pixels: "32M" # generate a new thumbnail. 
If false the server will pick a thumbnail # from a precalculated list. # -dynamic_thumbnails: false +#dynamic_thumbnails: false # List of thumbnails to precalculate when an image is uploaded. # -thumbnail_sizes: -- width: 32 - height: 32 - method: crop -- width: 96 - height: 96 - method: crop -- width: 320 - height: 240 - method: scale -- width: 640 - height: 480 - method: scale -- width: 800 - height: 600 - method: scale +#thumbnail_sizes: +# - width: 32 +# height: 32 +# method: crop +# - width: 96 +# height: 96 +# method: crop +# - width: 320 +# height: 240 +# method: scale +# - width: 640 +# height: 480 +# method: scale +# - width: 800 +# height: 600 +# method: scale # Is the preview URL API enabled? If enabled, you *must* specify # an explicit url_preview_ip_range_blacklist of IPs that the spider is # denied from accessing. # -url_preview_enabled: False +#url_preview_enabled: false # List of IP address CIDR ranges that the URL preview spider is denied # from accessing. There are no defaults: you must explicitly @@ -566,8 +567,8 @@ url_preview_enabled: False # - netloc: '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$' # The largest allowed URL preview spidering size in bytes -max_spider_size: "10M" - +# +#max_spider_size: 10M ## Captcha ## @@ -575,23 +576,25 @@ max_spider_size: "10M" # This Home Server's ReCAPTCHA public key. # -recaptcha_public_key: "YOUR_PUBLIC_KEY" +#recaptcha_public_key: "YOUR_PUBLIC_KEY" # This Home Server's ReCAPTCHA private key. # -recaptcha_private_key: "YOUR_PRIVATE_KEY" +#recaptcha_private_key: "YOUR_PRIVATE_KEY" # Enables ReCaptcha checks when registering, preventing signup # unless a captcha is answered. Requires a valid ReCaptcha # public/private key. # -enable_registration_captcha: False +#enable_registration_captcha: false # A secret key used to bypass the captcha test entirely. +# #captcha_bypass_secret: "YOUR_SECRET_HERE" # The API endpoint to use for verifying m.login.recaptcha responses. 
-recaptcha_siteverify_api: "https://www.recaptcha.net/recaptcha/api/siteverify" +# +#recaptcha_siteverify_api: "https://www.recaptcha.net/recaptcha/api/siteverify" ## TURN ## @@ -612,7 +615,7 @@ recaptcha_siteverify_api: "https://www.recaptcha.net/recaptcha/api/siteverify" # How long generated TURN credentials last # -turn_user_lifetime: "1h" +#turn_user_lifetime: 1h # Whether guests should be allowed to use the TURN server. # This defaults to True, otherwise VoIP will be unreliable for guests. @@ -620,15 +623,17 @@ turn_user_lifetime: "1h" # connect to arbitrary endpoints without having first signed up for a # valid account (e.g. by passing a CAPTCHA). # -turn_allow_guests: True +#turn_allow_guests: True ## Registration ## +# # Registration can be rate-limited using the parameters in the "Ratelimiting" # section of this file. # Enable registration for new users. -enable_registration: False +# +#enable_registration: false # The user must provide all of the below types of 3PID when registering. # @@ -639,7 +644,7 @@ enable_registration: False # Explicitly disable asking for MSISDNs from the registration # flow (overrides registrations_require_3pid if MSISDNs are set as required) # -#disable_msisdn_registration: True +#disable_msisdn_registration: true # Mandate that users are only allowed to associate certain formats of # 3PIDs with accounts on this server. @@ -663,13 +668,13 @@ enable_registration: False # N.B. that increasing this will exponentially increase the time required # to register or login - e.g. 24 => 2^24 rounds which will take >20 mins. # -bcrypt_rounds: 12 +#bcrypt_rounds: 12 # Allows users to register as guests without a password/email/etc, and # participate in rooms hosted on this server which have been made # accessible to anonymous users. # -allow_guest_access: False +#allow_guest_access: false # The identity server which we suggest that clients should use when users log # in on this server. 
@@ -685,9 +690,9 @@ allow_guest_access: False # Also defines the ID server which will be called when an account is # deactivated (one will be picked arbitrarily). # -trusted_third_party_id_servers: - - matrix.org - - vector.im +#trusted_third_party_id_servers: +# - matrix.org +# - vector.im # Users who register on this homeserver will automatically be joined # to these rooms @@ -701,14 +706,14 @@ trusted_third_party_id_servers: # Setting to false means that if the rooms are not manually created, # users cannot be auto-joined since they do not exist. # -autocreate_auto_join_rooms: true +#autocreate_auto_join_rooms: true ## Metrics ### # Enable collection and rendering of performance metrics # -enable_metrics: False +#enable_metrics: False # Enable sentry integration # NOTE: While attempts are made to ensure that the logs don't contain @@ -728,22 +733,24 @@ enable_metrics: False # A list of event types that will be included in the room_invite_state # -room_invite_state_types: - - "m.room.join_rules" - - "m.room.canonical_alias" - - "m.room.avatar" - - "m.room.encryption" - - "m.room.name" +#room_invite_state_types: +# - "m.room.join_rules" +# - "m.room.canonical_alias" +# - "m.room.avatar" +# - "m.room.encryption" +# - "m.room.name" -# A list of application service config file to use +# A list of application service config files to use # -app_service_config_files: [] +#app_service_config_files: +# - app_service_1.yaml +# - app_service_2.yaml -# Whether or not to track application service IP addresses. Implicitly +# Uncomment to enable tracking of application service IP addresses. Implicitly # enables MAU tracking for application service users. # -track_appservice_user_ips: False +#track_appservice_user_ips: True # a secret which is used to sign access tokens. If none is specified, @@ -754,7 +761,7 @@ track_appservice_user_ips: False # Used to enable access token expiration. 
# -expire_access_token: False +#expire_access_token: False # a secret which is used to calculate HMACs for form values, to stop # falsification of values. Must be specified for the User Consent @@ -783,17 +790,16 @@ signing_key_path: "CONFDIR/SERVERNAME.signing.key" # Determines how quickly servers will query to check which keys # are still valid. # -key_refresh_interval: "1d" # 1 Day. +#key_refresh_interval: 1d # The trusted servers to download signing keys from. # -perspectives: - servers: - "matrix.org": - verify_keys: - "ed25519:auto": - key: "Noi6WqcDj0QmPxCNQqgezwTlBKrfqehY1u2FyWP9uYw" - +#perspectives: +# servers: +# "matrix.org": +# verify_keys: +# "ed25519:auto": +# key: "Noi6WqcDj0QmPxCNQqgezwTlBKrfqehY1u2FyWP9uYw" # Enable SAML2 for registration and login. Uses pysaml2. @@ -858,14 +864,15 @@ perspectives: # algorithm: "HS256" - -# Enable password for login. -# password_config: - enabled: true + # Uncomment to disable password login + # + #enabled: false + # Uncomment and change to a secret random string for extra security. # DO NOT CHANGE THIS AFTER INITIAL SETUP! 
- #pepper: "" + # + #pepper: "EVEN_MORE_SECRET" @@ -934,9 +941,9 @@ password_config: # example_option: 'things' -# Whether to allow non server admins to create groups on this server +# Uncomment to allow non-server-admin users to create groups on this server # -enable_group_creation: false +#enable_group_creation: true # If enabled, non server admins can only create groups with local parts # starting with this prefix diff --git a/synapse/config/api.py b/synapse/config/api.py index e8a753f002..5eb4f86fa2 100644 --- a/synapse/config/api.py +++ b/synapse/config/api.py @@ -34,10 +34,10 @@ class ApiConfig(Config): # A list of event types that will be included in the room_invite_state # - room_invite_state_types: - - "{JoinRules}" - - "{CanonicalAlias}" - - "{RoomAvatar}" - - "{RoomEncryption}" - - "{Name}" + #room_invite_state_types: + # - "{JoinRules}" + # - "{CanonicalAlias}" + # - "{RoomAvatar}" + # - "{RoomEncryption}" + # - "{Name}" """.format(**vars(EventTypes)) diff --git a/synapse/config/appservice.py b/synapse/config/appservice.py index c260d59464..9e64c76544 100644 --- a/synapse/config/appservice.py +++ b/synapse/config/appservice.py @@ -37,14 +37,16 @@ class AppServiceConfig(Config): def default_config(cls, **kwargs): return """\ - # A list of application service config file to use + # A list of application service config files to use # - app_service_config_files: [] + #app_service_config_files: + # - app_service_1.yaml + # - app_service_2.yaml - # Whether or not to track application service IP addresses. Implicitly + # Uncomment to enable tracking of application service IP addresses. Implicitly # enables MAU tracking for application service users. 
# - track_appservice_user_ips: False + #track_appservice_user_ips: True """ diff --git a/synapse/config/captcha.py b/synapse/config/captcha.py index d25196be08..f7eebf26d2 100644 --- a/synapse/config/captcha.py +++ b/synapse/config/captcha.py @@ -18,11 +18,16 @@ from ._base import Config class CaptchaConfig(Config): def read_config(self, config): - self.recaptcha_private_key = config["recaptcha_private_key"] - self.recaptcha_public_key = config["recaptcha_public_key"] - self.enable_registration_captcha = config["enable_registration_captcha"] + self.recaptcha_private_key = config.get("recaptcha_private_key") + self.recaptcha_public_key = config.get("recaptcha_public_key") + self.enable_registration_captcha = config.get( + "enable_registration_captcha", False + ) self.captcha_bypass_secret = config.get("captcha_bypass_secret") - self.recaptcha_siteverify_api = config["recaptcha_siteverify_api"] + self.recaptcha_siteverify_api = config.get( + "recaptcha_siteverify_api", + "https://www.recaptcha.net/recaptcha/api/siteverify", + ) def default_config(self, **kwargs): return """\ @@ -31,21 +36,23 @@ class CaptchaConfig(Config): # This Home Server's ReCAPTCHA public key. # - recaptcha_public_key: "YOUR_PUBLIC_KEY" + #recaptcha_public_key: "YOUR_PUBLIC_KEY" # This Home Server's ReCAPTCHA private key. # - recaptcha_private_key: "YOUR_PRIVATE_KEY" + #recaptcha_private_key: "YOUR_PRIVATE_KEY" # Enables ReCaptcha checks when registering, preventing signup # unless a captcha is answered. Requires a valid ReCaptcha # public/private key. # - enable_registration_captcha: False + #enable_registration_captcha: false # A secret key used to bypass the captcha test entirely. + # #captcha_bypass_secret: "YOUR_SECRET_HERE" # The API endpoint to use for verifying m.login.recaptcha responses. 
- recaptcha_siteverify_api: "https://www.recaptcha.net/recaptcha/api/siteverify" + # + #recaptcha_siteverify_api: "https://www.recaptcha.net/recaptcha/api/siteverify" """ diff --git a/synapse/config/database.py b/synapse/config/database.py index 63e9cb63f8..3c27ed6b4a 100644 --- a/synapse/config/database.py +++ b/synapse/config/database.py @@ -60,7 +60,8 @@ class DatabaseConfig(Config): database: "%(database_path)s" # Number of events to cache in memory. - event_cache_size: "10K" + # + #event_cache_size: 10K """ % locals() def read_arguments(self, args): diff --git a/synapse/config/groups.py b/synapse/config/groups.py index 46933a904c..e4be172a79 100644 --- a/synapse/config/groups.py +++ b/synapse/config/groups.py @@ -23,9 +23,9 @@ class GroupsConfig(Config): def default_config(self, **kwargs): return """\ - # Whether to allow non server admins to create groups on this server + # Uncomment to allow non-server-admin users to create groups on this server # - enable_group_creation: false + #enable_group_creation: true # If enabled, non server admins can only create groups with local parts # starting with this prefix diff --git a/synapse/config/key.py b/synapse/config/key.py index 35f05fa974..2bd5531acb 100644 --- a/synapse/config/key.py +++ b/synapse/config/key.py @@ -43,10 +43,16 @@ class KeyConfig(Config): config.get("old_signing_keys", {}) ) self.key_refresh_interval = self.parse_duration( - config["key_refresh_interval"] + config.get("key_refresh_interval", "1d"), ) self.perspectives = self.read_perspectives( - config["perspectives"] + config.get("perspectives", {}).get("servers", { + "matrix.org": {"verify_keys": { + "ed25519:auto": { + "key": "Noi6WqcDj0QmPxCNQqgezwTlBKrfqehY1u2FyWP9uYw", + } + }} + }) ) self.macaroon_secret_key = config.get( @@ -88,7 +94,7 @@ class KeyConfig(Config): # Used to enable access token expiration. 
# - expire_access_token: False + #expire_access_token: False # a secret which is used to calculate HMACs for form values, to stop # falsification of values. Must be specified for the User Consent @@ -117,21 +123,21 @@ class KeyConfig(Config): # Determines how quickly servers will query to check which keys # are still valid. # - key_refresh_interval: "1d" # 1 Day. + #key_refresh_interval: 1d # The trusted servers to download signing keys from. # - perspectives: - servers: - "matrix.org": - verify_keys: - "ed25519:auto": - key: "Noi6WqcDj0QmPxCNQqgezwTlBKrfqehY1u2FyWP9uYw" + #perspectives: + # servers: + # "matrix.org": + # verify_keys: + # "ed25519:auto": + # key: "Noi6WqcDj0QmPxCNQqgezwTlBKrfqehY1u2FyWP9uYw" """ % locals() - def read_perspectives(self, perspectives_config): + def read_perspectives(self, perspectives_servers): servers = {} - for server_name, server_config in perspectives_config["servers"].items(): + for server_name, server_config in perspectives_servers.items(): for key_id, key_data in server_config["verify_keys"].items(): if is_signing_algorithm_supported(key_id): key_base64 = key_data["key"] diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py index ed0498c634..2de51979d8 100644 --- a/synapse/config/metrics.py +++ b/synapse/config/metrics.py @@ -24,7 +24,7 @@ MISSING_SENTRY = ( class MetricsConfig(Config): def read_config(self, config): - self.enable_metrics = config["enable_metrics"] + self.enable_metrics = config.get("enable_metrics", False) self.report_stats = config.get("report_stats", None) self.metrics_port = config.get("metrics_port") self.metrics_bind_host = config.get("metrics_bind_host", "127.0.0.1") @@ -48,7 +48,7 @@ class MetricsConfig(Config): # Enable collection and rendering of performance metrics # - enable_metrics: False + #enable_metrics: False # Enable sentry integration # NOTE: While attempts are made to ensure that the logs don't contain diff --git a/synapse/config/password.py b/synapse/config/password.py index 
2a52b9db54..eea59e772b 100644 --- a/synapse/config/password.py +++ b/synapse/config/password.py @@ -22,16 +22,21 @@ class PasswordConfig(Config): def read_config(self, config): password_config = config.get("password_config", {}) + if password_config is None: + password_config = {} + self.password_enabled = password_config.get("enabled", True) self.password_pepper = password_config.get("pepper", "") def default_config(self, config_dir_path, server_name, **kwargs): - return """ - # Enable password for login. - # + return """\ password_config: - enabled: true + # Uncomment to disable password login + # + #enabled: false + # Uncomment and change to a secret random string for extra security. # DO NOT CHANGE THIS AFTER INITIAL SETUP! - #pepper: "" + # + #pepper: "EVEN_MORE_SECRET" """ diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index 7e6cc5d0ea..898a19dd8c 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -24,8 +24,8 @@ class RateLimitConfig(object): class RatelimitConfig(Config): def read_config(self, config): - self.rc_messages_per_second = config["rc_messages_per_second"] - self.rc_message_burst_count = config["rc_message_burst_count"] + self.rc_messages_per_second = config.get("rc_messages_per_second", 0.2) + self.rc_message_burst_count = config.get("rc_message_burst_count", 10.0) self.rc_registration = RateLimitConfig(config.get("rc_registration", {})) @@ -36,11 +36,11 @@ class RatelimitConfig(Config): rc_login_config.get("failed_attempts", {}), ) - self.federation_rc_window_size = config["federation_rc_window_size"] - self.federation_rc_sleep_limit = config["federation_rc_sleep_limit"] - self.federation_rc_sleep_delay = config["federation_rc_sleep_delay"] - self.federation_rc_reject_limit = config["federation_rc_reject_limit"] - self.federation_rc_concurrent = config["federation_rc_concurrent"] + self.federation_rc_window_size = config.get("federation_rc_window_size", 1000) + 
self.federation_rc_sleep_limit = config.get("federation_rc_sleep_limit", 10) + self.federation_rc_sleep_delay = config.get("federation_rc_sleep_delay", 500) + self.federation_rc_reject_limit = config.get("federation_rc_reject_limit", 50) + self.federation_rc_concurrent = config.get("federation_rc_concurrent", 3) def default_config(self, **kwargs): return """\ @@ -48,11 +48,11 @@ class RatelimitConfig(Config): # Number of messages a client can send per second # - rc_messages_per_second: 0.2 + #rc_messages_per_second: 0.2 # Number of message a client can send before being throttled # - rc_message_burst_count: 10.0 + #rc_message_burst_count: 10.0 # Ratelimiting settings for registration and login. # @@ -90,25 +90,25 @@ class RatelimitConfig(Config): # The federation window size in milliseconds # - federation_rc_window_size: 1000 + #federation_rc_window_size: 1000 # The number of federation requests from a single server in a window # before the server will delay processing the request. # - federation_rc_sleep_limit: 10 + #federation_rc_sleep_limit: 10 # The duration in milliseconds to delay processing events from # remote servers by if they go over the sleep limit. 
# - federation_rc_sleep_delay: 500 + #federation_rc_sleep_delay: 500 # The maximum number of concurrent federation requests allowed # from a single server # - federation_rc_reject_limit: 50 + #federation_rc_reject_limit: 50 # The number of federation requests to concurrently process from a # single server # - federation_rc_concurrent: 3 + #federation_rc_concurrent: 3 """ diff --git a/synapse/config/registration.py b/synapse/config/registration.py index a123f25a68..f6b2b9ceee 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -24,7 +24,7 @@ class RegistrationConfig(Config): def read_config(self, config): self.enable_registration = bool( - strtobool(str(config["enable_registration"])) + strtobool(str(config.get("enable_registration", False))) ) if "disable_registration" in config: self.enable_registration = not bool( @@ -36,7 +36,10 @@ class RegistrationConfig(Config): self.registration_shared_secret = config.get("registration_shared_secret") self.bcrypt_rounds = config.get("bcrypt_rounds", 12) - self.trusted_third_party_id_servers = config["trusted_third_party_id_servers"] + self.trusted_third_party_id_servers = config.get( + "trusted_third_party_id_servers", + ["matrix.org", "vector.im"], + ) self.default_identity_server = config.get("default_identity_server") self.allow_guest_access = config.get("allow_guest_access", False) @@ -64,11 +67,13 @@ class RegistrationConfig(Config): return """\ ## Registration ## + # # Registration can be rate-limited using the parameters in the "Ratelimiting" # section of this file. # Enable registration for new users. - enable_registration: False + # + #enable_registration: false # The user must provide all of the below types of 3PID when registering. 
# @@ -79,7 +84,7 @@ class RegistrationConfig(Config): # Explicitly disable asking for MSISDNs from the registration # flow (overrides registrations_require_3pid if MSISDNs are set as required) # - #disable_msisdn_registration: True + #disable_msisdn_registration: true # Mandate that users are only allowed to associate certain formats of # 3PIDs with accounts on this server. @@ -103,13 +108,13 @@ class RegistrationConfig(Config): # N.B. that increasing this will exponentially increase the time required # to register or login - e.g. 24 => 2^24 rounds which will take >20 mins. # - bcrypt_rounds: 12 + #bcrypt_rounds: 12 # Allows users to register as guests without a password/email/etc, and # participate in rooms hosted on this server which have been made # accessible to anonymous users. # - allow_guest_access: False + #allow_guest_access: false # The identity server which we suggest that clients should use when users log # in on this server. @@ -125,9 +130,9 @@ class RegistrationConfig(Config): # Also defines the ID server which will be called when an account is # deactivated (one will be picked arbitrarily). # - trusted_third_party_id_servers: - - matrix.org - - vector.im + #trusted_third_party_id_servers: + # - matrix.org + # - vector.im # Users who register on this homeserver will automatically be joined # to these rooms @@ -141,7 +146,7 @@ class RegistrationConfig(Config): # Setting to false means that if the rooms are not manually created, # users cannot be auto-joined since they do not exist. 
# - autocreate_auto_join_rooms: true + #autocreate_auto_join_rooms: true """ % locals() def add_arguments(self, parser): diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 97db2a5b7a..3f34ad9b2a 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -19,6 +19,36 @@ from synapse.util.module_loader import load_module from ._base import Config, ConfigError +DEFAULT_THUMBNAIL_SIZES = [ + { + "width": 32, + "height": 32, + "method": "crop", + }, { + "width": 96, + "height": 96, + "method": "crop", + }, { + "width": 320, + "height": 240, + "method": "scale", + }, { + "width": 640, + "height": 480, + "method": "scale", + }, { + "width": 800, + "height": 600, + "method": "scale" + }, +] + +THUMBNAIL_SIZE_YAML = """\ + # - width: %(width)i + # height: %(height)i + # method: %(method)s +""" + MISSING_NETADDR = ( "Missing netaddr library. This is required for URL preview API." ) @@ -77,9 +107,9 @@ def parse_thumbnail_requirements(thumbnail_sizes): class ContentRepositoryConfig(Config): def read_config(self, config): - self.max_upload_size = self.parse_size(config["max_upload_size"]) - self.max_image_pixels = self.parse_size(config["max_image_pixels"]) - self.max_spider_size = self.parse_size(config["max_spider_size"]) + self.max_upload_size = self.parse_size(config.get("max_upload_size", "10M")) + self.max_image_pixels = self.parse_size(config.get("max_image_pixels", "32M")) + self.max_spider_size = self.parse_size(config.get("max_spider_size", "10M")) self.media_store_path = self.ensure_directory(config["media_store_path"]) @@ -139,9 +169,9 @@ class ContentRepositoryConfig(Config): ) self.uploads_path = self.ensure_directory(config["uploads_path"]) - self.dynamic_thumbnails = config["dynamic_thumbnails"] + self.dynamic_thumbnails = config.get("dynamic_thumbnails", False) self.thumbnail_requirements = parse_thumbnail_requirements( - config["thumbnail_sizes"] + config.get("thumbnail_sizes", DEFAULT_THUMBNAIL_SIZES), ) 
self.url_preview_enabled = config.get("url_preview_enabled", False) if self.url_preview_enabled: @@ -178,6 +208,13 @@ class ContentRepositoryConfig(Config): def default_config(self, data_dir_path, **kwargs): media_store = os.path.join(data_dir_path, "media_store") uploads_path = os.path.join(data_dir_path, "uploads") + + formatted_thumbnail_sizes = "".join( + THUMBNAIL_SIZE_YAML % s for s in DEFAULT_THUMBNAIL_SIZES + ) + # strip final NL + formatted_thumbnail_sizes = formatted_thumbnail_sizes[:-1] + return r""" # Directory where uploaded images and attachments are stored. # @@ -204,11 +241,11 @@ class ContentRepositoryConfig(Config): # The largest allowed upload size in bytes # - max_upload_size: "10M" + #max_upload_size: 10M # Maximum number of pixels that will be thumbnailed # - max_image_pixels: "32M" + #max_image_pixels: 32M # Whether to generate new thumbnails on the fly to precisely match # the resolution requested by the client. If true then whenever @@ -216,32 +253,18 @@ class ContentRepositoryConfig(Config): # generate a new thumbnail. If false the server will pick a thumbnail # from a precalculated list. # - dynamic_thumbnails: false + #dynamic_thumbnails: false # List of thumbnails to precalculate when an image is uploaded. # - thumbnail_sizes: - - width: 32 - height: 32 - method: crop - - width: 96 - height: 96 - method: crop - - width: 320 - height: 240 - method: scale - - width: 640 - height: 480 - method: scale - - width: 800 - height: 600 - method: scale + #thumbnail_sizes: +%(formatted_thumbnail_sizes)s # Is the preview URL API enabled? If enabled, you *must* specify # an explicit url_preview_ip_range_blacklist of IPs that the spider is # denied from accessing. # - url_preview_enabled: False + #url_preview_enabled: false # List of IP address CIDR ranges that the URL preview spider is denied # from accessing. 
There are no defaults: you must explicitly @@ -306,6 +329,6 @@ class ContentRepositoryConfig(Config): # - netloc: '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$' # The largest allowed URL preview spidering size in bytes - max_spider_size: "10M" - + # + #max_spider_size: 10M """ % locals() diff --git a/synapse/config/saml2_config.py b/synapse/config/saml2_config.py index aff0a1f00c..39b9eb29c2 100644 --- a/synapse/config/saml2_config.py +++ b/synapse/config/saml2_config.py @@ -64,7 +64,7 @@ class SAML2Config(Config): } def default_config(self, config_dir_path, server_name, **kwargs): - return """ + return """\ # Enable SAML2 for registration and login. Uses pysaml2. # # `sp_config` is the configuration for the pysaml2 Service Provider. diff --git a/synapse/config/server.py b/synapse/config/server.py index 35a322fee0..499eb30bea 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -45,7 +45,7 @@ class ServerConfig(Config): self.pid_file = self.abspath(config.get("pid_file")) self.web_client_location = config.get("web_client_location", None) - self.soft_file_limit = config["soft_file_limit"] + self.soft_file_limit = config.get("soft_file_limit", 0) self.daemonize = config.get("daemonize") self.print_pidfile = config.get("print_pidfile") self.user_agent_suffix = config.get("user_agent_suffix") @@ -307,11 +307,11 @@ class ServerConfig(Config): # Zero is used to indicate synapse should set the soft limit to the # hard limit. # - soft_file_limit: 0 + #soft_file_limit: 0 # Set to false to disable presence tracking on this homeserver. 
# - use_presence: true + #use_presence: false # The GC threshold parameters to pass to `gc.set_threshold`, if defined # diff --git a/synapse/config/voip.py b/synapse/config/voip.py index 257f7c86e7..2a1f005a37 100644 --- a/synapse/config/voip.py +++ b/synapse/config/voip.py @@ -22,7 +22,9 @@ class VoipConfig(Config): self.turn_shared_secret = config.get("turn_shared_secret") self.turn_username = config.get("turn_username") self.turn_password = config.get("turn_password") - self.turn_user_lifetime = self.parse_duration(config["turn_user_lifetime"]) + self.turn_user_lifetime = self.parse_duration( + config.get("turn_user_lifetime", "1h"), + ) self.turn_allow_guests = config.get("turn_allow_guests", True) def default_config(self, **kwargs): @@ -45,7 +47,7 @@ class VoipConfig(Config): # How long generated TURN credentials last # - turn_user_lifetime: "1h" + #turn_user_lifetime: 1h # Whether guests should be allowed to use the TURN server. # This defaults to True, otherwise VoIP will be unreliable for guests. @@ -53,5 +55,5 @@ class VoipConfig(Config): # connect to arbitrary endpoints without having first signed up for a # valid account (e.g. by passing a CAPTCHA). # - turn_allow_guests: True + #turn_allow_guests: True """ -- cgit 1.5.1 From e9eeca1314b1e5b29d155d664faa5e818169183a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 19 Mar 2019 11:13:53 +0000 Subject: Fix user directory background update (#4887) --- changelog.d/4887.feature | 1 + synapse/storage/schema/delta/53/user_dir_populate.sql | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/4887.feature (limited to 'synapse') diff --git a/changelog.d/4887.feature b/changelog.d/4887.feature new file mode 100644 index 0000000000..e7ff0b9297 --- /dev/null +++ b/changelog.d/4887.feature @@ -0,0 +1 @@ +The user directory has been rewritten to make it faster, with less chance of falling behind on a large server. 
diff --git a/synapse/storage/schema/delta/53/user_dir_populate.sql b/synapse/storage/schema/delta/53/user_dir_populate.sql index 955b8fdbd6..ffcc896b58 100644 --- a/synapse/storage/schema/delta/53/user_dir_populate.sql +++ b/synapse/storage/schema/delta/53/user_dir_populate.sql @@ -23,7 +23,7 @@ INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES -- Insert all users, if search_all_users is on INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES - ('populate_user_directory_process_users', '{}', 'populate_user_directory_rooms'); + ('populate_user_directory_process_users', '{}', 'populate_user_directory_process_rooms'); -- Clean up staging tables INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES -- cgit 1.5.1 From 0dbfae03f96ae14672bbdc5cf5c3aecd93636803 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 19 Mar 2019 11:04:12 +0000 Subject: Enforce hs_disabled_message correctly Fixes a bug where hs_disabled_message was not enforced for 3pid-based requests if there was no server_notices_mxid configured. --- changelog.d/4888.bugfix | 2 ++ synapse/api/auth.py | 8 +++++--- tests/api/test_auth.py | 17 +++++++++++++++++ 3 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 changelog.d/4888.bugfix (limited to 'synapse') diff --git a/changelog.d/4888.bugfix b/changelog.d/4888.bugfix new file mode 100644 index 0000000000..0e193709e5 --- /dev/null +++ b/changelog.d/4888.bugfix @@ -0,0 +1,2 @@ +Fix a bug where hs_disabled_message was sometimes not correctly enforced. 
+ diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 5992d30623..ee646a97e8 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -788,9 +788,11 @@ class Auth(object): # Never fail an auth check for the server notices users or support user # This can be a problem where event creation is prohibited due to blocking - is_support = yield self.store.is_support_user(user_id) - if user_id == self.hs.config.server_notices_mxid or is_support: - return + if user_id is not None: + if user_id == self.hs.config.server_notices_mxid: + return + if (yield self.store.is_support_user(user_id)): + return if self.hs.config.hs_disabled: raise ResourceLimitError( diff --git a/tests/api/test_auth.py b/tests/api/test_auth.py index d77f20e876..d0d36f96fa 100644 --- a/tests/api/test_auth.py +++ b/tests/api/test_auth.py @@ -344,6 +344,23 @@ class AuthTestCase(unittest.TestCase): self.assertEquals(e.exception.errcode, Codes.RESOURCE_LIMIT_EXCEEDED) self.assertEquals(e.exception.code, 403) + @defer.inlineCallbacks + def test_hs_disabled_no_server_notices_user(self): + """Check that 'hs_disabled_message' works correctly when there is no + server_notices user. 
+ """ + # this should be the default, but we had a bug where the test was doing the wrong + # thing, so let's make it explicit + self.hs.config.server_notices_mxid = None + + self.hs.config.hs_disabled = True + self.hs.config.hs_disabled_message = "Reason for being disabled" + with self.assertRaises(ResourceLimitError) as e: + yield self.auth.check_auth_blocking() + self.assertEquals(e.exception.admin_contact, self.hs.config.admin_contact) + self.assertEquals(e.exception.errcode, Codes.RESOURCE_LIMIT_EXCEEDED) + self.assertEquals(e.exception.code, 403) + @defer.inlineCallbacks def test_server_notices_mxid_special_cased(self): self.hs.config.hs_disabled = True -- cgit 1.5.1 From 88f0675967d629ed14ae6ea6d4815bd0b5e0a44e Mon Sep 17 00:00:00 2001 From: Neil Johnson Date: Tue, 19 Mar 2019 11:38:59 +0000 Subject: fix test_auto_create_auto_join_where_no_consent (#4886) --- changelog.d/4886.bugfix | 1 + changelog.d/4886.misc | 1 + synapse/handlers/message.py | 13 ++++++++++--- synapse/handlers/register.py | 5 +++++ tests/handlers/test_register.py | 24 ++++++++++++++++++++++-- 5 files changed, 39 insertions(+), 5 deletions(-) create mode 100644 changelog.d/4886.bugfix create mode 100644 changelog.d/4886.misc (limited to 'synapse') diff --git a/changelog.d/4886.bugfix b/changelog.d/4886.bugfix new file mode 100644 index 0000000000..b17aa92485 --- /dev/null +++ b/changelog.d/4886.bugfix @@ -0,0 +1 @@ +fix test_auto_create_auto_join_where_no_consent. diff --git a/changelog.d/4886.misc b/changelog.d/4886.misc new file mode 100644 index 0000000000..b17aa92485 --- /dev/null +++ b/changelog.d/4886.misc @@ -0,0 +1 @@ +fix test_auto_create_auto_join_where_no_consent. 
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index c762b58902..55787563c0 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -243,7 +243,14 @@ class EventCreationHandler(object): self.spam_checker = hs.get_spam_checker() - if self.config.block_events_without_consent_error is not None: + self._block_events_without_consent_error = ( + self.config.block_events_without_consent_error + ) + + # we need to construct a ConsentURIBuilder here, as it checks that the necessary + # config options, but *only* if we have a configuration for which we are + # going to need it. + if self._block_events_without_consent_error: self._consent_uri_builder = ConsentURIBuilder(self.config) @defer.inlineCallbacks @@ -378,7 +385,7 @@ class EventCreationHandler(object): Raises: ConsentNotGivenError: if the user has not given consent yet """ - if self.config.block_events_without_consent_error is None: + if self._block_events_without_consent_error is None: return # exempt AS users from needing consent @@ -405,7 +412,7 @@ class EventCreationHandler(object): consent_uri = self._consent_uri_builder.build_user_consent_uri( requester.user.localpart, ) - msg = self.config.block_events_without_consent_error % { + msg = self._block_events_without_consent_error % { 'consent_uri': consent_uri, } raise ConsentNotGivenError( diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 0ec16b1d2e..68f73d3793 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -23,6 +23,7 @@ from synapse.api.constants import LoginType from synapse.api.errors import ( AuthError, Codes, + ConsentNotGivenError, InvalidCaptchaError, LimitExceededError, RegistrationError, @@ -311,6 +312,10 @@ class RegistrationHandler(BaseHandler): ) else: yield self._join_user_to_room(fake_requester, r) + except ConsentNotGivenError as e: + # Technically not necessary to pull out this error though + # moving away from bare excepts is a good thing 
to do. + logger.error("Failed to join new user to %r: %r", r, e) except Exception as e: logger.error("Failed to join new user to %r: %r", r, e) diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py index c9c1506273..010e65829e 100644 --- a/tests/handlers/test_register.py +++ b/tests/handlers/test_register.py @@ -187,12 +187,32 @@ class RegistrationTestCase(unittest.TestCase): @defer.inlineCallbacks def test_auto_create_auto_join_where_no_consent(self): - self.hs.config.user_consent_at_registration = True - self.hs.config.block_events_without_consent_error = "Error" + """Test to ensure that the first user is not auto-joined to a room if + they have not given general consent. + """ + + # Given:- + # * a user must give consent, + # * they have not given that consent + # * The server is configured to auto-join to a room + # (and autocreate if necessary) + + event_creation_handler = self.hs.get_event_creation_handler() + # (Messing with the internals of event_creation_handler is fragile + # but can't see a better way to do this. One option could be to subclass + # the test with custom config.) + event_creation_handler._block_events_without_consent_error = ("Error") + event_creation_handler._consent_uri_builder = Mock() room_alias_str = "#room:test" self.hs.config.auto_join_rooms = [room_alias_str] + + # When:- + # * the user is registered and post consent actions are called res = yield self.handler.register(localpart='jeff') yield self.handler.post_consent_actions(res[0]) + + # Then:- + # * Ensure that they have not been joined to the room rooms = yield self.store.get_rooms_for_user(res[0]) self.assertEqual(len(rooms), 0) -- cgit 1.5.1 From 13bc1e0746aa0442aa5d43555cbbc2dc75e8ef43 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 15 Mar 2019 15:50:37 +0000 Subject: Use a regular HomeServerConfig object for unit tests Rather than using a Mock for the homeserver config, use a genuine HomeServerConfig object. 
This makes for a more realistic test, and means that we don't have to keep remembering to add things to the mock config every time we add a new config setting. --- synapse/config/_base.py | 5 ++++- synapse/config/key.py | 7 ++++++- tests/utils.py | 26 +++++++++++++++----------- 3 files changed, 25 insertions(+), 13 deletions(-) (limited to 'synapse') diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 5613f38e4d..a219a83550 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -405,7 +405,10 @@ class Config(object): self.invoke_all("generate_files", config) return - self.invoke_all("read_config", config) + self.parse_config_dict(config) + + def parse_config_dict(self, config_dict): + self.invoke_all("read_config", config_dict) def find_config_files(search_paths): diff --git a/synapse/config/key.py b/synapse/config/key.py index 2bd5531acb..933928885a 100644 --- a/synapse/config/key.py +++ b/synapse/config/key.py @@ -38,7 +38,12 @@ logger = logging.getLogger(__name__) class KeyConfig(Config): def read_config(self, config): - self.signing_key = self.read_signing_key(config["signing_key_path"]) + # the signing key can be specified inline or in a separate file + if "signing_key" in config: + self.signing_key = read_signing_keys([config["signing_key"]]) + else: + self.signing_key = self.read_signing_key(config["signing_key_path"]) + self.old_signing_keys = self.read_old_signing_keys( config.get("old_signing_keys", {}) ) diff --git a/tests/utils.py b/tests/utils.py index b58b674aa4..eeb4bce5a2 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -28,7 +28,7 @@ from twisted.internet import defer, reactor from synapse.api.constants import EventTypes, RoomVersions from synapse.api.errors import CodeMessageException, cs_error -from synapse.config.server import ServerConfig +from synapse.config.homeserver import HomeServerConfig from synapse.federation.transport import server as federation_server from synapse.http.server import HttpServer from 
synapse.server import HomeServer @@ -111,14 +111,25 @@ def default_config(name): """ Create a reasonable test config. """ - config = Mock() - config.signing_key = [MockKey()] + config_dict = { + "server_name": name, + "media_store_path": "media", + "uploads_path": "uploads", + + # the test signing key is just an arbitrary ed25519 key to keep the config + # parser happy + "signing_key": "ed25519 a_lPym qvioDNmfExFBRPgdTU+wtFYKq4JfwFRv7sYVgWvmgJg", + } + + config = HomeServerConfig() + config.parse_config_dict(config_dict) + + # TODO: move this stuff into config_dict or get rid of it config.event_cache_size = 1 config.enable_registration = True config.enable_registration_captcha = False config.macaroon_secret_key = "not even a little secret" config.expire_access_token = False - config.server_name = name config.trusted_third_party_id_servers = [] config.room_invite_state_types = [] config.password_providers = [] @@ -176,13 +187,6 @@ def default_config(name): # background, which upsets the test runner. config.update_user_directory = False - def is_threepid_reserved(threepid): - return ServerConfig.is_threepid_reserved( - config.mau_limits_reserved_threepids, threepid - ) - - config.is_threepid_reserved.side_effect = is_threepid_reserved - return config -- cgit 1.5.1 From 320667a47977ebbc9a1c0f320a06c80f953a4f86 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 19 Mar 2019 16:40:19 +0000 Subject: Add option to disable searching in the user dir We still populate it, as it can still be accessed via the admin API. 
--- synapse/config/user_directory.py | 7 +++++++ synapse/rest/client/v2_alpha/user_directory.py | 6 ++++++ 2 files changed, 13 insertions(+) (limited to 'synapse') diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py index fab3a7d1c8..e3c063c148 100644 --- a/synapse/config/user_directory.py +++ b/synapse/config/user_directory.py @@ -22,9 +22,13 @@ class UserDirectoryConfig(Config): """ def read_config(self, config): + self.user_directory_search_enabled = True self.user_directory_search_all_users = False user_directory_config = config.get("user_directory", None) if user_directory_config: + self.user_directory_search_enabled = ( + user_directory_config.get("enabled", True) + ) self.user_directory_search_all_users = ( user_directory_config.get("search_all_users", False) ) @@ -33,6 +37,8 @@ class UserDirectoryConfig(Config): return """ # User Directory configuration # + # 'enabled' defines whether users can search the user directory, + # defaults to True. # 'search_all_users' defines whether to search all users visible to your HS # when searching the user directory, rather than limiting to users visible # in public rooms. Defaults to false. If you set it True, you'll have to run @@ -40,5 +46,6 @@ class UserDirectoryConfig(Config): # on your database to tell it to rebuild the user_directory search indexes. 
# #user_directory: + # enabled: true # search_all_users: false """ diff --git a/synapse/rest/client/v2_alpha/user_directory.py b/synapse/rest/client/v2_alpha/user_directory.py index cac0624ba7..36b02de37f 100644 --- a/synapse/rest/client/v2_alpha/user_directory.py +++ b/synapse/rest/client/v2_alpha/user_directory.py @@ -59,6 +59,12 @@ class UserDirectorySearchRestServlet(RestServlet): requester = yield self.auth.get_user_by_req(request, allow_guest=False) user_id = requester.user.to_string() + if not self.hs.config.user_directory_search_enabled: + defer.returnValue((200, { + "limited": False, + "results": [], + })) + body = parse_json_object_from_request(request) limit = body.get("limit", 10) -- cgit 1.5.1 From 213c98c00a473bac7363e1a728828e0f056550b8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 19 Mar 2019 16:50:51 +0000 Subject: Add option to disable search room lists This disables both local and remote room list searching. --- docs/sample_config.yaml | 5 +++++ synapse/config/room_directory.py | 9 +++++++++ synapse/handlers/room_list.py | 13 +++++++++++++ 3 files changed, 27 insertions(+) (limited to 'synapse') diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index f9886a900d..f7b1825d61 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -1036,6 +1036,11 @@ password_config: +# Wether the public room list can be searched. When disabled blocks +# searching local and remote room list for local and remote users. +# +#enable_room_list_search: true + # The `alias_creation` option controls who's allowed to create aliases # on this server. 
# diff --git a/synapse/config/room_directory.py b/synapse/config/room_directory.py index 9b897abe3c..a25a41d16d 100644 --- a/synapse/config/room_directory.py +++ b/synapse/config/room_directory.py @@ -20,6 +20,10 @@ from ._base import Config, ConfigError class RoomDirectoryConfig(Config): def read_config(self, config): + self.enable_room_list_search = config.get( + "enable_room_list_search", True, + ) + alias_creation_rules = config.get("alias_creation_rules") if alias_creation_rules is not None: @@ -54,6 +58,11 @@ class RoomDirectoryConfig(Config): def default_config(self, config_dir_path, server_name, **kwargs): return """ + # Wether the public room list can be searched. When disabled blocks + # searching local and remote room list for local and remote users. + # + #enable_room_list_search: true + # The `alias_creation` option controls who's allowed to create aliases # on this server. # diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index afa508d729..ba50c8aa95 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -44,6 +44,7 @@ EMPTY_THIRD_PARTY_ID = ThirdPartyInstanceID(None, None) class RoomListHandler(BaseHandler): def __init__(self, hs): super(RoomListHandler, self).__init__(hs) + self.config = hs.config self.response_cache = ResponseCache(hs, "room_list") self.remote_response_cache = ResponseCache(hs, "remote_room_list", timeout_ms=30 * 1000) @@ -70,6 +71,12 @@ class RoomListHandler(BaseHandler): "Getting public room list: limit=%r, since=%r, search=%r, network=%r", limit, since_token, bool(search_filter), network_tuple, ) + if not self.config.enable_room_list_search: + return defer.succeed({ + "chunk": [], + "total_room_count_estimate": 0, + }) + if search_filter: # We explicitly don't bother caching searches or requests for # appservice specific lists. 
@@ -441,6 +448,12 @@ class RoomListHandler(BaseHandler): def get_remote_public_room_list(self, server_name, limit=None, since_token=None, search_filter=None, include_all_networks=False, third_party_instance_id=None,): + if not self.config.enable_room_list_search: + defer.returnValue({ + "chunk": [], + "total_room_count_estimate": 0, + }) + if search_filter: # We currently don't support searching across federation, so we have # to do it manually without pagination -- cgit 1.5.1 From 926f29ea6d820d1d14fb5677fe948fa2e15d748e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Mar 2019 14:24:53 +0000 Subject: Fix up config comments --- docs/sample_config.yaml | 7 ++++--- synapse/config/room_directory.py | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'synapse') diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index f7b1825d61..d1a419b240 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -1036,10 +1036,11 @@ password_config: -# Wether the public room list can be searched. When disabled blocks -# searching local and remote room list for local and remote users. +# Uncomment to disable searching the public room list. When disabled +# blocks searching local and remote room lists for local and remote +# users by always returning an empty list for all queries. # -#enable_room_list_search: true +#enable_room_list_search: false # The `alias_creation` option controls who's allowed to create aliases # on this server. diff --git a/synapse/config/room_directory.py b/synapse/config/room_directory.py index a25a41d16d..8a9fded4c5 100644 --- a/synapse/config/room_directory.py +++ b/synapse/config/room_directory.py @@ -58,10 +58,11 @@ class RoomDirectoryConfig(Config): def default_config(self, config_dir_path, server_name, **kwargs): return """ - # Wether the public room list can be searched. When disabled blocks - # searching local and remote room list for local and remote users. 
+ # Uncomment to disable searching the public room list. When disabled + # blocks searching local and remote room lists for local and remote + # users by always returning an empty list for all queries. # - #enable_room_list_search: true + #enable_room_list_search: false # The `alias_creation` option controls who's allowed to create aliases # on this server. -- cgit 1.5.1 From 7529038e66a81d36a71c654f26165a4215d918b3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Mar 2019 14:25:28 +0000 Subject: Return before we log --- synapse/handlers/room_list.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index ba50c8aa95..dc54634107 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -67,16 +67,17 @@ class RoomListHandler(BaseHandler): appservice and network id to use an appservice specific one. Setting to None returns all public rooms across all lists. """ - logger.info( - "Getting public room list: limit=%r, since=%r, search=%r, network=%r", - limit, since_token, bool(search_filter), network_tuple, - ) if not self.config.enable_room_list_search: return defer.succeed({ "chunk": [], "total_room_count_estimate": 0, }) + logger.info( + "Getting public room list: limit=%r, since=%r, search=%r, network=%r", + limit, since_token, bool(search_filter), network_tuple, + ) + if search_filter: # We explicitly don't bother caching searches or requests for # appservice specific lists. 
-- cgit 1.5.1 From 2c90422146b169cd43df12ab98e4e02ae53243c7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Mar 2019 14:25:58 +0000 Subject: Pull out config option --- synapse/handlers/room_list.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index dc54634107..d6c9d56007 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -44,7 +44,7 @@ EMPTY_THIRD_PARTY_ID = ThirdPartyInstanceID(None, None) class RoomListHandler(BaseHandler): def __init__(self, hs): super(RoomListHandler, self).__init__(hs) - self.config = hs.config + self.enable_room_list_search = hs.config.enable_room_list_search self.response_cache = ResponseCache(hs, "room_list") self.remote_response_cache = ResponseCache(hs, "remote_room_list", timeout_ms=30 * 1000) @@ -67,7 +67,7 @@ class RoomListHandler(BaseHandler): appservice and network id to use an appservice specific one. Setting to None returns all public rooms across all lists. 
""" - if not self.config.enable_room_list_search: + if not self.enable_room_list_search: return defer.succeed({ "chunk": [], "total_room_count_estimate": 0, @@ -449,7 +449,7 @@ class RoomListHandler(BaseHandler): def get_remote_public_room_list(self, server_name, limit=None, since_token=None, search_filter=None, include_all_networks=False, third_party_instance_id=None,): - if not self.config.enable_room_list_search: + if not self.enable_room_list_search: defer.returnValue({ "chunk": [], "total_room_count_estimate": 0, -- cgit 1.5.1 From cc197a61a1b494e5f8a7fbbc299161845f2ab8af Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Mar 2019 14:30:36 +0000 Subject: Disable publishing to room list when its disabled --- synapse/handlers/directory.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'synapse') diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index 8b113307d2..fe128d9c88 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -44,6 +44,7 @@ class DirectoryHandler(BaseHandler): self.appservice_handler = hs.get_application_service_handler() self.event_creation_handler = hs.get_event_creation_handler() self.config = hs.config + self.enable_room_list_search = hs.config.enable_room_list_search self.federation = hs.get_federation_client() hs.get_federation_registry().register_query_handler( @@ -411,6 +412,13 @@ class DirectoryHandler(BaseHandler): if visibility not in ["public", "private"]: raise SynapseError(400, "Invalid visibility setting") + if visibility == "public" and not self.enable_room_list_search: + # The room list has been disabled. 
+ raise AuthError( + 403, + "This user is not permitted to publish rooms to the room list" + ) + room = yield self.store.get_room(room_id) if room is None: raise SynapseError(400, "Unknown room") -- cgit 1.5.1 From ab20f85c59c4b7ef1a5248c2a9af37899dbfa280 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 20 Mar 2019 14:33:11 +0000 Subject: Update synapse/config/user_directory.py Co-Authored-By: erikjohnston --- synapse/config/user_directory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py index e3c063c148..9dd83b794d 100644 --- a/synapse/config/user_directory.py +++ b/synapse/config/user_directory.py @@ -38,7 +38,7 @@ class UserDirectoryConfig(Config): # User Directory configuration # # 'enabled' defines whether users can search the user directory, - # defaults to True. + # Defaults to true. # 'search_all_users' defines whether to search all users visible to your HS # when searching the user directory, rather than limiting to users visible # in public rooms. Defaults to false. If you set it True, you'll have to run -- cgit 1.5.1 From cd8c5b91addf716ad76c315462a6d09e87241b25 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Mar 2019 14:35:41 +0000 Subject: Fix up sample config --- docs/sample_config.yaml | 6 ++++-- synapse/config/user_directory.py | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'synapse') diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index f242b1ffbc..93aa6a6754 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -954,8 +954,10 @@ password_config: # User Directory configuration # -# 'enabled' defines whether users can search the user directory, -# defaults to True. +# 'enabled' defines whether users can search the user directory. If +# false then empty responses are returned to all queries. Defaults to +# true. 
+# # 'search_all_users' defines whether to search all users visible to your HS # when searching the user directory, rather than limiting to users visible # in public rooms. Defaults to false. If you set it True, you'll have to run diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py index 9dd83b794d..142754a7dc 100644 --- a/synapse/config/user_directory.py +++ b/synapse/config/user_directory.py @@ -37,8 +37,10 @@ class UserDirectoryConfig(Config): return """ # User Directory configuration # - # 'enabled' defines whether users can search the user directory, - # Defaults to true. + # 'enabled' defines whether users can search the user directory. If + # false then empty responses are returned to all queries. Defaults to + # true. + # # 'search_all_users' defines whether to search all users visible to your HS # when searching the user directory, rather than limiting to users visible # in public rooms. Defaults to false. If you set it True, you'll have to run -- cgit 1.5.1 From a902d131804890ee6cc4137a669be92ceb2253c4 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 20 Mar 2019 16:02:25 +0000 Subject: Batch up outgoing read-receipts to reduce federation traffic. (#4890) Rate-limit outgoing read-receipts as per #4730. 
--- changelog.d/4890.feature | 1 + docs/sample_config.yaml | 8 ++ synapse/config/ratelimiting.py | 12 ++ synapse/federation/sender/__init__.py | 115 +++++++++++++++--- synapse/federation/sender/per_destination_queue.py | 64 ++++++++++- synapse/handlers/receipts.py | 2 +- tests/federation/test_federation_sender.py | 128 +++++++++++++++++++++ 7 files changed, 308 insertions(+), 22 deletions(-) create mode 100644 changelog.d/4890.feature create mode 100644 tests/federation/test_federation_sender.py (limited to 'synapse') diff --git a/changelog.d/4890.feature b/changelog.d/4890.feature new file mode 100644 index 0000000000..8d74262250 --- /dev/null +++ b/changelog.d/4890.feature @@ -0,0 +1 @@ +Batch up outgoing read-receipts to reduce federation traffic. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index f9886a900d..d72b90a37b 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -438,6 +438,14 @@ log_config: "CONFDIR/SERVERNAME.log.config" # #federation_rc_concurrent: 3 +# Target outgoing federation transaction frequency for sending read-receipts, +# per-room. +# +# If we end up trying to send out more read-receipts, they will get buffered up +# into fewer transactions. +# +#federation_rr_transactions_per_room_per_second: 50 + # Directory where uploaded images and attachments are stored. 
diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index 898a19dd8c..5a68399e63 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -42,6 +42,10 @@ class RatelimitConfig(Config): self.federation_rc_reject_limit = config.get("federation_rc_reject_limit", 50) self.federation_rc_concurrent = config.get("federation_rc_concurrent", 3) + self.federation_rr_transactions_per_room_per_second = config.get( + "federation_rr_transactions_per_room_per_second", 50, + ) + def default_config(self, **kwargs): return """\ ## Ratelimiting ## @@ -111,4 +115,12 @@ class RatelimitConfig(Config): # single server # #federation_rc_concurrent: 3 + + # Target outgoing federation transaction frequency for sending read-receipts, + # per-room. + # + # If we end up trying to send out more read-receipts, they will get buffered up + # into fewer transactions. + # + #federation_rr_transactions_per_room_per_second: 50 """ diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index 1bcc353d18..1dc041752b 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -104,7 +104,26 @@ class FederationSender(object): self._processing_pending_presence = False + # map from room_id to a set of PerDestinationQueues which we believe are + # awaiting a call to flush_read_receipts_for_room. The presence of an entry + # here for a given room means that we are rate-limiting RR flushes to that room, + # and that there is a pending call to _flush_rrs_for_room in the system. 
+ self._queues_awaiting_rr_flush_by_room = { + } # type: dict[str, set[PerDestinationQueue]] + + self._rr_txn_interval_per_room_ms = ( + 1000.0 / hs.get_config().federation_rr_transactions_per_room_per_second + ) + def _get_per_destination_queue(self, destination): + """Get or create a PerDestinationQueue for the given destination + + Args: + destination (str): server_name of remote server + + Returns: + PerDestinationQueue + """ queue = self._per_destination_queues.get(destination) if not queue: queue = PerDestinationQueue(self.hs, self._transaction_manager, destination) @@ -250,33 +269,91 @@ class FederationSender(object): Args: receipt (synapse.types.ReadReceipt): receipt to be sent """ + + # Some background on the rate-limiting going on here. + # + # It turns out that if we attempt to send out RRs as soon as we get them from + # a client, then we end up trying to do several hundred Hz of federation + # transactions. (The number of transactions scales as O(N^2) on the size of a + # room, since in a large room we have both more RRs coming in, and more servers + # to send them to.) + # + # This leads to a lot of CPU load, and we end up getting behind. The solution + # currently adopted is as follows: + # + # The first receipt in a given room is sent out immediately, at time T0. Any + # further receipts are, in theory, batched up for N seconds, where N is calculated + # based on the number of servers in the room to achieve a transaction frequency + # of around 50Hz. So, for example, if there were 100 servers in the room, then + # N would be 100 / 50Hz = 2 seconds. + # + # Then, after T+N, we flush out any receipts that have accumulated, and restart + # the timer to flush out more receipts at T+2N, etc. If no receipts accumulate, + # we stop the cycle and go back to the start. 
+ # + # However, in practice, it is often possible to flush out receipts earlier: in + # particular, if we are sending a transaction to a given server anyway (for + # example, because we have a PDU or a RR in another room to send), then we may + # as well send out all of the pending RRs for that server. So it may be that + # by the time we get to T+N, we don't actually have any RRs left to send out. + # Nevertheless we continue to buffer up RRs for the room in question until we + # reach the point that no RRs arrive between timer ticks. + # + # For even more background, see https://github.com/matrix-org/synapse/issues/4730. + + room_id = receipt.room_id + # Work out which remote servers should be poked and poke them. - domains = yield self.state.get_current_hosts_in_room(receipt.room_id) + domains = yield self.state.get_current_hosts_in_room(room_id) domains = [d for d in domains if d != self.server_name] if not domains: return - logger.debug("Sending receipt to: %r", domains) + queues_pending_flush = self._queues_awaiting_rr_flush_by_room.get( + room_id + ) - content = { - receipt.room_id: { - receipt.receipt_type: { - receipt.user_id: { - "event_ids": receipt.event_ids, - "data": receipt.data, - }, - }, - }, - } - key = (receipt.room_id, receipt.receipt_type, receipt.user_id) + # if there is no flush yet scheduled, we will send out these receipts with + # immediate flushes, and schedule the next flush for this room. 
+ if queues_pending_flush is not None: + logger.debug("Queuing receipt for: %r", domains) + else: + logger.debug("Sending receipt to: %r", domains) + self._schedule_rr_flush_for_room(room_id, len(domains)) for domain in domains: - self.build_and_send_edu( - destination=domain, - edu_type="m.receipt", - content=content, - key=key, - ) + queue = self._get_per_destination_queue(domain) + queue.queue_read_receipt(receipt) + + # if there is already a RR flush pending for this room, then make sure this + # destination is registered for the flush + if queues_pending_flush is not None: + queues_pending_flush.add(queue) + else: + queue.flush_read_receipts_for_room(room_id) + + def _schedule_rr_flush_for_room(self, room_id, n_domains): + # that is going to cause approximately len(domains) transactions, so now back + # off for that multiplied by RR_TXN_INTERVAL_PER_ROOM + backoff_ms = self._rr_txn_interval_per_room_ms * n_domains + + logger.debug("Scheduling RR flush in %s in %d ms", room_id, backoff_ms) + self.clock.call_later(backoff_ms, self._flush_rrs_for_room, room_id) + self._queues_awaiting_rr_flush_by_room[room_id] = set() + + def _flush_rrs_for_room(self, room_id): + queues = self._queues_awaiting_rr_flush_by_room.pop(room_id) + logger.debug("Flushing RRs in %s to %s", room_id, queues) + + if not queues: + # no more RRs arrived for this room; we are done. 
+ return + + # schedule the next flush + self._schedule_rr_flush_for_room(room_id, len(queues)) + + for queue in queues: + queue.flush_read_receipts_for_room(room_id) @logcontext.preserve_fn # the caller should not yield on this @defer.inlineCallbacks diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index 385039add4..be99211003 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -80,6 +80,10 @@ class PerDestinationQueue(object): # destination self._pending_presence = {} # type: dict[str, UserPresenceState] + # room_id -> receipt_type -> user_id -> receipt_dict + self._pending_rrs = {} + self._rrs_pending_flush = False + # stream_id of last successfully sent to-device message. # NB: may be a long or an int. self._last_device_stream_id = 0 @@ -87,6 +91,9 @@ class PerDestinationQueue(object): # stream_id of last successfully sent device list update. self._last_device_list_stream_id = 0 + def __str__(self): + return "PerDestinationQueue[%s]" % self._destination + def pending_pdu_count(self): return len(self._pending_pdus) @@ -118,6 +125,30 @@ class PerDestinationQueue(object): }) self.attempt_new_transaction() + def queue_read_receipt(self, receipt): + """Add a RR to the list to be sent. Doesn't start the transmission loop yet + (see flush_read_receipts_for_room) + + Args: + receipt (synapse.api.receipt_info.ReceiptInfo): receipt to be queued + """ + self._pending_rrs.setdefault( + receipt.room_id, {}, + ).setdefault( + receipt.receipt_type, {} + )[receipt.user_id] = { + "event_ids": receipt.event_ids, + "data": receipt.data, + } + + def flush_read_receipts_for_room(self, room_id): + # if we don't have any read-receipts for this room, it may be that we've already + # sent them out, so we don't need to flush. 
+ if room_id not in self._pending_rrs: + return + self._rrs_pending_flush = True + self.attempt_new_transaction() + def send_keyed_edu(self, edu, key): self._pending_edus_keyed[(edu.edu_type, key)] = edu self.attempt_new_transaction() @@ -183,10 +214,12 @@ class PerDestinationQueue(object): # We can only include at most 50 PDUs per transactions pending_pdus, self._pending_pdus = pending_pdus[:50], pending_pdus[50:] - pending_edus = self._pending_edus + pending_edus = [] + + pending_edus.extend(self._get_rr_edus(force_flush=False)) # We can only include at most 100 EDUs per transactions - pending_edus, self._pending_edus = pending_edus[:100], pending_edus[100:] + pending_edus.extend(self._pop_pending_edus(100 - len(pending_edus))) pending_edus.extend( self._pending_edus_keyed.values() @@ -224,6 +257,11 @@ class PerDestinationQueue(object): self._last_device_stream_id = device_stream_id return + # if we've decided to send a transaction anyway, and we have room, we + # may as well send any pending RRs + if len(pending_edus) < 100: + pending_edus.extend(self._get_rr_edus(force_flush=True)) + # END CRITICAL SECTION success = yield self._transaction_manager.send_new_transaction( @@ -285,6 +323,28 @@ class PerDestinationQueue(object): # We want to be *very* sure we clear this after we stop processing self.transmission_loop_running = False + def _get_rr_edus(self, force_flush): + if not self._pending_rrs: + return + if not force_flush and not self._rrs_pending_flush: + # not yet time for this lot + return + + edu = Edu( + origin=self._server_name, + destination=self._destination, + edu_type="m.receipt", + content=self._pending_rrs, + ) + self._pending_rrs = {} + self._rrs_pending_flush = False + yield edu + + def _pop_pending_edus(self, limit): + pending_edus = self._pending_edus + pending_edus, self._pending_edus = pending_edus[:limit], pending_edus[limit:] + return pending_edus + @defer.inlineCallbacks def _get_new_device_messages(self): last_device_stream_id = 
self._last_device_stream_id diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index dd783ae134..274d2946ad 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -118,7 +118,7 @@ class ReceiptsHandler(BaseHandler): if not is_new: return - self.federation.send_read_receipt(receipt) + yield self.federation.send_read_receipt(receipt) @defer.inlineCallbacks def get_receipts_for_room(self, room_id, to_key): diff --git a/tests/federation/test_federation_sender.py b/tests/federation/test_federation_sender.py new file mode 100644 index 0000000000..28e7e27416 --- /dev/null +++ b/tests/federation/test_federation_sender.py @@ -0,0 +1,128 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from mock import Mock + +from twisted.internet import defer + +from synapse.types import ReadReceipt + +from tests.unittest import HomeserverTestCase + + +class FederationSenderTestCases(HomeserverTestCase): + def make_homeserver(self, reactor, clock): + return super(FederationSenderTestCases, self).setup_test_homeserver( + state_handler=Mock(spec=["get_current_hosts_in_room"]), + federation_transport_client=Mock(spec=["send_transaction"]), + ) + + def test_send_receipts(self): + mock_state_handler = self.hs.get_state_handler() + mock_state_handler.get_current_hosts_in_room.return_value = ["test", "host2"] + + mock_send_transaction = self.hs.get_federation_transport_client().send_transaction + mock_send_transaction.return_value = defer.succeed({}) + + sender = self.hs.get_federation_sender() + receipt = ReadReceipt("room_id", "m.read", "user_id", ["event_id"], {"ts": 1234}) + self.successResultOf(sender.send_read_receipt(receipt)) + + self.pump() + + # expect a call to send_transaction + mock_send_transaction.assert_called_once() + json_cb = mock_send_transaction.call_args[0][1] + data = json_cb() + self.assertEqual(data['edus'], [ + { + 'edu_type': 'm.receipt', + 'content': { + 'room_id': { + 'm.read': { + 'user_id': { + 'event_ids': ['event_id'], + 'data': {'ts': 1234}, + }, + }, + }, + }, + }, + ]) + + def test_send_receipts_with_backoff(self): + """Send two receipts in quick succession; the second should be flushed, but + only after 20ms""" + mock_state_handler = self.hs.get_state_handler() + mock_state_handler.get_current_hosts_in_room.return_value = ["test", "host2"] + + mock_send_transaction = self.hs.get_federation_transport_client().send_transaction + mock_send_transaction.return_value = defer.succeed({}) + + sender = self.hs.get_federation_sender() + receipt = ReadReceipt("room_id", "m.read", "user_id", ["event_id"], {"ts": 1234}) + self.successResultOf(sender.send_read_receipt(receipt)) + + self.pump() + + # expect a call to send_transaction + 
mock_send_transaction.assert_called_once() + json_cb = mock_send_transaction.call_args[0][1] + data = json_cb() + self.assertEqual(data['edus'], [ + { + 'edu_type': 'm.receipt', + 'content': { + 'room_id': { + 'm.read': { + 'user_id': { + 'event_ids': ['event_id'], + 'data': {'ts': 1234}, + }, + }, + }, + }, + }, + ]) + mock_send_transaction.reset_mock() + + # send the second RR + receipt = ReadReceipt("room_id", "m.read", "user_id", ["other_id"], {"ts": 1234}) + self.successResultOf(sender.send_read_receipt(receipt)) + self.pump() + mock_send_transaction.assert_not_called() + + self.reactor.advance(19) + mock_send_transaction.assert_not_called() + + self.reactor.advance(10) + mock_send_transaction.assert_called_once() + json_cb = mock_send_transaction.call_args[0][1] + data = json_cb() + self.assertEqual(data['edus'], [ + { + 'edu_type': 'm.receipt', + 'content': { + 'room_id': { + 'm.read': { + 'user_id': { + 'event_ids': ['other_id'], + 'data': {'ts': 1234}, + }, + }, + }, + }, + }, + ]) -- cgit 1.5.1 From cdb803616195c76306b30328af9da7cb2b32c960 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 20 Mar 2019 16:04:35 +0000 Subject: Add a config option for torture-testing worker replication. (#4902) Setting this to 50 or so makes a bunch of sytests fail in worker mode. --- changelog.d/4902.misc | 1 + synapse/config/server.py | 5 +++++ synapse/replication/tcp/resource.py | 18 +++++++++++++++++- 3 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 changelog.d/4902.misc (limited to 'synapse') diff --git a/changelog.d/4902.misc b/changelog.d/4902.misc new file mode 100644 index 0000000000..fecc06a6e8 --- /dev/null +++ b/changelog.d/4902.misc @@ -0,0 +1 @@ +Add a config option for torture-testing worker replication. 
diff --git a/synapse/config/server.py b/synapse/config/server.py index 499eb30bea..08e4e45482 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -126,6 +126,11 @@ class ServerConfig(Config): self.public_baseurl += '/' self.start_pushers = config.get("start_pushers", True) + # (undocumented) option for torturing the worker-mode replication a bit, + # for testing. The value defines the number of milliseconds to pause before + # sending out any replication updates. + self.replication_torture_level = config.get("replication_torture_level") + self.listeners = [] for listener in config.get("listeners", []): if not isinstance(listener.get("port", None), int): diff --git a/synapse/replication/tcp/resource.py b/synapse/replication/tcp/resource.py index fd59f1595f..47cdf30bd3 100644 --- a/synapse/replication/tcp/resource.py +++ b/synapse/replication/tcp/resource.py @@ -16,6 +16,7 @@ """ import logging +import random from six import itervalues @@ -74,6 +75,8 @@ class ReplicationStreamer(object): self.notifier = hs.get_notifier() self._server_notices_sender = hs.get_server_notices_sender() + self._replication_torture_level = hs.config.replication_torture_level + # Current connections. self.connections = [] @@ -157,10 +160,23 @@ class ReplicationStreamer(object): for stream in self.streams: stream.advance_current_token() - for stream in self.streams: + all_streams = self.streams + + if self._replication_torture_level is not None: + # there is no guarantee about ordering between the streams, + # so let's shuffle them around a bit when we are in torture mode. 
+ all_streams = list(all_streams) + random.shuffle(all_streams) + + for stream in all_streams: if stream.last_token == stream.upto_token: continue + if self._replication_torture_level: + yield self.clock.sleep( + self._replication_torture_level / 1000.0 + ) + logger.debug( "Getting stream: %s: %s -> %s", stream.NAME, stream.last_token, stream.upto_token -- cgit 1.5.1 From 4d53017432e05da621a13b3fe4d9e67108f856fd Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Thu, 21 Mar 2019 03:06:36 +1100 Subject: Batching in the user directory import (#4900) --- changelog.d/4900.feature | 1 + synapse/storage/user_directory.py | 13 +++++++++++++ 2 files changed, 14 insertions(+) create mode 100644 changelog.d/4900.feature (limited to 'synapse') diff --git a/changelog.d/4900.feature b/changelog.d/4900.feature new file mode 100644 index 0000000000..8f792b8890 --- /dev/null +++ b/changelog.d/4900.feature @@ -0,0 +1 @@ +The user directory has been rewritten to make it faster, with less chance of falling behind on a large server. diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 4ee653210f..d360e857d1 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -32,6 +32,11 @@ TEMP_TABLE = "_temp_populate_user_directory" class UserDirectoryStore(BackgroundUpdateStore): + + # How many records do we calculate before sending it to + # add_users_who_share_private_rooms? + SHARE_PRIVATE_WORKING_SET = 500 + def __init__(self, db_conn, hs): super(UserDirectoryStore, self).__init__(db_conn, hs) @@ -218,6 +223,14 @@ class UserDirectoryStore(BackgroundUpdateStore): user_set = (user_id, other_user_id) to_insert.add(user_set) + # If it gets too big, stop and write to the database + # to prevent storing too much in RAM. 
+ if len(to_insert) >= self.SHARE_PRIVATE_WORKING_SET: + yield self.add_users_who_share_private_room( + room_id, to_insert + ) + to_insert.clear() + if to_insert: yield self.add_users_who_share_private_room(room_id, to_insert) to_insert.clear() -- cgit 1.5.1 From 67d618e111bd586fb0b4d6c92c9d43b1174b0f42 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Mar 2019 16:50:05 +0000 Subject: Allow blocking a room multiple times --- synapse/storage/room.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'synapse') diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 41c65e112a..33870b585e 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -500,10 +500,12 @@ class RoomStore(RoomWorkerStore, SearchStore): @defer.inlineCallbacks def block_room(self, room_id, user_id): - yield self._simple_insert( + yield self._simple_upsert( table="blocked_rooms", - values={ + keyvalues={ "room_id": room_id, + }, + insertion_values={ "user_id": user_id, }, desc="block_room", -- cgit 1.5.1 From 74c46d81fa7c3e4f1cfc3688d9ce3f46d35ee5a5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Mar 2019 16:50:23 +0000 Subject: Only require consent for events with an associated request There are a number of instances where a server or admin may puppet a user to join/leave rooms, which we don't want to fail if the user has not consented to the privacy policy. We fix this by adding a check to test if the requester has an associated access_token, which is used as a proxy to answer the question of whether the action is being done on behalf of a real request from the user. 
--- synapse/handlers/message.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 55787563c0..ac9d9c1a83 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -316,8 +316,12 @@ class EventCreationHandler(object): target, e ) + # Check if the user has accepted the privacy policy. We only do this if + # the requester has an associated access_token_id, which indicates that + # this action came from a user request rather than an automatice server + # or admin action. is_exempt = yield self._is_exempt_from_privacy_policy(builder, requester) - if not is_exempt: + if requester.access_token_id and not is_exempt: yield self.assert_accepted_privacy_policy(requester) if token_id is not None: -- cgit 1.5.1 From 6b28890543cfd128a05c3e05ad53ea1e36c932fa Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Mar 2019 16:52:28 +0000 Subject: Log new room ID --- synapse/rest/client/v1/admin.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'synapse') diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 2a29f0c2af..56c253cc9d 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -490,8 +490,13 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): requester_user_id = requester.user.to_string() - logger.info("Shutting down room %r", room_id) + logger.info( + "Shutting down room %r, joining to new room: %r", + room_id, new_room_id, + ) + # This will work even if the room is already blocked, but that is + # desirable in case the first attempt at blocking the room failed below. 
yield self.store.block_room(room_id, requester_user_id) users = yield self.state.get_current_user_in_room(room_id) -- cgit 1.5.1 From 72a14860abadf6c8cee8960c4699f7d15da428d0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Mar 2019 16:54:00 +0000 Subject: Gracefully handle failing to kick user --- synapse/rest/client/v1/admin.py | 46 ++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 19 deletions(-) (limited to 'synapse') diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 56c253cc9d..56ad65515a 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -501,34 +501,41 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): users = yield self.state.get_current_user_in_room(room_id) kicked_users = [] + failed_to_kick_users = [] for user_id in users: if not self.hs.is_mine_id(user_id): continue logger.info("Kicking %r from %r...", user_id, room_id) - target_requester = create_requester(user_id) - yield self.room_member_handler.update_membership( - requester=target_requester, - target=target_requester.user, - room_id=room_id, - action=Membership.LEAVE, - content={}, - ratelimit=False - ) + try: + target_requester = create_requester(user_id) + yield self.room_member_handler.update_membership( + requester=target_requester, + target=target_requester.user, + room_id=room_id, + action=Membership.LEAVE, + content={}, + ratelimit=False + ) - yield self.room_member_handler.forget(target_requester.user, room_id) + yield self.room_member_handler.forget(target_requester.user, room_id) - yield self.room_member_handler.update_membership( - requester=target_requester, - target=target_requester.user, - room_id=new_room_id, - action=Membership.JOIN, - content={}, - ratelimit=False - ) + yield self.room_member_handler.update_membership( + requester=target_requester, + target=target_requester.user, + room_id=new_room_id, + action=Membership.JOIN, + content={}, + ratelimit=False + ) - 
kicked_users.append(user_id) + kicked_users.append(user_id) + except Exception: + logger.exception( + "Failed to leave old room and join new room for %r", user_id, + ) + failed_to_kick_users.append(user_id) yield self.event_creation_handler.create_and_send_nonmember_event( room_creator_requester, @@ -549,6 +556,7 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): defer.returnValue((200, { "kicked_users": kicked_users, + "failed_to_kick_users": failed_to_kick_users, "local_aliases": aliases_for_room, "new_room_id": new_room_id, })) -- cgit 1.5.1 From 7d47cc1305e1504af78ff13c49b43b81b1ac5791 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Mar 2019 17:08:36 +0000 Subject: Move requester check into assert_accepted_privacy_policy --- synapse/handlers/message.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index ac9d9c1a83..345a3e0ecd 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -316,12 +316,8 @@ class EventCreationHandler(object): target, e ) - # Check if the user has accepted the privacy policy. We only do this if - # the requester has an associated access_token_id, which indicates that - # this action came from a user request rather than an automatice server - # or admin action. is_exempt = yield self._is_exempt_from_privacy_policy(builder, requester) - if requester.access_token_id and not is_exempt: + if not is_exempt: yield self.assert_accepted_privacy_policy(requester) if token_id is not None: @@ -396,6 +392,13 @@ class EventCreationHandler(object): if requester.app_service is not None: return + # Check if the user has accepted the privacy policy. We only do this if + # the requester has an associated access_token_id, which indicates that + # this action came from a user request rather than an automatice server + # or admin action. 
+ if requester.access_token_id is None: + return + user_id = requester.user.to_string() # exempt the system notices user -- cgit 1.5.1 From aa959a6c0705067cd01d1fd0ba42f51f320ed51b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Mar 2019 17:39:29 +0000 Subject: Use flags --- synapse/handlers/_base.py | 1 + synapse/handlers/deactivate_account.py | 1 + synapse/handlers/message.py | 18 +++++------------- synapse/handlers/room_member.py | 6 ++++++ synapse/rest/client/v1/admin.py | 6 ++++-- 5 files changed, 17 insertions(+), 15 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index d8d86d6ff3..ac09d03ba9 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -165,6 +165,7 @@ class BaseHandler(object): member_event.room_id, "leave", ratelimit=False, + require_consent=False, ) except Exception as e: logger.exception("Error kicking guest user: %s" % (e,)) diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index 75fe50c42c..97d3f31d98 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -164,6 +164,7 @@ class DeactivateAccountHandler(BaseHandler): room_id, "leave", ratelimit=False, + require_consent=False, ) except Exception: logger.exception( diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 345a3e0ecd..587fbfbe86 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -255,7 +255,7 @@ class EventCreationHandler(object): @defer.inlineCallbacks def create_event(self, requester, event_dict, token_id=None, txn_id=None, - prev_events_and_hashes=None): + prev_events_and_hashes=None, require_consent=True): """ Given a dict from a client, create a new event. @@ -276,6 +276,9 @@ class EventCreationHandler(object): where *hashes* is a map from algorithm to hash. If None, they will be requested from the database. 
+ + require_consent (bool): Whether to check if the requester has + consented to privacy policy. Raises: ResourceLimitError if server is blocked to some resource being exceeded @@ -317,7 +320,7 @@ class EventCreationHandler(object): ) is_exempt = yield self._is_exempt_from_privacy_policy(builder, requester) - if not is_exempt: + if require_consent and not is_exempt: yield self.assert_accepted_privacy_policy(requester) if token_id is not None: @@ -388,17 +391,6 @@ class EventCreationHandler(object): if self._block_events_without_consent_error is None: return - # exempt AS users from needing consent - if requester.app_service is not None: - return - - # Check if the user has accepted the privacy policy. We only do this if - # the requester has an associated access_token_id, which indicates that - # this action came from a user request rather than an automatice server - # or admin action. - if requester.access_token_id is None: - return - user_id = requester.user.to_string() # exempt the system notices user diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index aead9e4608..71ce5b54e5 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -160,6 +160,7 @@ class RoomMemberHandler(object): txn_id=None, ratelimit=True, content=None, + require_consent=True, ): user_id = target.to_string() @@ -185,6 +186,7 @@ class RoomMemberHandler(object): token_id=requester.access_token_id, txn_id=txn_id, prev_events_and_hashes=prev_events_and_hashes, + require_consent=require_consent, ) # Check if this event matches the previous membership event for the user. 
@@ -305,6 +307,7 @@ class RoomMemberHandler(object): third_party_signed=None, ratelimit=True, content=None, + require_consent=True, ): key = (room_id,) @@ -319,6 +322,7 @@ class RoomMemberHandler(object): third_party_signed=third_party_signed, ratelimit=ratelimit, content=content, + require_consent=require_consent, ) defer.returnValue(result) @@ -335,6 +339,7 @@ class RoomMemberHandler(object): third_party_signed=None, ratelimit=True, content=None, + require_consent=True, ): content_specified = bool(content) if content is None: @@ -516,6 +521,7 @@ class RoomMemberHandler(object): ratelimit=ratelimit, prev_events_and_hashes=prev_events_and_hashes, content=content, + require_consent=require_consent, ) defer.returnValue(res) diff --git a/synapse/rest/client/v1/admin.py b/synapse/rest/client/v1/admin.py index 56ad65515a..e788769639 100644 --- a/synapse/rest/client/v1/admin.py +++ b/synapse/rest/client/v1/admin.py @@ -516,7 +516,8 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): room_id=room_id, action=Membership.LEAVE, content={}, - ratelimit=False + ratelimit=False, + require_consent=False, ) yield self.room_member_handler.forget(target_requester.user, room_id) @@ -527,7 +528,8 @@ class ShutdownRoomRestServlet(ClientV1RestServlet): room_id=new_room_id, action=Membership.JOIN, content={}, - ratelimit=False + ratelimit=False, + require_consent=False, ) kicked_users.append(user_id) -- cgit 1.5.1 From 8d8834d3e7d6c0c4086ea47664c922bfa91757d4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Mar 2019 17:49:56 +0000 Subject: comment block_room --- synapse/storage/room.py | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'synapse') diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 33870b585e..abc9786a99 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -500,6 +500,15 @@ class RoomStore(RoomWorkerStore, SearchStore): @defer.inlineCallbacks def block_room(self, room_id, user_id): + """Marks the room as blocked. 
Can be called multiple times. + + Args: + room_id (str): Room to block + user_id (str): Who blocked it + + Returns: + Deferred + """ yield self._simple_upsert( table="blocked_rooms", keyvalues={ -- cgit 1.5.1 From cd62981a6a083945547100c8d0f30380ea17c6e3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 20 Mar 2019 17:51:27 +0000 Subject: Revert spurious delete --- synapse/handlers/message.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'synapse') diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 587fbfbe86..9b41c7b205 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -391,6 +391,10 @@ class EventCreationHandler(object): if self._block_events_without_consent_error is None: return + # exempt AS users from needing consent + if requester.app_service is not None: + return + user_id = requester.user.to_string() # exempt the system notices user -- cgit 1.5.1 From 3ecec5ede2dfaf82f9587b4740d51288d43e7be1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 21 Mar 2019 10:21:15 +0000 Subject: Fix upsert --- synapse/storage/room.py | 1 + 1 file changed, 1 insertion(+) (limited to 'synapse') diff --git a/synapse/storage/room.py b/synapse/storage/room.py index abc9786a99..a979d4860a 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -514,6 +514,7 @@ class RoomStore(RoomWorkerStore, SearchStore): keyvalues={ "room_id": room_id, }, + values={}, insertion_values={ "user_id": user_id, }, -- cgit 1.5.1