From 7ca638c76135d7a0f86f6aa7981554bbe0b7a335 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Apr 2019 20:53:10 +0100 Subject: Clarify logging when PDU signature checking fails --- synapse/federation/federation_base.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'synapse/federation') diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index dfe6b4aa5c..1d641337da 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -269,7 +269,17 @@ def _check_sigs_on_pdus(keyring, room_version, pdus): for p in pdus_to_check_sender ]) + def sender_err(e, pdu_to_check): + logger.warning( + "event id %s: unable to verify signature for sender %s: %s", + pdu_to_check.pdu.event_id, + pdu_to_check.sender_domain, + e, + ) + return e + for p, d in zip(pdus_to_check_sender, more_deferreds): + d.addErrback(sender_err, p) p.deferreds.append(d) # now let's look for events where the sender's domain is different to the @@ -291,7 +301,16 @@ def _check_sigs_on_pdus(keyring, room_version, pdus): for p in pdus_to_check_event_id ]) + def event_err(e, pdu_to_check): + logger.warning( + "event id %s: unable to verify signature for event id domain: %s", + pdu_to_check.pdu.event_id, + e, + ) + return e + for p, d in zip(pdus_to_check_event_id, more_deferreds): + d.addErrback(event_err, p) p.deferreds.append(d) # replace lists of deferreds with single Deferreds -- cgit 1.5.1 From fd8fb32bdd70b0ad68666e0f39a95ac90f9b1c27 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Apr 2019 21:08:12 +0100 Subject: remove extraneous exception logging --- synapse/crypto/keyring.py | 48 +++++++++++++++++++++++------------ synapse/federation/federation_base.py | 4 +-- 2 files changed, 34 insertions(+), 18 deletions(-) (limited to 'synapse/federation') diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index b6d1b4cf0b..d8ba870cca 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -114,38 +114,54 @@ class Keyring(object): server_name. The deferreds run their callbacks in the sentinel logcontext. """ + # a list of VerifyKeyRequests verify_requests = [] + handle = preserve_fn(_handle_key_deferred) - for server_name, json_object in server_and_json: + def process(server_name, json_object): + """Process an entry in the request list + Given a (server_name, json_object) pair from the request list, + adds a key request to verify_requests, and returns a deferred which will + complete or fail (in the sentinel context) when verification completes. + """ key_ids = signature_ids(json_object, server_name) + if not key_ids: - deferred = defer.fail(SynapseError( - 400, - "Not signed by %s" % (server_name, ), - Codes.UNAUTHORIZED, - )) - else: - deferred = defer.Deferred() + return defer.fail( + SynapseError( + 400, + "Not signed by %s" % (server_name,), + Codes.UNAUTHORIZED, + ) + ) logger.debug("Verifying for %s with key_ids %s", server_name, key_ids) + # add the key request to the queue, but don't start it off yet. verify_request = VerifyKeyRequest( - server_name, key_ids, json_object, deferred + server_name, key_ids, json_object, defer.Deferred(), ) - verify_requests.append(verify_request) - run_in_background(self._start_key_lookups, verify_requests) + # now run _handle_key_deferred, which will wait for the key request + # to complete and then do the verification. + # + # We want _handle_key_request to log to the right context, so we + # wrap it with preserve_fn (aka run_in_background) + return handle(verify_request) - # Pass those keys to handle_key_deferred so that the json object - # signatures can be verified - handle = preserve_fn(_handle_key_deferred) - return [ - handle(rq) for rq in verify_requests + results = [ + process(server_name, json_object) + for server_name, json_object in server_and_json ] + if verify_requests: + run_in_background(self._start_key_lookups, verify_requests) + + return results + @defer.inlineCallbacks def _start_key_lookups(self, verify_requests): """Sets off the key fetches for each verify request diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 1d641337da..832e2bdb9b 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -274,7 +274,7 @@ def _check_sigs_on_pdus(keyring, room_version, pdus): "event id %s: unable to verify signature for sender %s: %s", pdu_to_check.pdu.event_id, pdu_to_check.sender_domain, - e, + e.getErrorMessage(), ) return e @@ -305,7 +305,7 @@ def _check_sigs_on_pdus(keyring, room_version, pdus): logger.warning( "event id %s: unable to verify signature for event id domain: %s", pdu_to_check.pdu.event_id, - e, + e.getErrorMessage(), ) return e -- cgit 1.5.1 From 837d7f85a9d0a479487cd6205a3982f3981e4276 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 25 Apr 2019 22:17:59 +0100 Subject: more logging improvements --- synapse/federation/federation_base.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'synapse/federation') diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 832e2bdb9b..cffa831d80 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -270,13 +270,14 @@ def _check_sigs_on_pdus(keyring, room_version, pdus): ]) def sender_err(e, pdu_to_check): - logger.warning( - "event id %s: unable to verify signature for sender %s: %s", + errmsg = "event id %s: unable to verify signature for sender %s: %s" % ( pdu_to_check.pdu.event_id, pdu_to_check.sender_domain, e.getErrorMessage(), ) - return e + # XX not really sure if these are the right codes, but they are what + # we've done for ages + raise SynapseError(400, errmsg, Codes.UNAUTHORIZED) for p, d in zip(pdus_to_check_sender, more_deferreds): d.addErrback(sender_err, p) @@ -302,12 +303,14 @@ def _check_sigs_on_pdus(keyring, room_version, pdus): ]) def event_err(e, pdu_to_check): - logger.warning( - "event id %s: unable to verify signature for event id domain: %s", - pdu_to_check.pdu.event_id, - e.getErrorMessage(), + errmsg = ( + "event id %s: unable to verify signature for event id domain: %s" % ( + pdu_to_check.pdu.event_id, + e.getErrorMessage(), + ) ) - return e + # XX as above: not really sure if these are the right codes + raise SynapseError(400, errmsg, Codes.UNAUTHORIZED) for p, d in zip(pdus_to_check_event_id, more_deferreds): d.addErrback(event_err, p) -- cgit 1.5.1 From c0e0740bef0db661abce352afaf6c958e276f11d Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 8 May 2019 18:26:56 +0100 Subject: add options to require an access_token to GET /profile and /publicRooms on CS API (#5083) This commit adds two config options: * `restrict_public_rooms_to_local_users` Requires auth to fetch the public rooms directory through the CS API and disables fetching it through the federation API. * `require_auth_for_profile_requests` When set to `true`, requires that requests to `/profile` over the CS API are authenticated, and only returns the user's profile if the requester shares a room with the profile's owner, as per MSC1301. MSC1301 also specifies a behaviour for federation (only returning the profile if the server asking for it shares a room with the profile's owner), but that's currently really non-trivial to do in a not too expensive way. Next step is writing down a MSC that allows a HS to specify which user sent the profile query. In this implementation, Synapse won't send a profile query over federation if it doesn't believe it already shares a room with the profile's owner, though. Groups have been intentionally omitted from this commit. --- changelog.d/5083.feature | 1 + docs/sample_config.yaml | 14 ++++++ synapse/config/server.py | 27 ++++++++++ synapse/federation/transport/server.py | 10 ++++ synapse/handlers/profile.py | 43 ++++++++++++++++ synapse/rest/client/v1/profile.py | 40 ++++++++++----- synapse/rest/client/v1/room.py | 6 +++ tests/rest/client/v1/test_profile.py | 92 +++++++++++++++++++++++++++++++++- tests/rest/client/v1/test_rooms.py | 32 ++++++++++++ 9 files changed, 252 insertions(+), 13 deletions(-) create mode 100644 changelog.d/5083.feature (limited to 'synapse/federation') diff --git a/changelog.d/5083.feature b/changelog.d/5083.feature new file mode 100644 index 0000000000..55d114b3fe --- /dev/null +++ b/changelog.d/5083.feature @@ -0,0 +1 @@ +Add an configuration option to require authentication on /publicRooms and /profile endpoints. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index b6b9da6e41..bdfc34c6bd 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -69,6 +69,20 @@ pid_file: DATADIR/homeserver.pid # #use_presence: false +# Whether to require authentication to retrieve profile data (avatars, +# display names) of other users through the client API. Defaults to +# 'false'. Note that profile data is also available via the federation +# API, so this setting is of limited value if federation is enabled on +# the server. +# +#require_auth_for_profile_requests: true + +# If set to 'true', requires authentication to access the server's +# public rooms directory through the client API, and forbids any other +# homeserver to fetch it via federation. Defaults to 'false'. +# +#restrict_public_rooms_to_local_users: true + # The GC threshold parameters to pass to `gc.set_threshold`, if defined # #gc_thresholds: [700, 10, 10] diff --git a/synapse/config/server.py b/synapse/config/server.py index 147a976485..8dce75c56a 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -72,6 +72,19 @@ class ServerConfig(Config): # master, potentially causing inconsistency. self.enable_media_repo = config.get("enable_media_repo", True) + # Whether to require authentication to retrieve profile data (avatars, + # display names) of other users through the client API. + self.require_auth_for_profile_requests = config.get( + "require_auth_for_profile_requests", False, + ) + + # If set to 'True', requires authentication to access the server's + # public rooms directory through the client API, and forbids any other + # homeserver to fetch it via federation. + self.restrict_public_rooms_to_local_users = config.get( + "restrict_public_rooms_to_local_users", False, + ) + # whether to enable search. If disabled, new entries will not be inserted # into the search tables and they will not be indexed. Users will receive # errors when attempting to search for messages. @@ -327,6 +340,20 @@ class ServerConfig(Config): # #use_presence: false + # Whether to require authentication to retrieve profile data (avatars, + # display names) of other users through the client API. Defaults to + # 'false'. Note that profile data is also available via the federation + # API, so this setting is of limited value if federation is enabled on + # the server. + # + #require_auth_for_profile_requests: true + + # If set to 'true', requires authentication to access the server's + # public rooms directory through the client API, and forbids any other + # homeserver to fetch it via federation. Defaults to 'false'. + # + #restrict_public_rooms_to_local_users: true + # The GC threshold parameters to pass to `gc.set_threshold`, if defined # #gc_thresholds: [700, 10, 10] diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 452599e1a1..9030eb18c5 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -716,8 +716,17 @@ class PublicRoomList(BaseFederationServlet): PATH = "/publicRooms" + def __init__(self, handler, authenticator, ratelimiter, server_name, deny_access): + super(PublicRoomList, self).__init__( + handler, authenticator, ratelimiter, server_name, + ) + self.deny_access = deny_access + @defer.inlineCallbacks def on_GET(self, origin, content, query): + if self.deny_access: + raise FederationDeniedError(origin) + limit = parse_integer_from_args(query, "limit", 0) since_token = parse_string_from_args(query, "since", None) include_all_networks = parse_boolean_from_args( @@ -1417,6 +1426,7 @@ def register_servlets(hs, resource, authenticator, ratelimiter, servlet_groups=N authenticator=authenticator, ratelimiter=ratelimiter, server_name=hs.hostname, + deny_access=hs.config.restrict_public_rooms_to_local_users, ).register(resource) if "group_server" in servlet_groups: diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index a65c98ff5c..91fc718ff8 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -53,6 +53,7 @@ class BaseProfileHandler(BaseHandler): @defer.inlineCallbacks def get_profile(self, user_id): target_user = UserID.from_string(user_id) + if self.hs.is_mine(target_user): try: displayname = yield self.store.get_profile_displayname( @@ -283,6 +284,48 @@ class BaseProfileHandler(BaseHandler): room_id, str(e) ) + @defer.inlineCallbacks + def check_profile_query_allowed(self, target_user, requester=None): + """Checks whether a profile query is allowed. If the + 'require_auth_for_profile_requests' config flag is set to True and a + 'requester' is provided, the query is only allowed if the two users + share a room. + + Args: + target_user (UserID): The owner of the queried profile. + requester (None|UserID): The user querying for the profile. + + Raises: + SynapseError(403): The two users share no room, or ne user couldn't + be found to be in any room the server is in, and therefore the query + is denied. + """ + # Implementation of MSC1301: don't allow looking up profiles if the + # requester isn't in the same room as the target. We expect requester to + # be None when this function is called outside of a profile query, e.g. + # when building a membership event. In this case, we must allow the + # lookup. + if not self.hs.config.require_auth_for_profile_requests or not requester: + return + + try: + requester_rooms = yield self.store.get_rooms_for_user( + requester.to_string() + ) + target_user_rooms = yield self.store.get_rooms_for_user( + target_user.to_string(), + ) + + # Check if the room lists have no elements in common. + if requester_rooms.isdisjoint(target_user_rooms): + raise SynapseError(403, "Profile isn't available", Codes.FORBIDDEN) + except StoreError as e: + if e.code == 404: + # This likely means that one of the users doesn't exist, + # so we act as if we couldn't find the profile. + raise SynapseError(403, "Profile isn't available", Codes.FORBIDDEN) + raise + class MasterProfileHandler(BaseProfileHandler): PROFILE_UPDATE_MS = 60 * 1000 diff --git a/synapse/rest/client/v1/profile.py b/synapse/rest/client/v1/profile.py index a23edd8fe5..eac1966c5e 100644 --- a/synapse/rest/client/v1/profile.py +++ b/synapse/rest/client/v1/profile.py @@ -31,11 +31,17 @@ class ProfileDisplaynameRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_GET(self, request, user_id): + requester_user = None + + if self.hs.config.require_auth_for_profile_requests: + requester = yield self.auth.get_user_by_req(request) + requester_user = requester.user + user = UserID.from_string(user_id) - displayname = yield self.profile_handler.get_displayname( - user, - ) + yield self.profile_handler.check_profile_query_allowed(user, requester_user) + + displayname = yield self.profile_handler.get_displayname(user) ret = {} if displayname is not None: @@ -74,11 +80,17 @@ class ProfileAvatarURLRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_GET(self, request, user_id): + requester_user = None + + if self.hs.config.require_auth_for_profile_requests: + requester = yield self.auth.get_user_by_req(request) + requester_user = requester.user + user = UserID.from_string(user_id) - avatar_url = yield self.profile_handler.get_avatar_url( - user, - ) + yield self.profile_handler.check_profile_query_allowed(user, requester_user) + + avatar_url = yield self.profile_handler.get_avatar_url(user) ret = {} if avatar_url is not None: @@ -116,14 +128,18 @@ class ProfileRestServlet(ClientV1RestServlet): @defer.inlineCallbacks def on_GET(self, request, user_id): + requester_user = None + + if self.hs.config.require_auth_for_profile_requests: + requester = yield self.auth.get_user_by_req(request) + requester_user = requester.user + user = UserID.from_string(user_id) - displayname = yield self.profile_handler.get_displayname( - user, - ) - avatar_url = yield self.profile_handler.get_avatar_url( - user, - ) + yield self.profile_handler.check_profile_query_allowed(user, requester_user) + + displayname = yield self.profile_handler.get_displayname(user) + avatar_url = yield self.profile_handler.get_avatar_url(user) ret = {} if displayname is not None: diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 48da4d557f..fab04965cb 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -301,6 +301,12 @@ class PublicRoomListRestServlet(ClientV1RestServlet): try: yield self.auth.get_user_by_req(request, allow_guest=True) except AuthError as e: + # Option to allow servers to require auth when accessing + # /publicRooms via CS API. This is especially helpful in private + # federations. + if self.hs.config.restrict_public_rooms_to_local_users: + raise + # We allow people to not be authed if they're just looking at our # room list, but require auth when we proxy the request. # In both cases we call the auth function, as that has the side diff --git a/tests/rest/client/v1/test_profile.py b/tests/rest/client/v1/test_profile.py index 1eab9c3bdb..5d13de11e6 100644 --- a/tests/rest/client/v1/test_profile.py +++ b/tests/rest/client/v1/test_profile.py @@ -20,7 +20,7 @@ from twisted.internet import defer import synapse.types from synapse.api.errors import AuthError, SynapseError -from synapse.rest.client.v1 import profile +from synapse.rest.client.v1 import admin, login, profile, room from tests import unittest @@ -42,6 +42,7 @@ class ProfileTestCase(unittest.TestCase): "set_displayname", "get_avatar_url", "set_avatar_url", + "check_profile_query_allowed", ] ) @@ -155,3 +156,92 @@ class ProfileTestCase(unittest.TestCase): self.assertEquals(mocked_set.call_args[0][0].localpart, "1234ABCD") self.assertEquals(mocked_set.call_args[0][1].user.localpart, "1234ABCD") self.assertEquals(mocked_set.call_args[0][2], "http://my.server/pic.gif") + + +class ProfilesRestrictedTestCase(unittest.HomeserverTestCase): + + servlets = [ + admin.register_servlets, + login.register_servlets, + profile.register_servlets, + room.register_servlets, + ] + + def make_homeserver(self, reactor, clock): + + config = self.default_config() + config.require_auth_for_profile_requests = True + self.hs = self.setup_test_homeserver(config=config) + + return self.hs + + def prepare(self, reactor, clock, hs): + # User owning the requested profile. + self.owner = self.register_user("owner", "pass") + self.owner_tok = self.login("owner", "pass") + self.profile_url = "/profile/%s" % (self.owner) + + # User requesting the profile. + self.requester = self.register_user("requester", "pass") + self.requester_tok = self.login("requester", "pass") + + self.room_id = self.helper.create_room_as(self.owner, tok=self.owner_tok) + + def test_no_auth(self): + self.try_fetch_profile(401) + + def test_not_in_shared_room(self): + self.ensure_requester_left_room() + + self.try_fetch_profile(403, access_token=self.requester_tok) + + def test_in_shared_room(self): + self.ensure_requester_left_room() + + self.helper.join( + room=self.room_id, + user=self.requester, + tok=self.requester_tok, + ) + + self.try_fetch_profile(200, self.requester_tok) + + def try_fetch_profile(self, expected_code, access_token=None): + self.request_profile( + expected_code, + access_token=access_token + ) + + self.request_profile( + expected_code, + url_suffix="/displayname", + access_token=access_token, + ) + + self.request_profile( + expected_code, + url_suffix="/avatar_url", + access_token=access_token, + ) + + def request_profile(self, expected_code, url_suffix="", access_token=None): + request, channel = self.make_request( + "GET", + self.profile_url + url_suffix, + access_token=access_token, + ) + self.render(request) + self.assertEqual(channel.code, expected_code, channel.result) + + def ensure_requester_left_room(self): + try: + self.helper.leave( + room=self.room_id, + user=self.requester, + tok=self.requester_tok, + ) + except AssertionError: + # We don't care whether the leave request didn't return a 200 (e.g. + # if the user isn't already in the room), because we only want to + # make sure the user isn't in the room. + pass diff --git a/tests/rest/client/v1/test_rooms.py b/tests/rest/client/v1/test_rooms.py index 521ac80f9a..28fbf6ae52 100644 --- a/tests/rest/client/v1/test_rooms.py +++ b/tests/rest/client/v1/test_rooms.py @@ -904,3 +904,35 @@ class RoomSearchTestCase(unittest.HomeserverTestCase): self.assertEqual( context["profile_info"][self.other_user_id]["displayname"], "otheruser" ) + + +class PublicRoomsRestrictedTestCase(unittest.HomeserverTestCase): + + servlets = [ + admin.register_servlets, + room.register_servlets, + login.register_servlets, + ] + + def make_homeserver(self, reactor, clock): + + self.url = b"/_matrix/client/r0/publicRooms" + + config = self.default_config() + config.restrict_public_rooms_to_local_users = True + self.hs = self.setup_test_homeserver(config=config) + + return self.hs + + def test_restricted_no_auth(self): + request, channel = self.make_request("GET", self.url) + self.render(request) + self.assertEqual(channel.code, 401, channel.result) + + def test_restricted_auth(self): + self.register_user("user", "pass") + tok = self.login("user", "pass") + + request, channel = self.make_request("GET", self.url, access_token=tok) + self.render(request) + self.assertEqual(channel.code, 200, channel.result) -- cgit 1.5.1 From 11ea16777f52264f0a1d6f4db5b7db5c6c147523 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Thu, 9 May 2019 12:01:41 +0200 Subject: Limit the number of EDUs in transactions to 100 as expected by receiver (#5138) Fixes #3951. --- changelog.d/5138.bugfix | 1 + synapse/federation/sender/per_destination_queue.py | 56 ++++++++++++---------- synapse/storage/deviceinbox.py | 2 +- 3 files changed, 32 insertions(+), 27 deletions(-) create mode 100644 changelog.d/5138.bugfix (limited to 'synapse/federation') diff --git a/changelog.d/5138.bugfix b/changelog.d/5138.bugfix new file mode 100644 index 0000000000..c1945a8eb2 --- /dev/null +++ b/changelog.d/5138.bugfix @@ -0,0 +1 @@ +Limit the number of EDUs in transactions to 100 as expected by synapse. Thanks to @superboum for this work! diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index be99211003..4d96f026c6 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -33,6 +33,9 @@ from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage import UserPresenceState from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter +# This is defined in the Matrix spec and enforced by the receiver. +MAX_EDUS_PER_TRANSACTION = 100 + logger = logging.getLogger(__name__) @@ -197,7 +200,8 @@ class PerDestinationQueue(object): pending_pdus = [] while True: device_message_edus, device_stream_id, dev_list_id = ( - yield self._get_new_device_messages() + # We have to keep 2 free slots for presence and rr_edus + yield self._get_new_device_messages(MAX_EDUS_PER_TRANSACTION - 2) ) # BEGIN CRITICAL SECTION @@ -216,19 +220,9 @@ class PerDestinationQueue(object): pending_edus = [] - pending_edus.extend(self._get_rr_edus(force_flush=False)) - # We can only include at most 100 EDUs per transactions - pending_edus.extend(self._pop_pending_edus(100 - len(pending_edus))) - - pending_edus.extend( - self._pending_edus_keyed.values() - ) - - self._pending_edus_keyed = {} - - pending_edus.extend(device_message_edus) - + # rr_edus and pending_presence take at most one slot each + pending_edus.extend(self._get_rr_edus(force_flush=False)) pending_presence = self._pending_presence self._pending_presence = {} if pending_presence: @@ -248,6 +242,12 @@ class PerDestinationQueue(object): ) ) + pending_edus.extend(device_message_edus) + pending_edus.extend(self._pop_pending_edus(MAX_EDUS_PER_TRANSACTION - len(pending_edus))) + while len(pending_edus) < MAX_EDUS_PER_TRANSACTION and self._pending_edus_keyed: + _, val = self._pending_edus_keyed.popitem() + pending_edus.append(val) + if pending_pdus: logger.debug("TX [%s] len(pending_pdus_by_dest[dest]) = %d", self._destination, len(pending_pdus)) @@ -259,7 +259,7 @@ class PerDestinationQueue(object): # if we've decided to send a transaction anyway, and we have room, we # may as well send any pending RRs - if len(pending_edus) < 100: + if len(pending_edus) < MAX_EDUS_PER_TRANSACTION: pending_edus.extend(self._get_rr_edus(force_flush=True)) # END CRITICAL SECTION @@ -346,33 +346,37 @@ class PerDestinationQueue(object): return pending_edus @defer.inlineCallbacks - def _get_new_device_messages(self): - last_device_stream_id = self._last_device_stream_id - to_device_stream_id = self._store.get_to_device_stream_token() - contents, stream_id = yield self._store.get_new_device_msgs_for_remote( - self._destination, last_device_stream_id, to_device_stream_id + def _get_new_device_messages(self, limit): + last_device_list = self._last_device_list_stream_id + # Will return at most 20 entries + now_stream_id, results = yield self._store.get_devices_by_remote( + self._destination, last_device_list ) edus = [ Edu( origin=self._server_name, destination=self._destination, - edu_type="m.direct_to_device", + edu_type="m.device_list_update", content=content, ) - for content in contents + for content in results ] - last_device_list = self._last_device_list_stream_id - now_stream_id, results = yield self._store.get_devices_by_remote( - self._destination, last_device_list + assert len(edus) <= limit, "get_devices_by_remote returned too many EDUs" + + last_device_stream_id = self._last_device_stream_id + to_device_stream_id = self._store.get_to_device_stream_token() + contents, stream_id = yield self._store.get_new_device_msgs_for_remote( + self._destination, last_device_stream_id, to_device_stream_id, limit - len(edus) ) edus.extend( Edu( origin=self._server_name, destination=self._destination, - edu_type="m.device_list_update", + edu_type="m.direct_to_device", content=content, ) - for content in results + for content in contents ) + defer.returnValue((edus, stream_id, now_stream_id)) diff --git a/synapse/storage/deviceinbox.py b/synapse/storage/deviceinbox.py index fed4ea3610..9b0a99cb49 100644 --- a/synapse/storage/deviceinbox.py +++ b/synapse/storage/deviceinbox.py @@ -118,7 +118,7 @@ class DeviceInboxWorkerStore(SQLBaseStore): defer.returnValue(count) def get_new_device_msgs_for_remote( - self, destination, last_stream_id, current_stream_id, limit=100 + self, destination, last_stream_id, current_stream_id, limit ): """ Args: -- cgit 1.5.1 From 130f932cbc5ca5477af263c5290e0c9a51b5150c Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 9 May 2019 16:27:02 +0100 Subject: Run `black` on per_destination_queue ... mostly to fix pep8 fails --- synapse/federation/sender/per_destination_queue.py | 74 ++++++++++++---------- 1 file changed, 39 insertions(+), 35 deletions(-) (limited to 'synapse/federation') diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index 4d96f026c6..fae8bea392 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -40,8 +40,7 @@ logger = logging.getLogger(__name__) sent_edus_counter = Counter( - "synapse_federation_client_sent_edus", - "Total number of EDUs successfully sent", + "synapse_federation_client_sent_edus", "Total number of EDUs successfully sent" ) sent_edus_by_type = Counter( @@ -61,6 +60,7 @@ class PerDestinationQueue(object): destination (str): the server_name of the destination that we are managing transmission for. """ + def __init__(self, hs, transaction_manager, destination): self._server_name = hs.hostname self._clock = hs.get_clock() @@ -71,17 +71,17 @@ class PerDestinationQueue(object): self.transmission_loop_running = False # a list of tuples of (pending pdu, order) - self._pending_pdus = [] # type: list[tuple[EventBase, int]] - self._pending_edus = [] # type: list[Edu] + self._pending_pdus = [] # type: list[tuple[EventBase, int]] + self._pending_edus = [] # type: list[Edu] # Pending EDUs by their "key". Keyed EDUs are EDUs that get clobbered # based on their key (e.g. typing events by room_id) # Map of (edu_type, key) -> Edu - self._pending_edus_keyed = {} # type: dict[tuple[str, str], Edu] + self._pending_edus_keyed = {} # type: dict[tuple[str, str], Edu] # Map of user_id -> UserPresenceState of pending presence to be sent to this # destination - self._pending_presence = {} # type: dict[str, UserPresenceState] + self._pending_presence = {} # type: dict[str, UserPresenceState] # room_id -> receipt_type -> user_id -> receipt_dict self._pending_rrs = {} @@ -123,9 +123,7 @@ class PerDestinationQueue(object): Args: states (iterable[UserPresenceState]): presence to send """ - self._pending_presence.update({ - state.user_id: state for state in states - }) + self._pending_presence.update({state.user_id: state for state in states}) self.attempt_new_transaction() def queue_read_receipt(self, receipt): @@ -135,14 +133,9 @@ class PerDestinationQueue(object): Args: receipt (synapse.api.receipt_info.ReceiptInfo): receipt to be queued """ - self._pending_rrs.setdefault( - receipt.room_id, {}, - ).setdefault( + self._pending_rrs.setdefault(receipt.room_id, {}).setdefault( receipt.receipt_type, {} - )[receipt.user_id] = { - "event_ids": receipt.event_ids, - "data": receipt.data, - } + )[receipt.user_id] = {"event_ids": receipt.event_ids, "data": receipt.data} def flush_read_receipts_for_room(self, room_id): # if we don't have any read-receipts for this room, it may be that we've already @@ -173,10 +166,7 @@ class PerDestinationQueue(object): # request at which point pending_pdus just keeps growing. # we need application-layer timeouts of some flavour of these # requests - logger.debug( - "TX [%s] Transaction already in progress", - self._destination - ) + logger.debug("TX [%s] Transaction already in progress", self._destination) return logger.debug("TX [%s] Starting transaction loop", self._destination) @@ -243,14 +233,22 @@ class PerDestinationQueue(object): ) pending_edus.extend(device_message_edus) - pending_edus.extend(self._pop_pending_edus(MAX_EDUS_PER_TRANSACTION - len(pending_edus))) - while len(pending_edus) < MAX_EDUS_PER_TRANSACTION and self._pending_edus_keyed: + pending_edus.extend( + self._pop_pending_edus(MAX_EDUS_PER_TRANSACTION - len(pending_edus)) + ) + while ( + len(pending_edus) < MAX_EDUS_PER_TRANSACTION + and self._pending_edus_keyed + ): _, val = self._pending_edus_keyed.popitem() pending_edus.append(val) if pending_pdus: - logger.debug("TX [%s] len(pending_pdus_by_dest[dest]) = %d", - self._destination, len(pending_pdus)) + logger.debug( + "TX [%s] len(pending_pdus_by_dest[dest]) = %d", + self._destination, + len(pending_pdus), + ) if not pending_pdus and not pending_edus: logger.debug("TX [%s] Nothing to send", self._destination) @@ -303,22 +301,25 @@ class PerDestinationQueue(object): except HttpResponseException as e: logger.warning( "TX [%s] Received %d response to transaction: %s", - self._destination, e.code, e, + self._destination, + e.code, + e, ) except RequestSendFailed as e: - logger.warning("TX [%s] Failed to send transaction: %s", self._destination, e) + logger.warning( + "TX [%s] Failed to send transaction: %s", self._destination, e + ) for p, _ in pending_pdus: - logger.info("Failed to send event %s to %s", p.event_id, - self._destination) + logger.info( + "Failed to send event %s to %s", p.event_id, self._destination + ) except Exception: - logger.exception( - "TX [%s] Failed to send transaction", - self._destination, - ) + logger.exception("TX [%s] Failed to send transaction", self._destination) for p, _ in pending_pdus: - logger.info("Failed to send event %s to %s", p.event_id, - self._destination) + logger.info( + "Failed to send event %s to %s", p.event_id, self._destination + ) finally: # We want to be *very* sure we clear this after we stop processing self.transmission_loop_running = False @@ -367,7 +368,10 @@ class PerDestinationQueue(object): last_device_stream_id = self._last_device_stream_id to_device_stream_id = self._store.get_to_device_stream_token() contents, stream_id = yield self._store.get_new_device_msgs_for_remote( - self._destination, last_device_stream_id, to_device_stream_id, limit - len(edus) + self._destination, + last_device_stream_id, + to_device_stream_id, + limit - len(edus), ) edus.extend( Edu( -- cgit 1.5.1 From f1e5b413886ba4d9d0a16b028dba89c4a5cb56ac Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Wed, 15 May 2019 12:06:04 -0500 Subject: Make all the rate limiting options more consistent (#5181) --- changelog.d/5181.feature | 1 + docs/sample_config.yaml | 53 +++++++------- synapse/config/ratelimiting.py | 115 ++++++++++++++++++++----------- synapse/federation/transport/server.py | 6 +- synapse/handlers/_base.py | 4 +- synapse/rest/client/v2_alpha/register.py | 23 ++++--- synapse/util/ratelimitutils.py | 47 ++++--------- tests/utils.py | 20 +++--- 8 files changed, 138 insertions(+), 131 deletions(-) create mode 100644 changelog.d/5181.feature (limited to 'synapse/federation') diff --git a/changelog.d/5181.feature b/changelog.d/5181.feature new file mode 100644 index 0000000000..5ce13aa2ea --- /dev/null +++ b/changelog.d/5181.feature @@ -0,0 +1 @@ +Ratelimiting configuration for clients sending messages and the federation server has been altered to match login ratelimiting. The old configuration names will continue working. Check the sample config for details of the new names. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index c4e5c4cf39..09ee0e8984 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -446,21 +446,15 @@ log_config: "CONFDIR/SERVERNAME.log.config" ## Ratelimiting ## -# Number of messages a client can send per second -# -#rc_messages_per_second: 0.2 - -# Number of message a client can send before being throttled -# -#rc_message_burst_count: 10.0 - -# Ratelimiting settings for registration and login. +# Ratelimiting settings for client actions (registration, login, messaging). # # Each ratelimiting configuration is made of two parameters: # - per_second: number of requests a client can send per second. # - burst_count: number of requests a client can send before being throttled. # # Synapse currently uses the following configurations: +# - one for messages that ratelimits sending based on the account the client +# is using # - one for registration that ratelimits registration requests based on the # client's IP address. # - one for login that ratelimits login requests based on the client's IP @@ -473,6 +467,10 @@ log_config: "CONFDIR/SERVERNAME.log.config" # # The defaults are as shown below. # +#rc_message: +# per_second: 0.2 +# burst_count: 10 +# #rc_registration: # per_second: 0.17 # burst_count: 3 @@ -488,29 +486,28 @@ log_config: "CONFDIR/SERVERNAME.log.config" # per_second: 0.17 # burst_count: 3 -# The federation window size in milliseconds -# -#federation_rc_window_size: 1000 -# The number of federation requests from a single server in a window -# before the server will delay processing the request. +# Ratelimiting settings for incoming federation # -#federation_rc_sleep_limit: 10 - -# The duration in milliseconds to delay processing events from -# remote servers by if they go over the sleep limit. -# -#federation_rc_sleep_delay: 500 - -# The maximum number of concurrent federation requests allowed -# from a single server +# The rc_federation configuration is made up of the following settings: +# - window_size: window size in milliseconds +# - sleep_limit: number of federation requests from a single server in +# a window before the server will delay processing the request. +# - sleep_delay: duration in milliseconds to delay processing events +# from remote servers by if they go over the sleep limit. +# - reject_limit: maximum number of concurrent federation requests +# allowed from a single server +# - concurrent: number of federation requests to concurrently process +# from a single server # -#federation_rc_reject_limit: 50 - -# The number of federation requests to concurrently process from a -# single server +# The defaults are as shown below. # -#federation_rc_concurrent: 3 +#rc_federation: +# window_size: 1000 +# sleep_limit: 10 +# sleep_delay: 500 +# reject_limit: 50 +# concurrent: 3 # Target outgoing federation transaction frequency for sending read-receipts, # per-room. diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index 5a68399e63..5a9adac480 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -16,16 +16,56 @@ from ._base import Config class RateLimitConfig(object): - def __init__(self, config): - self.per_second = config.get("per_second", 0.17) - self.burst_count = config.get("burst_count", 3.0) + def __init__(self, config, defaults={"per_second": 0.17, "burst_count": 3.0}): + self.per_second = config.get("per_second", defaults["per_second"]) + self.burst_count = config.get("burst_count", defaults["burst_count"]) -class RatelimitConfig(Config): +class FederationRateLimitConfig(object): + _items_and_default = { + "window_size": 10000, + "sleep_limit": 10, + "sleep_delay": 500, + "reject_limit": 50, + "concurrent": 3, + } + + def __init__(self, **kwargs): + for i in self._items_and_default.keys(): + setattr(self, i, kwargs.get(i) or self._items_and_default[i]) + +class RatelimitConfig(Config): def read_config(self, config): - self.rc_messages_per_second = config.get("rc_messages_per_second", 0.2) - self.rc_message_burst_count = config.get("rc_message_burst_count", 10.0) + + # Load the new-style messages config if it exists. Otherwise fall back + # to the old method. + if "rc_message" in config: + self.rc_message = RateLimitConfig( + config["rc_message"], defaults={"per_second": 0.2, "burst_count": 10.0} + ) + else: + self.rc_message = RateLimitConfig( + { + "per_second": config.get("rc_messages_per_second", 0.2), + "burst_count": config.get("rc_message_burst_count", 10.0), + } + ) + + # Load the new-style federation config, if it exists. Otherwise, fall + # back to the old method. + if "federation_rc" in config: + self.rc_federation = FederationRateLimitConfig(**config["rc_federation"]) + else: + self.rc_federation = FederationRateLimitConfig( + **{ + "window_size": config.get("federation_rc_window_size"), + "sleep_limit": config.get("federation_rc_sleep_limit"), + "sleep_delay": config.get("federation_rc_sleep_delay"), + "reject_limit": config.get("federation_rc_reject_limit"), + "concurrent": config.get("federation_rc_concurrent"), + } + ) self.rc_registration = RateLimitConfig(config.get("rc_registration", {})) @@ -33,38 +73,26 @@ class RatelimitConfig(Config): self.rc_login_address = RateLimitConfig(rc_login_config.get("address", {})) self.rc_login_account = RateLimitConfig(rc_login_config.get("account", {})) self.rc_login_failed_attempts = RateLimitConfig( - rc_login_config.get("failed_attempts", {}), + rc_login_config.get("failed_attempts", {}) ) - self.federation_rc_window_size = config.get("federation_rc_window_size", 1000) - self.federation_rc_sleep_limit = config.get("federation_rc_sleep_limit", 10) - self.federation_rc_sleep_delay = config.get("federation_rc_sleep_delay", 500) - self.federation_rc_reject_limit = config.get("federation_rc_reject_limit", 50) - self.federation_rc_concurrent = config.get("federation_rc_concurrent", 3) - self.federation_rr_transactions_per_room_per_second = config.get( - "federation_rr_transactions_per_room_per_second", 50, + "federation_rr_transactions_per_room_per_second", 50 ) def default_config(self, **kwargs): return """\ ## Ratelimiting ## - # Number of messages a client can send per second - # - #rc_messages_per_second: 0.2 - - # Number of message a client can send before being throttled - # - #rc_message_burst_count: 10.0 - - # Ratelimiting settings for registration and login. + # Ratelimiting settings for client actions (registration, login, messaging). # # Each ratelimiting configuration is made of two parameters: # - per_second: number of requests a client can send per second. # - burst_count: number of requests a client can send before being throttled. # # Synapse currently uses the following configurations: + # - one for messages that ratelimits sending based on the account the client + # is using # - one for registration that ratelimits registration requests based on the # client's IP address. # - one for login that ratelimits login requests based on the client's IP @@ -77,6 +105,10 @@ class RatelimitConfig(Config): # # The defaults are as shown below. # + #rc_message: + # per_second: 0.2 + # burst_count: 10 + # #rc_registration: # per_second: 0.17 # burst_count: 3 @@ -92,29 +124,28 @@ class RatelimitConfig(Config): # per_second: 0.17 # burst_count: 3 - # The federation window size in milliseconds - # - #federation_rc_window_size: 1000 - - # The number of federation requests from a single server in a window - # before the server will delay processing the request. - # - #federation_rc_sleep_limit: 10 - # The duration in milliseconds to delay processing events from - # remote servers by if they go over the sleep limit. + # Ratelimiting settings for incoming federation # - #federation_rc_sleep_delay: 500 - - # The maximum number of concurrent federation requests allowed - # from a single server + # The rc_federation configuration is made up of the following settings: + # - window_size: window size in milliseconds + # - sleep_limit: number of federation requests from a single server in + # a window before the server will delay processing the request. + # - sleep_delay: duration in milliseconds to delay processing events + # from remote servers by if they go over the sleep limit. + # - reject_limit: maximum number of concurrent federation requests + # allowed from a single server + # - concurrent: number of federation requests to concurrently process + # from a single server # - #federation_rc_reject_limit: 50 - - # The number of federation requests to concurrently process from a - # single server + # The defaults are as shown below. # - #federation_rc_concurrent: 3 + #rc_federation: + # window_size: 1000 + # sleep_limit: 10 + # sleep_delay: 500 + # reject_limit: 50 + # concurrent: 3 # Target outgoing federation transaction frequency for sending read-receipts, # per-room. diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 9030eb18c5..385eda2dca 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -63,11 +63,7 @@ class TransportLayerServer(JsonResource): self.authenticator = Authenticator(hs) self.ratelimiter = FederationRateLimiter( self.clock, - window_size=hs.config.federation_rc_window_size, - sleep_limit=hs.config.federation_rc_sleep_limit, - sleep_msec=hs.config.federation_rc_sleep_delay, - reject_limit=hs.config.federation_rc_reject_limit, - concurrent_requests=hs.config.federation_rc_concurrent, + config=hs.config.rc_federation, ) self.register_servlets() diff --git a/synapse/handlers/_base.py b/synapse/handlers/_base.py index ac09d03ba9..dca337ec61 100644 --- a/synapse/handlers/_base.py +++ b/synapse/handlers/_base.py @@ -90,8 +90,8 @@ class BaseHandler(object): messages_per_second = override.messages_per_second burst_count = override.burst_count else: - messages_per_second = self.hs.config.rc_messages_per_second - burst_count = self.hs.config.rc_message_burst_count + messages_per_second = self.hs.config.rc_message.per_second + burst_count = self.hs.config.rc_message.burst_count allowed, time_allowed = self.ratelimiter.can_do_action( user_id, time_now, diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index dc3e265bcd..3d045880b9 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -31,6 +31,7 @@ from synapse.api.errors import ( SynapseError, UnrecognizedRequestError, ) +from synapse.config.ratelimiting import FederationRateLimitConfig from synapse.config.server import is_threepid_reserved from synapse.http.servlet import ( RestServlet, @@ -153,16 +154,18 @@ class UsernameAvailabilityRestServlet(RestServlet): self.registration_handler = hs.get_registration_handler() self.ratelimiter = FederationRateLimiter( hs.get_clock(), - # Time window of 2s - window_size=2000, - # Artificially delay requests if rate > sleep_limit/window_size - sleep_limit=1, - # Amount of artificial delay to apply - sleep_msec=1000, - # Error with 429 if more than reject_limit requests are queued - reject_limit=1, - # Allow 1 request at a time - concurrent_requests=1, + FederationRateLimitConfig( + # Time window of 2s + window_size=2000, + # Artificially delay requests if rate > sleep_limit/window_size + sleep_limit=1, + # Amount of artificial delay to apply + sleep_msec=1000, + # Error with 429 if more than reject_limit requests are queued + reject_limit=1, + # Allow 1 request at a time + concurrent_requests=1, + ) ) @defer.inlineCallbacks diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index 7deb38f2a7..b146d137f4 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -30,31 +30,14 @@ logger = logging.getLogger(__name__) class FederationRateLimiter(object): - def __init__(self, clock, window_size, sleep_limit, sleep_msec, - reject_limit, concurrent_requests): + def __init__(self, clock, config): """ Args: clock (Clock) - window_size (int): The window size in milliseconds. - sleep_limit (int): The number of requests received in the last - `window_size` milliseconds before we artificially start - delaying processing of requests. - sleep_msec (int): The number of milliseconds to delay processing - of incoming requests by. - reject_limit (int): The maximum number of requests that are can be - queued for processing before we start rejecting requests with - a 429 Too Many Requests response. - concurrent_requests (int): The number of concurrent requests to - process. + config (FederationRateLimitConfig) """ self.clock = clock - - self.window_size = window_size - self.sleep_limit = sleep_limit - self.sleep_msec = sleep_msec - self.reject_limit = reject_limit - self.concurrent_requests = concurrent_requests - + self._config = config self.ratelimiters = {} def ratelimit(self, host): @@ -76,25 +59,25 @@ class FederationRateLimiter(object): host, _PerHostRatelimiter( clock=self.clock, - window_size=self.window_size, - sleep_limit=self.sleep_limit, - sleep_msec=self.sleep_msec, - reject_limit=self.reject_limit, - concurrent_requests=self.concurrent_requests, + config=self._config, ) ).ratelimit() class _PerHostRatelimiter(object): - def __init__(self, clock, window_size, sleep_limit, sleep_msec, - reject_limit, concurrent_requests): + def __init__(self, clock, config): + """ + Args: + clock (Clock) + config (FederationRateLimitConfig) + """ self.clock = clock - self.window_size = window_size - self.sleep_limit = sleep_limit - self.sleep_sec = sleep_msec / 1000.0 - self.reject_limit = reject_limit - self.concurrent_requests = concurrent_requests + self.window_size = config.window_size + self.sleep_limit = config.sleep_limit + self.sleep_sec = config.sleep_delay / 1000.0 + self.reject_limit = config.reject_limit + self.concurrent_requests = config.concurrent # request_id objects for requests which have been slept self.sleeping_requests = set() diff --git a/tests/utils.py b/tests/utils.py index f38533a0c7..200c1ceabe 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -134,10 +134,6 @@ def default_config(name, parse=False): "email_enable_notifs": False, "block_non_admin_invites": False, "federation_domain_whitelist": None, - "federation_rc_reject_limit": 10, - "federation_rc_sleep_limit": 10, - "federation_rc_sleep_delay": 100, - "federation_rc_concurrent": 10, "filter_timeline_limit": 5000, "user_directory_search_all_users": False, "user_consent_server_notice_content": None, @@ -156,8 +152,13 @@ def default_config(name, parse=False): "mau_stats_only": False, "mau_limits_reserved_threepids": [], "admin_contact": None, - "rc_messages_per_second": 10000, - "rc_message_burst_count": 10000, + "rc_federation": { + "reject_limit": 10, + "sleep_limit": 10, + "sleep_delay": 10, + "concurrent": 10, + }, + "rc_message": {"per_second": 10000, "burst_count": 10000}, "rc_registration": {"per_second": 10000, "burst_count": 10000}, "rc_login": { "address": {"per_second": 10000, "burst_count": 10000}, @@ -375,12 +376,7 @@ def register_federation_servlets(hs, resource): resource=resource, authenticator=federation_server.Authenticator(hs), ratelimiter=FederationRateLimiter( - hs.get_clock(), - window_size=hs.config.federation_rc_window_size, - sleep_limit=hs.config.federation_rc_sleep_limit, - sleep_msec=hs.config.federation_rc_sleep_delay, - reject_limit=hs.config.federation_rc_reject_limit, - concurrent_requests=hs.config.federation_rc_concurrent, + hs.get_clock(), config=hs.config.rc_federation ), ) -- cgit 1.5.1 From 04d53794d6848cca2567d67f494ba8405d0bf1cf Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 21 May 2019 13:47:25 +0100 Subject: Fix error handling for rooms whose versions are unknown. (#5219) If we remove support for a particular room version, we should behave more gracefully. This should make client requests fail with a 400 rather than a 500, and will ignore individiual PDUs in a federation transaction, rather than the whole transaction. --- changelog.d/5219.bugfix | 1 + synapse/api/errors.py | 16 +++++++++++++++- synapse/events/__init__.py | 8 ++++++-- synapse/events/builder.py | 6 +++--- synapse/federation/federation_server.py | 14 +++++++++++++- 5 files changed, 38 insertions(+), 7 deletions(-) create mode 100644 changelog.d/5219.bugfix (limited to 'synapse/federation') diff --git a/changelog.d/5219.bugfix b/changelog.d/5219.bugfix new file mode 100644 index 0000000000..c1e17adc5d --- /dev/null +++ b/changelog.d/5219.bugfix @@ -0,0 +1 @@ +Fix error handling for rooms whose versions are unknown. diff --git a/synapse/api/errors.py b/synapse/api/errors.py index ff89259dec..e91697049c 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -328,9 +328,23 @@ class RoomKeysVersionError(SynapseError): self.current_version = current_version +class UnsupportedRoomVersionError(SynapseError): + """The client's request to create a room used a room version that the server does + not support.""" + def __init__(self): + super(UnsupportedRoomVersionError, self).__init__( + code=400, + msg="Homeserver does not support this room version", + errcode=Codes.UNSUPPORTED_ROOM_VERSION, + ) + + class IncompatibleRoomVersionError(SynapseError): - """A server is trying to join a room whose version it does not support.""" + """A server is trying to join a room whose version it does not support. + Unlike UnsupportedRoomVersionError, it is specific to the case of the make_join + failing. + """ def __init__(self, room_version): super(IncompatibleRoomVersionError, self).__init__( code=400, diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index badeb903fc..1edd19cc13 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -21,6 +21,7 @@ import six from unpaddedbase64 import encode_base64 +from synapse.api.errors import UnsupportedRoomVersionError from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, EventFormatVersions from synapse.util.caches import intern_dict from synapse.util.frozenutils import freeze @@ -369,12 +370,15 @@ def room_version_to_event_format(room_version): Returns: int + + Raises: + UnsupportedRoomVersionError if the room version is unknown """ v = KNOWN_ROOM_VERSIONS.get(room_version) if not v: - # We should have already checked version, so this should not happen - raise RuntimeError("Unrecognized room version %s" % (room_version,)) + # this can happen if support is withdrawn for a room version + raise UnsupportedRoomVersionError() return v.event_format diff --git a/synapse/events/builder.py b/synapse/events/builder.py index fba27177c7..1fe995f212 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -18,6 +18,7 @@ import attr from twisted.internet import defer from synapse.api.constants import MAX_DEPTH +from synapse.api.errors import UnsupportedRoomVersionError from synapse.api.room_versions import ( KNOWN_EVENT_FORMAT_VERSIONS, KNOWN_ROOM_VERSIONS, @@ -178,9 +179,8 @@ class EventBuilderFactory(object): """ v = KNOWN_ROOM_VERSIONS.get(room_version) if not v: - raise Exception( - "No event format defined for version %r" % (room_version,) - ) + # this can happen if support is withdrawn for a room version + raise UnsupportedRoomVersionError() return self.for_room_version(v, key_values) def for_room_version(self, room_version, key_values): diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index df60828dba..4c28c1dc3c 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -33,6 +33,7 @@ from synapse.api.errors import ( IncompatibleRoomVersionError, NotFoundError, SynapseError, + UnsupportedRoomVersionError, ) from synapse.api.room_versions import KNOWN_ROOM_VERSIONS from synapse.crypto.event_signing import compute_event_signature @@ -198,11 +199,22 @@ class FederationServer(FederationBase): try: room_version = yield self.store.get_room_version(room_id) - format_ver = room_version_to_event_format(room_version) except NotFoundError: logger.info("Ignoring PDU for unknown room_id: %s", room_id) continue + try: + format_ver = room_version_to_event_format(room_version) + except UnsupportedRoomVersionError: + # this can happen if support for a given room version is withdrawn, + # so that we still get events for said room. + logger.info( + "Ignoring PDU for room %s with unknown version %s", + room_id, + room_version, + ) + continue + event = event_from_pdu_json(p, format_ver) pdus_by_room.setdefault(room_id, []).append(event) -- cgit 1.5.1 From 46c8f7a5170d04dfa6ad02c69667d4aa48635231 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Thu, 30 May 2019 01:47:16 +1000 Subject: Implement the SHHS complexity API (#5216) --- changelog.d/5216.misc | 1 + synapse/api/urls.py | 1 + synapse/federation/transport/server.py | 31 +++++++++++- synapse/rest/admin/__init__.py | 12 +++-- synapse/storage/events_worker.py | 50 ++++++++++++++++++- tests/federation/test_complexity.py | 90 ++++++++++++++++++++++++++++++++++ 6 files changed, 180 insertions(+), 5 deletions(-) create mode 100644 changelog.d/5216.misc create mode 100644 tests/federation/test_complexity.py (limited to 'synapse/federation') diff --git a/changelog.d/5216.misc b/changelog.d/5216.misc new file mode 100644 index 0000000000..dbfa29475f --- /dev/null +++ b/changelog.d/5216.misc @@ -0,0 +1 @@ +Synapse will now serve the experimental "room complexity" API endpoint. diff --git a/synapse/api/urls.py b/synapse/api/urls.py index 3c6bddff7a..e16c386a14 100644 --- a/synapse/api/urls.py +++ b/synapse/api/urls.py @@ -26,6 +26,7 @@ CLIENT_API_PREFIX = "/_matrix/client" FEDERATION_PREFIX = "/_matrix/federation" FEDERATION_V1_PREFIX = FEDERATION_PREFIX + "/v1" FEDERATION_V2_PREFIX = FEDERATION_PREFIX + "/v2" +FEDERATION_UNSTABLE_PREFIX = FEDERATION_PREFIX + "/unstable" STATIC_PREFIX = "/_matrix/static" WEB_CLIENT_PREFIX = "/_matrix/client" CONTENT_REPO_PREFIX = "/_matrix/content" diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 385eda2dca..d0efc4e0d3 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -23,7 +23,11 @@ from twisted.internet import defer import synapse from synapse.api.errors import Codes, FederationDeniedError, SynapseError from synapse.api.room_versions import RoomVersions -from synapse.api.urls import FEDERATION_V1_PREFIX, FEDERATION_V2_PREFIX +from synapse.api.urls import ( + FEDERATION_UNSTABLE_PREFIX, + FEDERATION_V1_PREFIX, + FEDERATION_V2_PREFIX, +) from synapse.http.endpoint import parse_and_validate_server_name from synapse.http.server import JsonResource from synapse.http.servlet import ( @@ -1304,6 +1308,30 @@ class FederationGroupsSettingJoinPolicyServlet(BaseFederationServlet): defer.returnValue((200, new_content)) +class RoomComplexityServlet(BaseFederationServlet): + """ + Indicates to other servers how complex (and therefore likely + resource-intensive) a public room this server knows about is. + """ + PATH = "/rooms/(?P[^/]*)/complexity" + PREFIX = FEDERATION_UNSTABLE_PREFIX + + @defer.inlineCallbacks + def on_GET(self, origin, content, query, room_id): + + store = self.handler.hs.get_datastore() + + is_public = yield store.is_room_world_readable_or_publicly_joinable( + room_id + ) + + if not is_public: + raise SynapseError(404, "Room not found", errcode=Codes.INVALID_PARAM) + + complexity = yield store.get_room_complexity(room_id) + defer.returnValue((200, complexity)) + + FEDERATION_SERVLET_CLASSES = ( FederationSendServlet, FederationEventServlet, @@ -1327,6 +1355,7 @@ FEDERATION_SERVLET_CLASSES = ( FederationThirdPartyInviteExchangeServlet, On3pidBindServlet, FederationVersionServlet, + RoomComplexityServlet, ) OPENID_SERVLET_CLASSES = ( diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index 744d85594f..d6c4dcdb18 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -822,10 +822,16 @@ class AdminRestResource(JsonResource): def __init__(self, hs): JsonResource.__init__(self, hs, canonical_json=False) + register_servlets(hs, self) - register_servlets_for_client_rest_resource(hs, self) - SendServerNoticeServlet(hs).register(self) - VersionServlet(hs).register(self) + +def register_servlets(hs, http_server): + """ + Register all the admin servlets. + """ + register_servlets_for_client_rest_resource(hs, http_server) + SendServerNoticeServlet(hs).register(http_server) + VersionServlet(hs).register(http_server) def register_servlets_for_client_rest_resource(hs, http_server): diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 21b353cad3..b56c83e460 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import division + import itertools import logging from collections import namedtuple @@ -614,7 +616,7 @@ class EventsWorkerStore(SQLBaseStore): def _get_total_state_event_counts_txn(self, txn, room_id): """ - See get_state_event_counts. + See get_total_state_event_counts. """ sql = "SELECT COUNT(*) FROM state_events WHERE room_id=?" txn.execute(sql, (room_id,)) @@ -635,3 +637,49 @@ class EventsWorkerStore(SQLBaseStore): "get_total_state_event_counts", self._get_total_state_event_counts_txn, room_id ) + + def _get_current_state_event_counts_txn(self, txn, room_id): + """ + See get_current_state_event_counts. + """ + sql = "SELECT COUNT(*) FROM current_state_events WHERE room_id=?" + txn.execute(sql, (room_id,)) + row = txn.fetchone() + return row[0] if row else 0 + + def get_current_state_event_counts(self, room_id): + """ + Gets the current number of state events in a room. + + Args: + room_id (str) + + Returns: + Deferred[int] + """ + return self.runInteraction( + "get_current_state_event_counts", + self._get_current_state_event_counts_txn, room_id + ) + + @defer.inlineCallbacks + def get_room_complexity(self, room_id): + """ + Get a rough approximation of the complexity of the room. This is used by + remote servers to decide whether they wish to join the room or not. + Higher complexity value indicates that being in the room will consume + more resources. + + Args: + room_id (str) + + Returns: + Deferred[dict[str:int]] of complexity version to complexity. + """ + state_events = yield self.get_current_state_event_counts(room_id) + + # Call this one "v1", so we can introduce new ones as we want to develop + # it. + complexity_v1 = round(state_events / 500, 2) + + defer.returnValue({"v1": complexity_v1}) diff --git a/tests/federation/test_complexity.py b/tests/federation/test_complexity.py new file mode 100644 index 0000000000..1e3e5aec66 --- /dev/null +++ b/tests/federation/test_complexity.py @@ -0,0 +1,90 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 Matrix.org Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer + +from synapse.config.ratelimiting import FederationRateLimitConfig +from synapse.federation.transport import server +from synapse.rest import admin +from synapse.rest.client.v1 import login, room +from synapse.util.ratelimitutils import FederationRateLimiter + +from tests import unittest + + +class RoomComplexityTests(unittest.HomeserverTestCase): + + servlets = [ + admin.register_servlets, + room.register_servlets, + login.register_servlets, + ] + + def default_config(self, name='test'): + config = super(RoomComplexityTests, self).default_config(name=name) + config["limit_large_remote_room_joins"] = True + config["limit_large_remote_room_complexity"] = 0.05 + return config + + def prepare(self, reactor, clock, homeserver): + class Authenticator(object): + def authenticate_request(self, request, content): + return defer.succeed("otherserver.nottld") + + ratelimiter = FederationRateLimiter( + clock, + FederationRateLimitConfig( + window_size=1, + sleep_limit=1, + sleep_msec=1, + reject_limit=1000, + concurrent_requests=1000, + ), + ) + server.register_servlets( + homeserver, self.resource, Authenticator(), ratelimiter + ) + + def test_complexity_simple(self): + + u1 = self.register_user("u1", "pass") + u1_token = self.login("u1", "pass") + + room_1 = self.helper.create_room_as(u1, tok=u1_token) + self.helper.send_state( + room_1, event_type="m.room.topic", body={"topic": "foo"}, tok=u1_token + ) + + # Get the room complexity + request, channel = self.make_request( + "GET", "/_matrix/federation/unstable/rooms/%s/complexity" % (room_1,) + ) + self.render(request) + self.assertEquals(200, channel.code) + complexity = channel.json_body["v1"] + self.assertTrue(complexity > 0, complexity) + + # Artificially raise the complexity + store = self.hs.get_datastore() + store.get_current_state_event_counts = lambda x: defer.succeed(500 * 1.23) + + # Get the room complexity again -- make sure it's our artificial value + request, channel = self.make_request( + "GET", "/_matrix/federation/unstable/rooms/%s/complexity" % (room_1,) + ) + self.render(request) + self.assertEquals(200, channel.code) + complexity = channel.json_body["v1"] + self.assertEqual(complexity, 1.23) -- cgit 1.5.1 From fec2dcb1a538ab8ab447f724af1a94d5b3517197 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Mon, 3 Jun 2019 22:59:51 +0100 Subject: Enforce validity period on server_keys for fed requests. (#5321) When handling incoming federation requests, make sure that we have an up-to-date copy of the signing key. We do not yet enforce the validity period for event signatures. --- changelog.d/5321.bugfix | 1 + synapse/crypto/keyring.py | 167 ++++++++++++++++++++++----------- synapse/federation/federation_base.py | 4 +- synapse/federation/transport/server.py | 4 +- synapse/groups/attestations.py | 5 +- tests/crypto/test_keyring.py | 135 ++++++++++++++++++++------ 6 files changed, 228 insertions(+), 88 deletions(-) create mode 100644 changelog.d/5321.bugfix (limited to 'synapse/federation') diff --git a/changelog.d/5321.bugfix b/changelog.d/5321.bugfix new file mode 100644 index 0000000000..943a61956d --- /dev/null +++ b/changelog.d/5321.bugfix @@ -0,0 +1 @@ +Ensure that we have an up-to-date copy of the signing key when validating incoming federation requests. diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index b2f4cea536..cdec06c88e 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -15,6 +15,7 @@ # limitations under the License. import logging +from collections import defaultdict import six from six import raise_from @@ -70,6 +71,9 @@ class VerifyKeyRequest(object): json_object(dict): The JSON object to verify. + minimum_valid_until_ts (int): time at which we require the signing key to + be valid. (0 implies we don't care) + deferred(Deferred[str, str, nacl.signing.VerifyKey]): A deferred (server_name, key_id, verify_key) tuple that resolves when a verify key has been fetched. The deferreds' callbacks are run with no @@ -82,7 +86,8 @@ class VerifyKeyRequest(object): server_name = attr.ib() key_ids = attr.ib() json_object = attr.ib() - deferred = attr.ib() + minimum_valid_until_ts = attr.ib() + deferred = attr.ib(default=attr.Factory(defer.Deferred)) class KeyLookupError(ValueError): @@ -90,14 +95,16 @@ class KeyLookupError(ValueError): class Keyring(object): - def __init__(self, hs): + def __init__(self, hs, key_fetchers=None): self.clock = hs.get_clock() - self._key_fetchers = ( - StoreKeyFetcher(hs), - PerspectivesKeyFetcher(hs), - ServerKeyFetcher(hs), - ) + if key_fetchers is None: + key_fetchers = ( + StoreKeyFetcher(hs), + PerspectivesKeyFetcher(hs), + ServerKeyFetcher(hs), + ) + self._key_fetchers = key_fetchers # map from server name to Deferred. Has an entry for each server with # an ongoing key download; the Deferred completes once the download @@ -106,9 +113,25 @@ class Keyring(object): # These are regular, logcontext-agnostic Deferreds. self.key_downloads = {} - def verify_json_for_server(self, server_name, json_object): + def verify_json_for_server(self, server_name, json_object, validity_time): + """Verify that a JSON object has been signed by a given server + + Args: + server_name (str): name of the server which must have signed this object + + json_object (dict): object to be checked + + validity_time (int): timestamp at which we require the signing key to + be valid. (0 implies we don't care) + + Returns: + Deferred[None]: completes if the the object was correctly signed, otherwise + errbacks with an error + """ + req = server_name, json_object, validity_time + return logcontext.make_deferred_yieldable( - self.verify_json_objects_for_server([(server_name, json_object)])[0] + self.verify_json_objects_for_server((req,))[0] ) def verify_json_objects_for_server(self, server_and_json): @@ -116,10 +139,12 @@ class Keyring(object): necessary. Args: - server_and_json (list): List of pairs of (server_name, json_object) + server_and_json (iterable[Tuple[str, dict, int]): + Iterable of triplets of (server_name, json_object, validity_time) + validity_time is a timestamp at which the signing key must be valid. Returns: - List: for each input pair, a deferred indicating success + List: for each input triplet, a deferred indicating success or failure to verify each json object's signature for the given server_name. The deferreds run their callbacks in the sentinel logcontext. @@ -128,12 +153,12 @@ class Keyring(object): verify_requests = [] handle = preserve_fn(_handle_key_deferred) - def process(server_name, json_object): + def process(server_name, json_object, validity_time): """Process an entry in the request list - Given a (server_name, json_object) pair from the request list, - adds a key request to verify_requests, and returns a deferred which will - complete or fail (in the sentinel context) when verification completes. + Given a (server_name, json_object, validity_time) triplet from the request + list, adds a key request to verify_requests, and returns a deferred which + will complete or fail (in the sentinel context) when verification completes. """ key_ids = signature_ids(json_object, server_name) @@ -148,7 +173,7 @@ class Keyring(object): # add the key request to the queue, but don't start it off yet. verify_request = VerifyKeyRequest( - server_name, key_ids, json_object, defer.Deferred() + server_name, key_ids, json_object, validity_time ) verify_requests.append(verify_request) @@ -160,8 +185,8 @@ class Keyring(object): return handle(verify_request) results = [ - process(server_name, json_object) - for server_name, json_object in server_and_json + process(server_name, json_object, validity_time) + for server_name, json_object, validity_time in server_and_json ] if verify_requests: @@ -298,8 +323,12 @@ class Keyring(object): verify_request.deferred.errback( SynapseError( 401, - "No key for %s with id %s" - % (verify_request.server_name, verify_request.key_ids), + "No key for %s with ids in %s (min_validity %i)" + % ( + verify_request.server_name, + verify_request.key_ids, + verify_request.minimum_valid_until_ts, + ), Codes.UNAUTHORIZED, ) ) @@ -323,18 +352,28 @@ class Keyring(object): Args: fetcher (KeyFetcher): fetcher to use to fetch the keys remaining_requests (set[VerifyKeyRequest]): outstanding key requests. - Any successfully-completed requests will be reomved from the list. + Any successfully-completed requests will be removed from the list. """ - # dict[str, set(str)]: keys to fetch for each server - missing_keys = {} + # dict[str, dict[str, int]]: keys to fetch. + # server_name -> key_id -> min_valid_ts + missing_keys = defaultdict(dict) + for verify_request in remaining_requests: # any completed requests should already have been removed assert not verify_request.deferred.called - missing_keys.setdefault(verify_request.server_name, set()).update( - verify_request.key_ids - ) + keys_for_server = missing_keys[verify_request.server_name] - results = yield fetcher.get_keys(missing_keys.items()) + for key_id in verify_request.key_ids: + # If we have several requests for the same key, then we only need to + # request that key once, but we should do so with the greatest + # min_valid_until_ts of the requests, so that we can satisfy all of + # the requests. + keys_for_server[key_id] = max( + keys_for_server.get(key_id, -1), + verify_request.minimum_valid_until_ts + ) + + results = yield fetcher.get_keys(missing_keys) completed = list() for verify_request in remaining_requests: @@ -344,25 +383,34 @@ class Keyring(object): # complete this VerifyKeyRequest. result_keys = results.get(server_name, {}) for key_id in verify_request.key_ids: - key = result_keys.get(key_id) - if key: - with PreserveLoggingContext(): - verify_request.deferred.callback( - (server_name, key_id, key.verify_key) - ) - completed.append(verify_request) - break + fetch_key_result = result_keys.get(key_id) + if not fetch_key_result: + # we didn't get a result for this key + continue + + if ( + fetch_key_result.valid_until_ts + < verify_request.minimum_valid_until_ts + ): + # key was not valid at this point + continue + + with PreserveLoggingContext(): + verify_request.deferred.callback( + (server_name, key_id, fetch_key_result.verify_key) + ) + completed.append(verify_request) + break remaining_requests.difference_update(completed) class KeyFetcher(object): - def get_keys(self, server_name_and_key_ids): + def get_keys(self, keys_to_fetch): """ Args: - server_name_and_key_ids (iterable[Tuple[str, iterable[str]]]): - list of (server_name, iterable[key_id]) tuples to fetch keys for - Note that the iterables may be iterated more than once. + keys_to_fetch (dict[str, dict[str, int]]): + the keys to be fetched. server_name -> key_id -> min_valid_ts Returns: Deferred[dict[str, dict[str, synapse.storage.keys.FetchKeyResult|None]]]: @@ -378,13 +426,15 @@ class StoreKeyFetcher(KeyFetcher): self.store = hs.get_datastore() @defer.inlineCallbacks - def get_keys(self, server_name_and_key_ids): + def get_keys(self, keys_to_fetch): """see KeyFetcher.get_keys""" + keys_to_fetch = ( (server_name, key_id) - for server_name, key_ids in server_name_and_key_ids - for key_id in key_ids + for server_name, keys_for_server in keys_to_fetch.items() + for key_id in keys_for_server.keys() ) + res = yield self.store.get_server_verify_keys(keys_to_fetch) keys = {} for (server_name, key_id), key in res.items(): @@ -508,14 +558,14 @@ class PerspectivesKeyFetcher(BaseV2KeyFetcher): self.perspective_servers = self.config.perspectives @defer.inlineCallbacks - def get_keys(self, server_name_and_key_ids): + def get_keys(self, keys_to_fetch): """see KeyFetcher.get_keys""" @defer.inlineCallbacks def get_key(perspective_name, perspective_keys): try: result = yield self.get_server_verify_key_v2_indirect( - server_name_and_key_ids, perspective_name, perspective_keys + keys_to_fetch, perspective_name, perspective_keys ) defer.returnValue(result) except KeyLookupError as e: @@ -549,13 +599,15 @@ class PerspectivesKeyFetcher(BaseV2KeyFetcher): @defer.inlineCallbacks def get_server_verify_key_v2_indirect( - self, server_names_and_key_ids, perspective_name, perspective_keys + self, keys_to_fetch, perspective_name, perspective_keys ): """ Args: - server_names_and_key_ids (iterable[Tuple[str, iterable[str]]]): - list of (server_name, iterable[key_id]) tuples to fetch keys for + keys_to_fetch (dict[str, dict[str, int]]): + the keys to be fetched. server_name -> key_id -> min_valid_ts + perspective_name (str): name of the notary server to query for the keys + perspective_keys (dict[str, VerifyKey]): map of key_id->key for the notary server @@ -569,12 +621,10 @@ class PerspectivesKeyFetcher(BaseV2KeyFetcher): """ logger.info( "Requesting keys %s from notary server %s", - server_names_and_key_ids, + keys_to_fetch.items(), perspective_name, ) - # TODO(mark): Set the minimum_valid_until_ts to that needed by - # the events being validated or the current time if validating - # an incoming request. + try: query_response = yield self.client.post_json( destination=perspective_name, @@ -582,9 +632,10 @@ class PerspectivesKeyFetcher(BaseV2KeyFetcher): data={ u"server_keys": { server_name: { - key_id: {u"minimum_valid_until_ts": 0} for key_id in key_ids + key_id: {u"minimum_valid_until_ts": min_valid_ts} + for key_id, min_valid_ts in server_keys.items() } - for server_name, key_ids in server_names_and_key_ids + for server_name, server_keys in keys_to_fetch.items() } }, long_retries=True, @@ -694,15 +745,18 @@ class ServerKeyFetcher(BaseV2KeyFetcher): self.client = hs.get_http_client() @defer.inlineCallbacks - def get_keys(self, server_name_and_key_ids): + def get_keys(self, keys_to_fetch): """see KeyFetcher.get_keys""" + # TODO make this more resilient results = yield logcontext.make_deferred_yieldable( defer.gatherResults( [ run_in_background( - self.get_server_verify_key_v2_direct, server_name, key_ids + self.get_server_verify_key_v2_direct, + server_name, + server_keys.keys(), ) - for server_name, key_ids in server_name_and_key_ids + for server_name, server_keys in keys_to_fetch.items() ], consumeErrors=True, ).addErrback(unwrapFirstError) @@ -721,6 +775,7 @@ class ServerKeyFetcher(BaseV2KeyFetcher): keys = {} # type: dict[str, FetchKeyResult] for requested_key_id in key_ids: + # we may have found this key as a side-effect of asking for another. if requested_key_id in keys: continue diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index cffa831d80..4b38f7c759 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -265,7 +265,7 @@ def _check_sigs_on_pdus(keyring, room_version, pdus): ] more_deferreds = keyring.verify_json_objects_for_server([ - (p.sender_domain, p.redacted_pdu_json) + (p.sender_domain, p.redacted_pdu_json, 0) for p in pdus_to_check_sender ]) @@ -298,7 +298,7 @@ def _check_sigs_on_pdus(keyring, room_version, pdus): ] more_deferreds = keyring.verify_json_objects_for_server([ - (get_domain_from_id(p.pdu.event_id), p.redacted_pdu_json) + (get_domain_from_id(p.pdu.event_id), p.redacted_pdu_json, 0) for p in pdus_to_check_event_id ]) diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index d0efc4e0d3..0db8858cf1 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -94,6 +94,7 @@ class NoAuthenticationError(AuthenticationError): class Authenticator(object): def __init__(self, hs): + self._clock = hs.get_clock() self.keyring = hs.get_keyring() self.server_name = hs.hostname self.store = hs.get_datastore() @@ -102,6 +103,7 @@ class Authenticator(object): # A method just so we can pass 'self' as the authenticator to the Servlets @defer.inlineCallbacks def authenticate_request(self, request, content): + now = self._clock.time_msec() json_request = { "method": request.method.decode('ascii'), "uri": request.uri.decode('ascii'), @@ -138,7 +140,7 @@ class Authenticator(object): 401, "Missing Authorization headers", Codes.UNAUTHORIZED, ) - yield self.keyring.verify_json_for_server(origin, json_request) + yield self.keyring.verify_json_for_server(origin, json_request, now) logger.info("Request from %s", origin) request.authenticated_entity = origin diff --git a/synapse/groups/attestations.py b/synapse/groups/attestations.py index 786149be65..fa6b641ee1 100644 --- a/synapse/groups/attestations.py +++ b/synapse/groups/attestations.py @@ -97,10 +97,11 @@ class GroupAttestationSigning(object): # TODO: We also want to check that *new* attestations that people give # us to store are valid for at least a little while. - if valid_until_ms < self.clock.time_msec(): + now = self.clock.time_msec() + if valid_until_ms < now: raise SynapseError(400, "Attestation expired") - yield self.keyring.verify_json_for_server(server_name, attestation) + yield self.keyring.verify_json_for_server(server_name, attestation, now) def create_attestation(self, group_id, user_id): """Create an attestation for the group_id and user_id with default diff --git a/tests/crypto/test_keyring.py b/tests/crypto/test_keyring.py index 3933ad4347..096401938d 100644 --- a/tests/crypto/test_keyring.py +++ b/tests/crypto/test_keyring.py @@ -19,6 +19,7 @@ from mock import Mock import canonicaljson import signedjson.key import signedjson.sign +from signedjson.key import get_verify_key from twisted.internet import defer @@ -137,7 +138,7 @@ class KeyringTestCase(unittest.HomeserverTestCase): context_11.request = "11" res_deferreds = kr.verify_json_objects_for_server( - [("server10", json1), ("server11", {})] + [("server10", json1, 0), ("server11", {}, 0)] ) # the unsigned json should be rejected pretty quickly @@ -174,7 +175,7 @@ class KeyringTestCase(unittest.HomeserverTestCase): self.http_client.post_json.return_value = defer.Deferred() res_deferreds_2 = kr.verify_json_objects_for_server( - [("server10", json1)] + [("server10", json1, 0)] ) res_deferreds_2[0].addBoth(self.check_context, None) yield logcontext.make_deferred_yieldable(res_deferreds_2[0]) @@ -197,31 +198,108 @@ class KeyringTestCase(unittest.HomeserverTestCase): kr = keyring.Keyring(self.hs) key1 = signedjson.key.generate_signing_key(1) - key1_id = "%s:%s" % (key1.alg, key1.version) - r = self.hs.datastore.store_server_verify_keys( "server9", time.time() * 1000, - [ - ( - "server9", - key1_id, - FetchKeyResult(signedjson.key.get_verify_key(key1), 1000), - ), - ], + [("server9", get_key_id(key1), FetchKeyResult(get_verify_key(key1), 1000))], ) self.get_success(r) + json1 = {} signedjson.sign.sign_json(json1, "server9", key1) # should fail immediately on an unsigned object - d = _verify_json_for_server(kr, "server9", {}) + d = _verify_json_for_server(kr, "server9", {}, 0) self.failureResultOf(d, SynapseError) - d = _verify_json_for_server(kr, "server9", json1) - self.assertFalse(d.called) + # should suceed on a signed object + d = _verify_json_for_server(kr, "server9", json1, 500) + # self.assertFalse(d.called) self.get_success(d) + def test_verify_json_dedupes_key_requests(self): + """Two requests for the same key should be deduped.""" + key1 = signedjson.key.generate_signing_key(1) + + def get_keys(keys_to_fetch): + # there should only be one request object (with the max validity) + self.assertEqual(keys_to_fetch, {"server1": {get_key_id(key1): 1500}}) + + return defer.succeed( + { + "server1": { + get_key_id(key1): FetchKeyResult(get_verify_key(key1), 1200) + } + } + ) + + mock_fetcher = keyring.KeyFetcher() + mock_fetcher.get_keys = Mock(side_effect=get_keys) + kr = keyring.Keyring(self.hs, key_fetchers=(mock_fetcher,)) + + json1 = {} + signedjson.sign.sign_json(json1, "server1", key1) + + # the first request should succeed; the second should fail because the key + # has expired + results = kr.verify_json_objects_for_server( + [("server1", json1, 500), ("server1", json1, 1500)] + ) + self.assertEqual(len(results), 2) + self.get_success(results[0]) + e = self.get_failure(results[1], SynapseError).value + self.assertEqual(e.errcode, "M_UNAUTHORIZED") + self.assertEqual(e.code, 401) + + # there should have been a single call to the fetcher + mock_fetcher.get_keys.assert_called_once() + + def test_verify_json_falls_back_to_other_fetchers(self): + """If the first fetcher cannot provide a recent enough key, we fall back""" + key1 = signedjson.key.generate_signing_key(1) + + def get_keys1(keys_to_fetch): + self.assertEqual(keys_to_fetch, {"server1": {get_key_id(key1): 1500}}) + return defer.succeed( + { + "server1": { + get_key_id(key1): FetchKeyResult(get_verify_key(key1), 800) + } + } + ) + + def get_keys2(keys_to_fetch): + self.assertEqual(keys_to_fetch, {"server1": {get_key_id(key1): 1500}}) + return defer.succeed( + { + "server1": { + get_key_id(key1): FetchKeyResult(get_verify_key(key1), 1200) + } + } + ) + + mock_fetcher1 = keyring.KeyFetcher() + mock_fetcher1.get_keys = Mock(side_effect=get_keys1) + mock_fetcher2 = keyring.KeyFetcher() + mock_fetcher2.get_keys = Mock(side_effect=get_keys2) + kr = keyring.Keyring(self.hs, key_fetchers=(mock_fetcher1, mock_fetcher2)) + + json1 = {} + signedjson.sign.sign_json(json1, "server1", key1) + + results = kr.verify_json_objects_for_server( + [("server1", json1, 1200), ("server1", json1, 1500)] + ) + self.assertEqual(len(results), 2) + self.get_success(results[0]) + e = self.get_failure(results[1], SynapseError).value + self.assertEqual(e.errcode, "M_UNAUTHORIZED") + self.assertEqual(e.code, 401) + + # there should have been a single call to each fetcher + mock_fetcher1.get_keys.assert_called_once() + mock_fetcher2.get_keys.assert_called_once() + class ServerKeyFetcherTestCase(unittest.HomeserverTestCase): def make_homeserver(self, reactor, clock): @@ -260,8 +338,8 @@ class ServerKeyFetcherTestCase(unittest.HomeserverTestCase): self.http_client.get_json.side_effect = get_json - server_name_and_key_ids = [(SERVER_NAME, ("key1",))] - keys = self.get_success(fetcher.get_keys(server_name_and_key_ids)) + keys_to_fetch = {SERVER_NAME: {"key1": 0}} + keys = self.get_success(fetcher.get_keys(keys_to_fetch)) k = keys[SERVER_NAME][testverifykey_id] self.assertEqual(k.valid_until_ts, VALID_UNTIL_TS) self.assertEqual(k.verify_key, testverifykey) @@ -288,9 +366,7 @@ class ServerKeyFetcherTestCase(unittest.HomeserverTestCase): # change the server name: it should cause a rejection response["server_name"] = "OTHER_SERVER" - self.get_failure( - fetcher.get_keys(server_name_and_key_ids), KeyLookupError - ) + self.get_failure(fetcher.get_keys(keys_to_fetch), KeyLookupError) class PerspectivesKeyFetcherTestCase(unittest.HomeserverTestCase): @@ -342,8 +418,8 @@ class PerspectivesKeyFetcherTestCase(unittest.HomeserverTestCase): self.http_client.post_json.side_effect = post_json - server_name_and_key_ids = [(SERVER_NAME, ("key1",))] - keys = self.get_success(fetcher.get_keys(server_name_and_key_ids)) + keys_to_fetch = {SERVER_NAME: {"key1": 0}} + keys = self.get_success(fetcher.get_keys(keys_to_fetch)) self.assertIn(SERVER_NAME, keys) k = keys[SERVER_NAME][testverifykey_id] self.assertEqual(k.valid_until_ts, VALID_UNTIL_TS) @@ -401,7 +477,7 @@ class PerspectivesKeyFetcherTestCase(unittest.HomeserverTestCase): def get_key_from_perspectives(response): fetcher = PerspectivesKeyFetcher(self.hs) - server_name_and_key_ids = [(SERVER_NAME, ("key1",))] + keys_to_fetch = {SERVER_NAME: {"key1": 0}} def post_json(destination, path, data, **kwargs): self.assertEqual(destination, self.mock_perspective_server.server_name) @@ -410,9 +486,7 @@ class PerspectivesKeyFetcherTestCase(unittest.HomeserverTestCase): self.http_client.post_json.side_effect = post_json - return self.get_success( - fetcher.get_keys(server_name_and_key_ids) - ) + return self.get_success(fetcher.get_keys(keys_to_fetch)) # start with a valid response so we can check we are testing the right thing response = build_response() @@ -435,6 +509,11 @@ class PerspectivesKeyFetcherTestCase(unittest.HomeserverTestCase): self.assertEqual(keys, {}, "Expected empty dict with missing origin server sig") +def get_key_id(key): + """Get the matrix ID tag for a given SigningKey or VerifyKey""" + return "%s:%s" % (key.alg, key.version) + + @defer.inlineCallbacks def run_in_context(f, *args, **kwargs): with LoggingContext("testctx") as ctx: @@ -445,14 +524,16 @@ def run_in_context(f, *args, **kwargs): defer.returnValue(rv) -def _verify_json_for_server(keyring, server_name, json_object): +def _verify_json_for_server(keyring, server_name, json_object, validity_time): """thin wrapper around verify_json_for_server which makes sure it is wrapped with the patched defer.inlineCallbacks. """ @defer.inlineCallbacks def v(): - rv1 = yield keyring.verify_json_for_server(server_name, json_object) + rv1 = yield keyring.verify_json_for_server( + server_name, json_object, validity_time + ) defer.returnValue(rv1) return run_in_context(v) -- cgit 1.5.1 From b4189b112fcacf8143aa8fe7674d5c2518067bc8 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 4 Jun 2019 18:01:09 +0100 Subject: Rename get_events->get_events_from_store_or_dest (#5344) We have too many things called get_event, and it's hard to figure out what we mean. Also remove some unused params from the signature, and add some logging. --- changelog.d/5344.misc | 1 + synapse/federation/federation_client.py | 33 +++++++++++++-------------------- 2 files changed, 14 insertions(+), 20 deletions(-) create mode 100644 changelog.d/5344.misc (limited to 'synapse/federation') diff --git a/changelog.d/5344.misc b/changelog.d/5344.misc new file mode 100644 index 0000000000..a20c563bf1 --- /dev/null +++ b/changelog.d/5344.misc @@ -0,0 +1 @@ +Clean up FederationClient.get_events for clarity. diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index f3fc897a0a..916ff487c9 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -17,7 +17,6 @@ import copy import itertools import logging -import random from six.moves import range @@ -326,8 +325,8 @@ class FederationClient(FederationBase): state_event_ids = result["pdu_ids"] auth_event_ids = result.get("auth_chain_ids", []) - fetched_events, failed_to_fetch = yield self.get_events( - [destination], room_id, set(state_event_ids + auth_event_ids) + fetched_events, failed_to_fetch = yield self.get_events_from_store_or_dest( + destination, room_id, set(state_event_ids + auth_event_ids) ) if failed_to_fetch: @@ -397,27 +396,20 @@ class FederationClient(FederationBase): defer.returnValue((signed_pdus, signed_auth)) @defer.inlineCallbacks - def get_events(self, destinations, room_id, event_ids, return_local=True): - """Fetch events from some remote destinations, checking if we already - have them. + def get_events_from_store_or_dest(self, destination, room_id, event_ids): + """Fetch events from a remote destination, checking if we already have them. Args: - destinations (list) + destination (str) room_id (str) event_ids (list) - return_local (bool): Whether to include events we already have in - the DB in the returned list of events Returns: Deferred: A deferred resolving to a 2-tuple where the first is a list of events and the second is a list of event ids that we failed to fetch. """ - if return_local: - seen_events = yield self.store.get_events(event_ids, allow_rejected=True) - signed_events = list(seen_events.values()) - else: - seen_events = yield self.store.have_seen_events(event_ids) - signed_events = [] + seen_events = yield self.store.get_events(event_ids, allow_rejected=True) + signed_events = list(seen_events.values()) failed_to_fetch = set() @@ -428,10 +420,11 @@ class FederationClient(FederationBase): if not missing_events: defer.returnValue((signed_events, failed_to_fetch)) - def random_server_list(): - srvs = list(destinations) - random.shuffle(srvs) - return srvs + logger.debug( + "Fetching unknown state/auth events %s for room %s", + missing_events, + event_ids, + ) room_version = yield self.store.get_room_version(room_id) @@ -443,7 +436,7 @@ class FederationClient(FederationBase): deferreds = [ run_in_background( self.get_pdu, - destinations=random_server_list(), + destinations=[destination], event_id=e_id, room_version=room_version, ) -- cgit 1.5.1 From dae224a73f6a799718f7dfc5c6d8ac3e050fca1d Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 4 Jun 2019 18:05:06 +0100 Subject: Fix failure to fetch batches of PDUs (#5342) FederationClient.get_pdu is called in a loop to fetch a batch of PDUs. A failure to fetch one should not result in a failure of the whole batch. Add the missing `continue`. --- changelog.d/5342.bugfix | 1 + synapse/federation/federation_client.py | 1 + 2 files changed, 2 insertions(+) create mode 100644 changelog.d/5342.bugfix (limited to 'synapse/federation') diff --git a/changelog.d/5342.bugfix b/changelog.d/5342.bugfix new file mode 100644 index 0000000000..66a3076292 --- /dev/null +++ b/changelog.d/5342.bugfix @@ -0,0 +1 @@ +Fix failure when fetching batches of events during backfill, etc. diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 916ff487c9..d559605382 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -279,6 +279,7 @@ class FederationClient(FederationBase): "Failed to get PDU %s from %s because %s", event_id, destination, e, ) + continue except NotRetryingDestination as e: logger.info(str(e)) continue -- cgit 1.5.1 From 2615c6bd9e5456c5aefc23a9c89a4346b8afc6b0 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 5 Jun 2019 10:35:40 +0100 Subject: Clean up debug logging (#5347) Remove some spurious stuff, clarify some other stuff --- changelog.d/5347.misc | 2 ++ synapse/crypto/event_signing.py | 6 +++++- synapse/crypto/keyring.py | 4 ---- synapse/federation/federation_client.py | 16 +++++++++++++--- 4 files changed, 20 insertions(+), 8 deletions(-) create mode 100644 changelog.d/5347.misc (limited to 'synapse/federation') diff --git a/changelog.d/5347.misc b/changelog.d/5347.misc new file mode 100644 index 0000000000..436245fb11 --- /dev/null +++ b/changelog.d/5347.misc @@ -0,0 +1,2 @@ +Various improvements to debug logging. + diff --git a/synapse/crypto/event_signing.py b/synapse/crypto/event_signing.py index 1dfa727fcf..99a586655b 100644 --- a/synapse/crypto/event_signing.py +++ b/synapse/crypto/event_signing.py @@ -31,7 +31,11 @@ logger = logging.getLogger(__name__) def check_event_content_hash(event, hash_algorithm=hashlib.sha256): """Check whether the hash for this PDU matches the contents""" name, expected_hash = compute_content_hash(event.get_pdu_json(), hash_algorithm) - logger.debug("Expecting hash: %s", encode_base64(expected_hash)) + logger.debug( + "Verifying content hash on %s (expecting: %s)", + event.event_id, + encode_base64(expected_hash), + ) # some malformed events lack a 'hashes'. Protect against it being missing # or a weird type by basically treating it the same as an unhashed event. diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index 8e1d666db1..e94e71bdad 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -866,10 +866,6 @@ def _handle_key_deferred(verify_request): json_object = verify_request.json_object - logger.debug( - "Got key %s %s:%s for server %s, verifying" - % (key_id, verify_key.alg, verify_key.version, server_name) - ) try: verify_signed_json(json_object, server_name, verify_key) except SignatureVerifyException as e: diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index d559605382..70573746d6 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -232,7 +232,8 @@ class FederationClient(FederationBase): moving to the next destination. None indicates no timeout. Returns: - Deferred: Results in the requested PDU. + Deferred: Results in the requested PDU, or None if we were unable to find + it. """ # TODO: Rate limit the number of times we try and get the same event. @@ -257,7 +258,12 @@ class FederationClient(FederationBase): destination, event_id, timeout=timeout, ) - logger.debug("transaction_data %r", transaction_data) + logger.debug( + "retrieved event id %s from %s: %r", + event_id, + destination, + transaction_data, + ) pdu_list = [ event_from_pdu_json(p, format_ver, outlier=outlier) @@ -331,7 +337,11 @@ class FederationClient(FederationBase): ) if failed_to_fetch: - logger.warn("Failed to get %r", failed_to_fetch) + logger.warning( + "Failed to fetch missing state/auth events for %s: %s", + room_id, + failed_to_fetch + ) event_map = { ev.event_id: ev for ev in fetched_events -- cgit 1.5.1 From 14f13babb00d64009b11ef822ebe6fafe044eebd Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 5 Jun 2019 10:38:25 +0100 Subject: Add a test room version where we enforce key validity (#5348) --- changelog.d/5348.bugfix | 1 + synapse/api/room_versions.py | 20 +++++++++++------- synapse/federation/federation_base.py | 39 +++++++++++++++++++++-------------- 3 files changed, 38 insertions(+), 22 deletions(-) create mode 100644 changelog.d/5348.bugfix (limited to 'synapse/federation') diff --git a/changelog.d/5348.bugfix b/changelog.d/5348.bugfix new file mode 100644 index 0000000000..8d396c7990 --- /dev/null +++ b/changelog.d/5348.bugfix @@ -0,0 +1 @@ +Add a new room version where the timestamps on events are checked against the validity periods on signing keys. \ No newline at end of file diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py index 4085bd10b9..501cdfb6a4 100644 --- a/synapse/api/room_versions.py +++ b/synapse/api/room_versions.py @@ -50,6 +50,7 @@ class RoomVersion(object): disposition = attr.ib() # str; one of the RoomDispositions event_format = attr.ib() # int; one of the EventFormatVersions state_res = attr.ib() # int; one of the StateResolutionVersions + enforce_key_validity = attr.ib() # bool class RoomVersions(object): @@ -58,30 +59,35 @@ class RoomVersions(object): RoomDisposition.STABLE, EventFormatVersions.V1, StateResolutionVersions.V1, - ) - STATE_V2_TEST = RoomVersion( - "state-v2-test", - RoomDisposition.UNSTABLE, - EventFormatVersions.V1, - StateResolutionVersions.V2, + enforce_key_validity=False, ) V2 = RoomVersion( "2", RoomDisposition.STABLE, EventFormatVersions.V1, StateResolutionVersions.V2, + enforce_key_validity=False, ) V3 = RoomVersion( "3", RoomDisposition.STABLE, EventFormatVersions.V2, StateResolutionVersions.V2, + enforce_key_validity=False, ) V4 = RoomVersion( "4", RoomDisposition.STABLE, EventFormatVersions.V3, StateResolutionVersions.V2, + enforce_key_validity=False, + ) + VDH_TEST_KEY_VALIDITY = RoomVersion( + "vdh-test-key-validity", + RoomDisposition.UNSTABLE, + EventFormatVersions.V3, + StateResolutionVersions.V2, + enforce_key_validity=False, ) @@ -90,7 +96,7 @@ KNOWN_ROOM_VERSIONS = { RoomVersions.V1, RoomVersions.V2, RoomVersions.V3, - RoomVersions.STATE_V2_TEST, RoomVersions.V4, + RoomVersions.VDH_TEST_KEY_VALIDITY, ) } # type: dict[str, RoomVersion] diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 4b38f7c759..b541913d82 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -223,9 +223,6 @@ def _check_sigs_on_pdus(keyring, room_version, pdus): the signatures are valid, or fail (with a SynapseError) if not. """ - # (currently this is written assuming the v1 room structure; we'll probably want a - # separate function for checking v2 rooms) - # we want to check that the event is signed by: # # (a) the sender's server @@ -257,6 +254,10 @@ def _check_sigs_on_pdus(keyring, room_version, pdus): for p in pdus ] + v = KNOWN_ROOM_VERSIONS.get(room_version) + if not v: + raise RuntimeError("Unrecognized room version %s" % (room_version,)) + # First we check that the sender event is signed by the sender's domain # (except if its a 3pid invite, in which case it may be sent by any server) pdus_to_check_sender = [ @@ -264,10 +265,16 @@ def _check_sigs_on_pdus(keyring, room_version, pdus): if not _is_invite_via_3pid(p.pdu) ] - more_deferreds = keyring.verify_json_objects_for_server([ - (p.sender_domain, p.redacted_pdu_json, 0) - for p in pdus_to_check_sender - ]) + more_deferreds = keyring.verify_json_objects_for_server( + [ + ( + p.sender_domain, + p.redacted_pdu_json, + p.pdu.origin_server_ts if v.enforce_key_validity else 0, + ) + for p in pdus_to_check_sender + ] + ) def sender_err(e, pdu_to_check): errmsg = "event id %s: unable to verify signature for sender %s: %s" % ( @@ -287,20 +294,22 @@ def _check_sigs_on_pdus(keyring, room_version, pdus): # event id's domain (normally only the case for joins/leaves), and add additional # checks. Only do this if the room version has a concept of event ID domain # (ie, the room version uses old-style non-hash event IDs). - v = KNOWN_ROOM_VERSIONS.get(room_version) - if not v: - raise RuntimeError("Unrecognized room version %s" % (room_version,)) - if v.event_format == EventFormatVersions.V1: pdus_to_check_event_id = [ p for p in pdus_to_check if p.sender_domain != get_domain_from_id(p.pdu.event_id) ] - more_deferreds = keyring.verify_json_objects_for_server([ - (get_domain_from_id(p.pdu.event_id), p.redacted_pdu_json, 0) - for p in pdus_to_check_event_id - ]) + more_deferreds = keyring.verify_json_objects_for_server( + [ + ( + get_domain_from_id(p.pdu.event_id), + p.redacted_pdu_json, + p.pdu.origin_server_ts if v.enforce_key_validity else 0, + ) + for p in pdus_to_check_event_id + ] + ) def event_err(e, pdu_to_check): errmsg = ( -- cgit 1.5.1 From cea9750d112cf74790fb8c16482a0068717954d5 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 4 Jun 2019 16:12:57 +0100 Subject: Associate a request_name with each verify request, for logging Also: * rename VerifyKeyRequest->VerifyJsonRequest * calculate key_ids on VerifyJsonRequest construction * refactor things to pass around VerifyJsonRequests instead of 4-tuples --- changelog.d/5353.misc | 2 + synapse/crypto/keyring.py | 112 ++++++++++++++++++++------------- synapse/federation/federation_base.py | 2 + synapse/federation/transport/server.py | 4 +- synapse/groups/attestations.py | 4 +- tests/crypto/test_keyring.py | 18 +++--- 6 files changed, 86 insertions(+), 56 deletions(-) create mode 100644 changelog.d/5353.misc (limited to 'synapse/federation') diff --git a/changelog.d/5353.misc b/changelog.d/5353.misc new file mode 100644 index 0000000000..436245fb11 --- /dev/null +++ b/changelog.d/5353.misc @@ -0,0 +1,2 @@ +Various improvements to debug logging. + diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index e94e71bdad..2b6b5913bc 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -60,9 +60,9 @@ logger = logging.getLogger(__name__) @attr.s(slots=True, cmp=False) -class VerifyKeyRequest(object): +class VerifyJsonRequest(object): """ - A request for a verify key to verify a JSON object. + A request to verify a JSON object. Attributes: server_name(str): The name of the server to verify against. @@ -85,11 +85,15 @@ class VerifyKeyRequest(object): """ server_name = attr.ib() - key_ids = attr.ib() json_object = attr.ib() minimum_valid_until_ts = attr.ib() + request_name = attr.ib() + key_ids = attr.ib(init=False) key_ready = attr.ib(default=attr.Factory(defer.Deferred)) + def __attrs_post_init__(self): + self.key_ids = signature_ids(self.json_object, self.server_name) + class KeyLookupError(ValueError): pass @@ -114,7 +118,9 @@ class Keyring(object): # These are regular, logcontext-agnostic Deferreds. self.key_downloads = {} - def verify_json_for_server(self, server_name, json_object, validity_time): + def verify_json_for_server( + self, server_name, json_object, validity_time, request_name + ): """Verify that a JSON object has been signed by a given server Args: @@ -125,24 +131,31 @@ class Keyring(object): validity_time (int): timestamp at which we require the signing key to be valid. (0 implies we don't care) + request_name (str): an identifier for this json object (eg, an event id) + for logging. + Returns: Deferred[None]: completes if the the object was correctly signed, otherwise errbacks with an error """ - req = server_name, json_object, validity_time - - return logcontext.make_deferred_yieldable( - self.verify_json_objects_for_server((req,))[0] - ) + req = VerifyJsonRequest(server_name, json_object, validity_time, request_name) + requests = (req,) + return logcontext.make_deferred_yieldable(self._verify_objects(requests)[0]) def verify_json_objects_for_server(self, server_and_json): """Bulk verifies signatures of json objects, bulk fetching keys as necessary. Args: - server_and_json (iterable[Tuple[str, dict, int]): - Iterable of triplets of (server_name, json_object, validity_time) - validity_time is a timestamp at which the signing key must be valid. + server_and_json (iterable[Tuple[str, dict, int, str]): + Iterable of (server_name, json_object, validity_time, request_name) + tuples. + + validity_time is a timestamp at which the signing key must be + valid. + + request_name is an identifier for this json object (eg, an event id) + for logging. Returns: List: for each input triplet, a deferred indicating success @@ -150,38 +163,54 @@ class Keyring(object): server_name. The deferreds run their callbacks in the sentinel logcontext. """ - # a list of VerifyKeyRequests - verify_requests = [] + return self._verify_objects( + VerifyJsonRequest(server_name, json_object, validity_time, request_name) + for server_name, json_object, validity_time, request_name in server_and_json + ) + + def _verify_objects(self, verify_requests): + """Does the work of verify_json_[objects_]for_server + + + Args: + verify_requests (iterable[VerifyJsonRequest]): + Iterable of verification requests. + + Returns: + List: for each input item, a deferred indicating success + or failure to verify each json object's signature for the given + server_name. The deferreds run their callbacks in the sentinel + logcontext. + """ + # a list of VerifyJsonRequests which are awaiting a key lookup + key_lookups = [] handle = preserve_fn(_handle_key_deferred) - def process(server_name, json_object, validity_time): + def process(verify_request): """Process an entry in the request list - Given a (server_name, json_object, validity_time) triplet from the request - list, adds a key request to verify_requests, and returns a deferred which + Adds a key request to key_lookups, and returns a deferred which will complete or fail (in the sentinel context) when verification completes. """ - key_ids = signature_ids(json_object, server_name) - - if not key_ids: + if not verify_request.key_ids: return defer.fail( SynapseError( - 400, "Not signed by %s" % (server_name,), Codes.UNAUTHORIZED + 400, + "Not signed by %s" % (verify_request.server_name,), + Codes.UNAUTHORIZED, ) ) logger.debug( - "Verifying for %s with key_ids %s, min_validity %i", - server_name, - key_ids, - validity_time, + "Verifying %s for %s with key_ids %s, min_validity %i", + verify_request.request_name, + verify_request.server_name, + verify_request.key_ids, + verify_request.minimum_valid_until_ts, ) # add the key request to the queue, but don't start it off yet. - verify_request = VerifyKeyRequest( - server_name, key_ids, json_object, validity_time - ) - verify_requests.append(verify_request) + key_lookups.append(verify_request) # now run _handle_key_deferred, which will wait for the key request # to complete and then do the verification. @@ -190,13 +219,10 @@ class Keyring(object): # wrap it with preserve_fn (aka run_in_background) return handle(verify_request) - results = [ - process(server_name, json_object, validity_time) - for server_name, json_object, validity_time in server_and_json - ] + results = [process(r) for r in verify_requests] - if verify_requests: - run_in_background(self._start_key_lookups, verify_requests) + if key_lookups: + run_in_background(self._start_key_lookups, key_lookups) return results @@ -207,7 +233,7 @@ class Keyring(object): Once each fetch completes, verify_request.key_ready will be resolved. Args: - verify_requests (List[VerifyKeyRequest]): + verify_requests (List[VerifyJsonRequest]): """ try: @@ -308,7 +334,7 @@ class Keyring(object): with a SynapseError if none of the keys are found. Args: - verify_requests (list[VerifyKeyRequest]): list of verify requests + verify_requests (list[VerifyJsonRequest]): list of verify requests """ remaining_requests = set( @@ -357,7 +383,7 @@ class Keyring(object): Args: fetcher (KeyFetcher): fetcher to use to fetch the keys - remaining_requests (set[VerifyKeyRequest]): outstanding key requests. + remaining_requests (set[VerifyJsonRequest]): outstanding key requests. Any successfully-completed requests will be removed from the list. """ # dict[str, dict[str, int]]: keys to fetch. @@ -376,7 +402,7 @@ class Keyring(object): # the requests. keys_for_server[key_id] = max( keys_for_server.get(key_id, -1), - verify_request.minimum_valid_until_ts + verify_request.minimum_valid_until_ts, ) results = yield fetcher.get_keys(missing_keys) @@ -386,7 +412,7 @@ class Keyring(object): server_name = verify_request.server_name # see if any of the keys we got this time are sufficient to - # complete this VerifyKeyRequest. + # complete this VerifyJsonRequest. result_keys = results.get(server_name, {}) for key_id in verify_request.key_ids: fetch_key_result = result_keys.get(key_id) @@ -454,9 +480,7 @@ class BaseV2KeyFetcher(object): self.config = hs.get_config() @defer.inlineCallbacks - def process_v2_response( - self, from_server, response_json, time_added_ms - ): + def process_v2_response(self, from_server, response_json, time_added_ms): """Parse a 'Server Keys' structure from the result of a /key request This is used to parse either the entirety of the response from @@ -852,7 +876,7 @@ def _handle_key_deferred(verify_request): """Waits for the key to become available, and then performs a verification Args: - verify_request (VerifyKeyRequest): + verify_request (VerifyJsonRequest): Returns: Deferred[None] diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index b541913d82..fc5cfb7d83 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -271,6 +271,7 @@ def _check_sigs_on_pdus(keyring, room_version, pdus): p.sender_domain, p.redacted_pdu_json, p.pdu.origin_server_ts if v.enforce_key_validity else 0, + p.pdu.event_id, ) for p in pdus_to_check_sender ] @@ -306,6 +307,7 @@ def _check_sigs_on_pdus(keyring, room_version, pdus): get_domain_from_id(p.pdu.event_id), p.redacted_pdu_json, p.pdu.origin_server_ts if v.enforce_key_validity else 0, + p.pdu.event_id, ) for p in pdus_to_check_event_id ] diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 0db8858cf1..949a5fb2aa 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -140,7 +140,9 @@ class Authenticator(object): 401, "Missing Authorization headers", Codes.UNAUTHORIZED, ) - yield self.keyring.verify_json_for_server(origin, json_request, now) + yield self.keyring.verify_json_for_server( + origin, json_request, now, "Incoming request" + ) logger.info("Request from %s", origin) request.authenticated_entity = origin diff --git a/synapse/groups/attestations.py b/synapse/groups/attestations.py index fa6b641ee1..e5dda1975f 100644 --- a/synapse/groups/attestations.py +++ b/synapse/groups/attestations.py @@ -101,7 +101,9 @@ class GroupAttestationSigning(object): if valid_until_ms < now: raise SynapseError(400, "Attestation expired") - yield self.keyring.verify_json_for_server(server_name, attestation, now) + yield self.keyring.verify_json_for_server( + server_name, attestation, now, "Group attestation" + ) def create_attestation(self, group_id, user_id): """Create an attestation for the group_id and user_id with default diff --git a/tests/crypto/test_keyring.py b/tests/crypto/test_keyring.py index 4cff7e36c8..18121f4f6c 100644 --- a/tests/crypto/test_keyring.py +++ b/tests/crypto/test_keyring.py @@ -134,7 +134,7 @@ class KeyringTestCase(unittest.HomeserverTestCase): context_11.request = "11" res_deferreds = kr.verify_json_objects_for_server( - [("server10", json1, 0), ("server11", {}, 0)] + [("server10", json1, 0, "test10"), ("server11", {}, 0, "test11")] ) # the unsigned json should be rejected pretty quickly @@ -171,7 +171,7 @@ class KeyringTestCase(unittest.HomeserverTestCase): self.http_client.post_json.return_value = defer.Deferred() res_deferreds_2 = kr.verify_json_objects_for_server( - [("server10", json1, 0)] + [("server10", json1, 0, "test")] ) res_deferreds_2[0].addBoth(self.check_context, None) yield logcontext.make_deferred_yieldable(res_deferreds_2[0]) @@ -205,11 +205,11 @@ class KeyringTestCase(unittest.HomeserverTestCase): signedjson.sign.sign_json(json1, "server9", key1) # should fail immediately on an unsigned object - d = _verify_json_for_server(kr, "server9", {}, 0) + d = _verify_json_for_server(kr, "server9", {}, 0, "test unsigned") self.failureResultOf(d, SynapseError) # should suceed on a signed object - d = _verify_json_for_server(kr, "server9", json1, 500) + d = _verify_json_for_server(kr, "server9", json1, 500, "test signed") # self.assertFalse(d.called) self.get_success(d) @@ -239,7 +239,7 @@ class KeyringTestCase(unittest.HomeserverTestCase): # the first request should succeed; the second should fail because the key # has expired results = kr.verify_json_objects_for_server( - [("server1", json1, 500), ("server1", json1, 1500)] + [("server1", json1, 500, "test1"), ("server1", json1, 1500, "test2")] ) self.assertEqual(len(results), 2) self.get_success(results[0]) @@ -284,7 +284,7 @@ class KeyringTestCase(unittest.HomeserverTestCase): signedjson.sign.sign_json(json1, "server1", key1) results = kr.verify_json_objects_for_server( - [("server1", json1, 1200), ("server1", json1, 1500)] + [("server1", json1, 1200, "test1"), ("server1", json1, 1500, "test2")] ) self.assertEqual(len(results), 2) self.get_success(results[0]) @@ -522,16 +522,14 @@ def run_in_context(f, *args, **kwargs): defer.returnValue(rv) -def _verify_json_for_server(keyring, server_name, json_object, validity_time): +def _verify_json_for_server(kr, *args): """thin wrapper around verify_json_for_server which makes sure it is wrapped with the patched defer.inlineCallbacks. """ @defer.inlineCallbacks def v(): - rv1 = yield keyring.verify_json_for_server( - server_name, json_object, validity_time - ) + rv1 = yield kr.verify_json_for_server(*args) defer.returnValue(rv1) return run_in_context(v) -- cgit 1.5.1 From 2d1d7b7e6f2bec3b96b0d23993369ce46aad4f32 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 6 Jun 2019 23:54:00 +0100 Subject: Prevent multiple device list updates from breaking a batch send (#5156) fixes #5153 --- changelog.d/5156.bugfix | 1 + synapse/federation/sender/per_destination_queue.py | 5 +- synapse/storage/devices.py | 152 +++++++++++++++++---- tests/storage/test_devices.py | 69 ++++++++++ 4 files changed, 196 insertions(+), 31 deletions(-) create mode 100644 changelog.d/5156.bugfix (limited to 'synapse/federation') diff --git a/changelog.d/5156.bugfix b/changelog.d/5156.bugfix new file mode 100644 index 0000000000..e8aa7d8241 --- /dev/null +++ b/changelog.d/5156.bugfix @@ -0,0 +1 @@ +Prevent federation device list updates breaking when processing multiple updates at once. \ No newline at end of file diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index fae8bea392..564c57203d 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -349,9 +349,10 @@ class PerDestinationQueue(object): @defer.inlineCallbacks def _get_new_device_messages(self, limit): last_device_list = self._last_device_list_stream_id - # Will return at most 20 entries + + # Retrieve list of new device updates to send to the destination now_stream_id, results = yield self._store.get_devices_by_remote( - self._destination, last_device_list + self._destination, last_device_list, limit=limit, ) edus = [ Edu( diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index fd869b934c..d102e07372 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -14,7 +14,7 @@ # limitations under the License. import logging -from six import iteritems, itervalues +from six import iteritems from canonicaljson import json @@ -72,11 +72,14 @@ class DeviceWorkerStore(SQLBaseStore): defer.returnValue({d["device_id"]: d for d in devices}) - def get_devices_by_remote(self, destination, from_stream_id): + @defer.inlineCallbacks + def get_devices_by_remote(self, destination, from_stream_id, limit): """Get stream of updates to send to remote servers Returns: - (int, list[dict]): current stream id and list of updates + Deferred[tuple[int, list[dict]]]: + current stream id (ie, the stream id of the last update included in the + response), and the list of updates """ now_stream_id = self._device_list_id_gen.get_current_token() @@ -84,55 +87,131 @@ class DeviceWorkerStore(SQLBaseStore): destination, int(from_stream_id) ) if not has_changed: - return (now_stream_id, []) - - return self.runInteraction( + defer.returnValue((now_stream_id, [])) + + # We retrieve n+1 devices from the list of outbound pokes where n is + # our outbound device update limit. We then check if the very last + # device has the same stream_id as the second-to-last device. If so, + # then we ignore all devices with that stream_id and only send the + # devices with a lower stream_id. + # + # If when culling the list we end up with no devices afterwards, we + # consider the device update to be too large, and simply skip the + # stream_id; the rationale being that such a large device list update + # is likely an error. + updates = yield self.runInteraction( "get_devices_by_remote", self._get_devices_by_remote_txn, destination, from_stream_id, now_stream_id, + limit + 1, ) + # Return an empty list if there are no updates + if not updates: + defer.returnValue((now_stream_id, [])) + + # if we have exceeded the limit, we need to exclude any results with the + # same stream_id as the last row. + if len(updates) > limit: + stream_id_cutoff = updates[-1][2] + now_stream_id = stream_id_cutoff - 1 + else: + stream_id_cutoff = None + + # Perform the equivalent of a GROUP BY + # + # Iterate through the updates list and copy non-duplicate + # (user_id, device_id) entries into a map, with the value being + # the max stream_id across each set of duplicate entries + # + # maps (user_id, device_id) -> stream_id + # as long as their stream_id does not match that of the last row + query_map = {} + for update in updates: + if stream_id_cutoff is not None and update[2] >= stream_id_cutoff: + # Stop processing updates + break + + key = (update[0], update[1]) + query_map[key] = max(query_map.get(key, 0), update[2]) + + # If we didn't find any updates with a stream_id lower than the cutoff, it + # means that there are more than limit updates all of which have the same + # steam_id. + + # That should only happen if a client is spamming the server with new + # devices, in which case E2E isn't going to work well anyway. We'll just + # skip that stream_id and return an empty list, and continue with the next + # stream_id next time. + if not query_map: + defer.returnValue((stream_id_cutoff, [])) + + results = yield self._get_device_update_edus_by_remote( + destination, + from_stream_id, + query_map, + ) + + defer.returnValue((now_stream_id, results)) + def _get_devices_by_remote_txn( - self, txn, destination, from_stream_id, now_stream_id + self, txn, destination, from_stream_id, now_stream_id, limit ): + """Return device update information for a given remote destination + + Args: + txn (LoggingTransaction): The transaction to execute + destination (str): The host the device updates are intended for + from_stream_id (int): The minimum stream_id to filter updates by, exclusive + now_stream_id (int): The maximum stream_id to filter updates by, inclusive + limit (int): Maximum number of device updates to return + + Returns: + List: List of device updates + """ sql = """ - SELECT user_id, device_id, max(stream_id) FROM device_lists_outbound_pokes + SELECT user_id, device_id, stream_id FROM device_lists_outbound_pokes WHERE destination = ? AND ? < stream_id AND stream_id <= ? AND sent = ? - GROUP BY user_id, device_id - LIMIT 20 + ORDER BY stream_id + LIMIT ? """ - txn.execute(sql, (destination, from_stream_id, now_stream_id, False)) + txn.execute(sql, (destination, from_stream_id, now_stream_id, False, limit)) - # maps (user_id, device_id) -> stream_id - query_map = {(r[0], r[1]): r[2] for r in txn} - if not query_map: - return (now_stream_id, []) + return list(txn) - if len(query_map) >= 20: - now_stream_id = max(stream_id for stream_id in itervalues(query_map)) + @defer.inlineCallbacks + def _get_device_update_edus_by_remote( + self, destination, from_stream_id, query_map, + ): + """Returns a list of device update EDUs as well as E2EE keys - devices = self._get_e2e_device_keys_txn( - txn, + Args: + destination (str): The host the device updates are intended for + from_stream_id (int): The minimum stream_id to filter updates by, exclusive + query_map (Dict[(str, str): int]): Dictionary mapping + user_id/device_id to update stream_id + + Returns: + List[Dict]: List of objects representing an device update EDU + + """ + devices = yield self.runInteraction( + "_get_e2e_device_keys_txn", + self._get_e2e_device_keys_txn, query_map.keys(), include_all_devices=True, include_deleted_devices=True, ) - prev_sent_id_sql = """ - SELECT coalesce(max(stream_id), 0) as stream_id - FROM device_lists_outbound_last_success - WHERE destination = ? AND user_id = ? AND stream_id <= ? - """ - results = [] for user_id, user_devices in iteritems(devices): # The prev_id for the first row is always the last row before # `from_stream_id` - txn.execute(prev_sent_id_sql, (destination, user_id, from_stream_id)) - rows = txn.fetchall() - prev_id = rows[0][0] + prev_id = yield self._get_last_device_update_for_remote_user( + destination, user_id, from_stream_id, + ) for device_id, device in iteritems(user_devices): stream_id = query_map[(user_id, device_id)] result = { @@ -156,7 +235,22 @@ class DeviceWorkerStore(SQLBaseStore): results.append(result) - return (now_stream_id, results) + defer.returnValue(results) + + def _get_last_device_update_for_remote_user( + self, destination, user_id, from_stream_id, + ): + def f(txn): + prev_sent_id_sql = """ + SELECT coalesce(max(stream_id), 0) as stream_id + FROM device_lists_outbound_last_success + WHERE destination = ? AND user_id = ? AND stream_id <= ? + """ + txn.execute(prev_sent_id_sql, (destination, user_id, from_stream_id)) + rows = txn.fetchall() + return rows[0][0] + + return self.runInteraction("get_last_device_update_for_remote_user", f) def mark_as_sent_devices_by_remote(self, destination, stream_id): """Mark that updates have successfully been sent to the destination. diff --git a/tests/storage/test_devices.py b/tests/storage/test_devices.py index aef4dfaf57..6396ccddb5 100644 --- a/tests/storage/test_devices.py +++ b/tests/storage/test_devices.py @@ -71,6 +71,75 @@ class DeviceStoreTestCase(tests.unittest.TestCase): res["device2"], ) + @defer.inlineCallbacks + def test_get_devices_by_remote(self): + device_ids = ["device_id1", "device_id2"] + + # Add two device updates with a single stream_id + yield self.store.add_device_change_to_streams( + "user_id", device_ids, ["somehost"], + ) + + # Get all device updates ever meant for this remote + now_stream_id, device_updates = yield self.store.get_devices_by_remote( + "somehost", -1, limit=100, + ) + + # Check original device_ids are contained within these updates + self._check_devices_in_updates(device_ids, device_updates) + + @defer.inlineCallbacks + def test_get_devices_by_remote_limited(self): + # Test breaking the update limit in 1, 101, and 1 device_id segments + + # first add one device + device_ids1 = ["device_id0"] + yield self.store.add_device_change_to_streams( + "user_id", device_ids1, ["someotherhost"], + ) + + # then add 101 + device_ids2 = ["device_id" + str(i + 1) for i in range(101)] + yield self.store.add_device_change_to_streams( + "user_id", device_ids2, ["someotherhost"], + ) + + # then one more + device_ids3 = ["newdevice"] + yield self.store.add_device_change_to_streams( + "user_id", device_ids3, ["someotherhost"], + ) + + # + # now read them back. + # + + # first we should get a single update + now_stream_id, device_updates = yield self.store.get_devices_by_remote( + "someotherhost", -1, limit=100, + ) + self._check_devices_in_updates(device_ids1, device_updates) + + # Then we should get an empty list back as the 101 devices broke the limit + now_stream_id, device_updates = yield self.store.get_devices_by_remote( + "someotherhost", now_stream_id, limit=100, + ) + self.assertEqual(len(device_updates), 0) + + # The 101 devices should've been cleared, so we should now just get one device + # update + now_stream_id, device_updates = yield self.store.get_devices_by_remote( + "someotherhost", now_stream_id, limit=100, + ) + self._check_devices_in_updates(device_ids3, device_updates) + + def _check_devices_in_updates(self, expected_device_ids, device_updates): + """Check that an specific device ids exist in a list of device update EDUs""" + self.assertEqual(len(device_updates), len(expected_device_ids)) + + received_device_ids = {update["device_id"] for update in device_updates} + self.assertEqual(received_device_ids, set(expected_device_ids)) + @defer.inlineCallbacks def test_update_device(self): yield self.store.store_device("user_id", "device_id", "display_name 1") -- cgit 1.5.1 From 5c15039e065d710459dac9e558c8ec94edf7b6c4 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 13 Jun 2019 13:52:08 +0100 Subject: Clean up code for sending federation EDUs. (#5381) This code confused the hell out of me today. Split _get_new_device_messages into its two (unrelated) parts. --- changelog.d/5381.misc | 1 + synapse/federation/sender/per_destination_queue.py | 40 ++++++++++++++-------- 2 files changed, 27 insertions(+), 14 deletions(-) create mode 100644 changelog.d/5381.misc (limited to 'synapse/federation') diff --git a/changelog.d/5381.misc b/changelog.d/5381.misc new file mode 100644 index 0000000000..bbf70a0445 --- /dev/null +++ b/changelog.d/5381.misc @@ -0,0 +1 @@ +Clean up code for sending federation EDUs. diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index 564c57203d..22a2735405 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -189,11 +189,21 @@ class PerDestinationQueue(object): pending_pdus = [] while True: - device_message_edus, device_stream_id, dev_list_id = ( - # We have to keep 2 free slots for presence and rr_edus - yield self._get_new_device_messages(MAX_EDUS_PER_TRANSACTION - 2) + # We have to keep 2 free slots for presence and rr_edus + limit = MAX_EDUS_PER_TRANSACTION - 2 + + device_update_edus, dev_list_id = ( + yield self._get_device_update_edus(limit) + ) + + limit -= len(device_update_edus) + + to_device_edus, device_stream_id = ( + yield self._get_to_device_message_edus(limit) ) + pending_edus = device_update_edus + to_device_edus + # BEGIN CRITICAL SECTION # # In order to avoid a race condition, we need to make sure that @@ -208,10 +218,6 @@ class PerDestinationQueue(object): # We can only include at most 50 PDUs per transactions pending_pdus, self._pending_pdus = pending_pdus[:50], pending_pdus[50:] - pending_edus = [] - - # We can only include at most 100 EDUs per transactions - # rr_edus and pending_presence take at most one slot each pending_edus.extend(self._get_rr_edus(force_flush=False)) pending_presence = self._pending_presence self._pending_presence = {} @@ -232,7 +238,6 @@ class PerDestinationQueue(object): ) ) - pending_edus.extend(device_message_edus) pending_edus.extend( self._pop_pending_edus(MAX_EDUS_PER_TRANSACTION - len(pending_edus)) ) @@ -272,10 +277,13 @@ class PerDestinationQueue(object): sent_edus_by_type.labels(edu.edu_type).inc() # Remove the acknowledged device messages from the database # Only bother if we actually sent some device messages - if device_message_edus: + if to_device_edus: yield self._store.delete_device_msgs_for_remote( self._destination, device_stream_id ) + + # also mark the device updates as sent + if device_update_edus: logger.info( "Marking as sent %r %r", self._destination, dev_list_id ) @@ -347,7 +355,7 @@ class PerDestinationQueue(object): return pending_edus @defer.inlineCallbacks - def _get_new_device_messages(self, limit): + def _get_device_update_edus(self, limit): last_device_list = self._last_device_list_stream_id # Retrieve list of new device updates to send to the destination @@ -366,15 +374,19 @@ class PerDestinationQueue(object): assert len(edus) <= limit, "get_devices_by_remote returned too many EDUs" + defer.returnValue((edus, now_stream_id)) + + @defer.inlineCallbacks + def _get_to_device_message_edus(self, limit): last_device_stream_id = self._last_device_stream_id to_device_stream_id = self._store.get_to_device_stream_token() contents, stream_id = yield self._store.get_new_device_msgs_for_remote( self._destination, last_device_stream_id, to_device_stream_id, - limit - len(edus), + limit, ) - edus.extend( + edus = [ Edu( origin=self._server_name, destination=self._destination, @@ -382,6 +394,6 @@ class PerDestinationQueue(object): content=content, ) for content in contents - ) + ] - defer.returnValue((edus, stream_id, now_stream_id)) + defer.returnValue((edus, stream_id)) -- cgit 1.5.1