diff options
-rw-r--r-- | changelog.d/5328.misc | 1 | ||||
-rw-r--r-- | changelog.d/5332.misc | 1 | ||||
-rw-r--r-- | changelog.d/5333.bugfix | 1 | ||||
-rw-r--r-- | changelog.d/5334.bugfix | 1 | ||||
-rw-r--r-- | synapse/crypto/keyring.py | 82 | ||||
-rw-r--r-- | synapse/http/matrixfederationclient.py | 71 | ||||
-rw-r--r-- | synapse/rest/client/v1/voip.py | 1 | ||||
-rw-r--r-- | synapse/rest/key/v2/remote_key_resource.py | 12 | ||||
-rw-r--r-- | tests/crypto/test_keyring.py | 12 |
9 files changed, 125 insertions, 57 deletions
diff --git a/changelog.d/5328.misc b/changelog.d/5328.misc new file mode 100644 index 0000000000..e1b9dc58a3 --- /dev/null +++ b/changelog.d/5328.misc @@ -0,0 +1 @@ +The base classes for the v1 and v2_alpha REST APIs have been unified. diff --git a/changelog.d/5332.misc b/changelog.d/5332.misc new file mode 100644 index 0000000000..dcfac4eac9 --- /dev/null +++ b/changelog.d/5332.misc @@ -0,0 +1 @@ +Improve docstrings on MatrixFederationClient. diff --git a/changelog.d/5333.bugfix b/changelog.d/5333.bugfix new file mode 100644 index 0000000000..cb05a6dd63 --- /dev/null +++ b/changelog.d/5333.bugfix @@ -0,0 +1 @@ +Fix various problems which made the signing-key notary server time out for some requests. \ No newline at end of file diff --git a/changelog.d/5334.bugfix b/changelog.d/5334.bugfix new file mode 100644 index 0000000000..ed141e0918 --- /dev/null +++ b/changelog.d/5334.bugfix @@ -0,0 +1 @@ +Fix bug which would make certain operations (such as room joins) block for 20 minutes while attemoting to fetch verification keys. diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index cdec06c88e..0fd15287e7 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -46,6 +46,7 @@ from synapse.api.errors import ( ) from synapse.storage.keys import FetchKeyResult from synapse.util import logcontext, unwrapFirstError +from synapse.util.async_helpers import yieldable_gather_results from synapse.util.logcontext import ( LoggingContext, PreserveLoggingContext, @@ -169,7 +170,12 @@ class Keyring(object): ) ) - logger.debug("Verifying for %s with key_ids %s", server_name, key_ids) + logger.debug( + "Verifying for %s with key_ids %s, min_validity %i", + server_name, + key_ids, + validity_time, + ) # add the key request to the queue, but don't start it off yet. verify_request = VerifyKeyRequest( @@ -638,7 +644,6 @@ class PerspectivesKeyFetcher(BaseV2KeyFetcher): for server_name, server_keys in keys_to_fetch.items() } }, - long_retries=True, ) except (NotRetryingDestination, RequestSendFailed) as e: raise_from(KeyLookupError("Failed to connect to remote server"), e) @@ -744,34 +749,50 @@ class ServerKeyFetcher(BaseV2KeyFetcher): self.clock = hs.get_clock() self.client = hs.get_http_client() - @defer.inlineCallbacks def get_keys(self, keys_to_fetch): - """see KeyFetcher.get_keys""" - # TODO make this more resilient - results = yield logcontext.make_deferred_yieldable( - defer.gatherResults( - [ - run_in_background( - self.get_server_verify_key_v2_direct, - server_name, - server_keys.keys(), - ) - for server_name, server_keys in keys_to_fetch.items() - ], - consumeErrors=True, - ).addErrback(unwrapFirstError) - ) + """ + Args: + keys_to_fetch (dict[str, iterable[str]]): + the keys to be fetched. server_name -> key_ids - merged = {} - for result in results: - merged.update(result) + Returns: + Deferred[dict[str, dict[str, synapse.storage.keys.FetchKeyResult|None]]]: + map from server_name -> key_id -> FetchKeyResult + """ + + results = {} - defer.returnValue( - {server_name: keys for server_name, keys in merged.items() if keys} + @defer.inlineCallbacks + def get_key(key_to_fetch_item): + server_name, key_ids = key_to_fetch_item + try: + keys = yield self.get_server_verify_key_v2_direct(server_name, key_ids) + results[server_name] = keys + except KeyLookupError as e: + logger.warning( + "Error looking up keys %s from %s: %s", key_ids, server_name, e + ) + except Exception: + logger.exception("Error getting keys %s from %s", key_ids, server_name) + + return yieldable_gather_results(get_key, keys_to_fetch.items()).addCallback( + lambda _: results ) @defer.inlineCallbacks def get_server_verify_key_v2_direct(self, server_name, key_ids): + """ + + Args: + server_name (str): + key_ids (iterable[str]): + + Returns: + Deferred[dict[str, FetchKeyResult]]: map from key ID to lookup result + + Raises: + KeyLookupError if there was a problem making the lookup + """ keys = {} # type: dict[str, FetchKeyResult] for requested_key_id in key_ids: @@ -786,6 +807,19 @@ class ServerKeyFetcher(BaseV2KeyFetcher): path="/_matrix/key/v2/server/" + urllib.parse.quote(requested_key_id), ignore_backoff=True, + + # we only give the remote server 10s to respond. It should be an + # easy request to handle, so if it doesn't reply within 10s, it's + # probably not going to. + # + # Furthermore, when we are acting as a notary server, we cannot + # wait all day for all of the origin servers, as the requesting + # server will otherwise time out before we can respond. + # + # (Note that get_json may make 4 attempts, so this can still take + # almost 45 seconds to fetch the headers, plus up to another 60s to + # read the response). + timeout=10000, ) except (NotRetryingDestination, RequestSendFailed) as e: raise_from(KeyLookupError("Failed to connect to remote server"), e) @@ -810,7 +844,7 @@ class ServerKeyFetcher(BaseV2KeyFetcher): ) keys.update(response_keys) - defer.returnValue({server_name: keys}) + defer.returnValue(keys) @defer.inlineCallbacks diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 8197619a78..663ea72a7a 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -285,7 +285,24 @@ class MatrixFederationHttpClient(object): request (MatrixFederationRequest): details of request to be sent timeout (int|None): number of milliseconds to wait for the response headers - (including connecting to the server). 60s by default. + (including connecting to the server), *for each attempt*. + 60s by default. + + long_retries (bool): whether to use the long retry algorithm. + + The regular retry algorithm makes 4 attempts, with intervals + [0.5s, 1s, 2s]. + + The long retry algorithm makes 11 attempts, with intervals + [4s, 16s, 60s, 60s, ...] + + Both algorithms add -20%/+40% jitter to the retry intervals. + + Note that the above intervals are *in addition* to the time spent + waiting for the request to complete (up to `timeout` ms). + + NB: the long retry algorithm takes over 20 minutes to complete, with + a default timeout of 60s! ignore_backoff (bool): true to ignore the historical backoff data and try the request anyway. @@ -566,10 +583,14 @@ class MatrixFederationHttpClient(object): the request body. This will be encoded as JSON. json_data_callback (callable): A callable returning the dict to use as the request body. - long_retries (bool): A boolean that indicates whether we should - retry for a short or long time. - timeout(int): How long to try (in ms) the destination for before - giving up. None indicates no timeout. + + long_retries (bool): whether to use the long retry algorithm. See + docs on _send_request for details. + + timeout (int|None): number of milliseconds to wait for the response headers + (including connecting to the server), *for each attempt*. + self._default_timeout (60s) by default. + ignore_backoff (bool): true to ignore the historical backoff data and try the request anyway. backoff_on_404 (bool): True if we should count a 404 response as @@ -627,15 +648,22 @@ class MatrixFederationHttpClient(object): Args: destination (str): The remote server to send the HTTP request to. + path (str): The HTTP path. + data (dict): A dict containing the data that will be used as the request body. This will be encoded as JSON. - long_retries (bool): A boolean that indicates whether we should - retry for a short or long time. - timeout(int): How long to try (in ms) the destination for before - giving up. None indicates no timeout. + + long_retries (bool): whether to use the long retry algorithm. See + docs on _send_request for details. + + timeout (int|None): number of milliseconds to wait for the response headers + (including connecting to the server), *for each attempt*. + self._default_timeout (60s) by default. + ignore_backoff (bool): true to ignore the historical backoff data and try the request anyway. + args (dict): query params Returns: Deferred[dict|list]: Succeeds when we get a 2xx HTTP response. The @@ -686,14 +714,19 @@ class MatrixFederationHttpClient(object): Args: destination (str): The remote server to send the HTTP request to. + path (str): The HTTP path. + args (dict|None): A dictionary used to create query strings, defaults to None. - timeout (int): How long to try (in ms) the destination for before - giving up. None indicates no timeout and that the request will - be retried. + + timeout (int|None): number of milliseconds to wait for the response headers + (including connecting to the server), *for each attempt*. + self._default_timeout (60s) by default. + ignore_backoff (bool): true to ignore the historical backoff data and try the request anyway. + try_trailing_slash_on_400 (bool): True if on a 400 M_UNRECOGNIZED response we should try appending a trailing slash to the end of the request. Workaround for #3622 in Synapse <= v0.99.3. @@ -742,12 +775,18 @@ class MatrixFederationHttpClient(object): destination (str): The remote server to send the HTTP request to. path (str): The HTTP path. - long_retries (bool): A boolean that indicates whether we should - retry for a short or long time. - timeout(int): How long to try (in ms) the destination for before - giving up. None indicates no timeout. + + long_retries (bool): whether to use the long retry algorithm. See + docs on _send_request for details. + + timeout (int|None): number of milliseconds to wait for the response headers + (including connecting to the server), *for each attempt*. + self._default_timeout (60s) by default. + ignore_backoff (bool): true to ignore the historical backoff data and try the request anyway. + + args (dict): query params Returns: Deferred[dict|list]: Succeeds when we get a 2xx HTTP response. The result will be the decoded JSON body. diff --git a/synapse/rest/client/v1/voip.py b/synapse/rest/client/v1/voip.py index 0975df84cf..6381049210 100644 --- a/synapse/rest/client/v1/voip.py +++ b/synapse/rest/client/v1/voip.py @@ -29,6 +29,7 @@ class VoipRestServlet(RestServlet): def __init__(self, hs): super(VoipRestServlet, self).__init__() self.hs = hs + self.auth = hs.get_auth() @defer.inlineCallbacks def on_GET(self, request): diff --git a/synapse/rest/key/v2/remote_key_resource.py b/synapse/rest/key/v2/remote_key_resource.py index 21c3c807b9..8a730bbc35 100644 --- a/synapse/rest/key/v2/remote_key_resource.py +++ b/synapse/rest/key/v2/remote_key_resource.py @@ -20,7 +20,7 @@ from twisted.web.resource import Resource from twisted.web.server import NOT_DONE_YET from synapse.api.errors import Codes, SynapseError -from synapse.crypto.keyring import KeyLookupError, ServerKeyFetcher +from synapse.crypto.keyring import ServerKeyFetcher from synapse.http.server import respond_with_json_bytes, wrap_json_request_handler from synapse.http.servlet import parse_integer, parse_json_object_from_request @@ -215,15 +215,7 @@ class RemoteKey(Resource): json_results.add(bytes(result["key_json"])) if cache_misses and query_remote_on_cache_miss: - for server_name, key_ids in cache_misses.items(): - try: - yield self.fetcher.get_server_verify_key_v2_direct( - server_name, key_ids - ) - except KeyLookupError as e: - logger.info("Failed to fetch key: %s", e) - except Exception: - logger.exception("Failed to get key for %r", server_name) + yield self.fetcher.get_keys(cache_misses) yield self.query_keys( request, query, query_remote_on_cache_miss=False ) diff --git a/tests/crypto/test_keyring.py b/tests/crypto/test_keyring.py index 096401938d..4cff7e36c8 100644 --- a/tests/crypto/test_keyring.py +++ b/tests/crypto/test_keyring.py @@ -25,11 +25,7 @@ from twisted.internet import defer from synapse.api.errors import SynapseError from synapse.crypto import keyring -from synapse.crypto.keyring import ( - KeyLookupError, - PerspectivesKeyFetcher, - ServerKeyFetcher, -) +from synapse.crypto.keyring import PerspectivesKeyFetcher, ServerKeyFetcher from synapse.storage.keys import FetchKeyResult from synapse.util import logcontext from synapse.util.logcontext import LoggingContext @@ -364,9 +360,11 @@ class ServerKeyFetcherTestCase(unittest.HomeserverTestCase): bytes(res["key_json"]), canonicaljson.encode_canonical_json(response) ) - # change the server name: it should cause a rejection + # change the server name: the result should be ignored response["server_name"] = "OTHER_SERVER" - self.get_failure(fetcher.get_keys(keys_to_fetch), KeyLookupError) + + keys = self.get_success(fetcher.get_keys(keys_to_fetch)) + self.assertEqual(keys, {}) class PerspectivesKeyFetcherTestCase(unittest.HomeserverTestCase): |