diff options
author | Mathieu Velten <mathieuv@matrix.org> | 2023-06-21 10:41:11 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-06-21 10:41:11 +0200 |
commit | 496f73103df838795b0e98f8c1c7337468e41abc (patch) | |
tree | ec6d75dbb2b449420ffc5cbde0591c76fefc166c /synapse | |
parent | Merge branch 'master' into develop (diff) | |
download | synapse-496f73103df838795b0e98f8c1c7337468e41abc.tar.xz |
Allow for the configuration of max request retries and min/max retry delays in the matrix federation client (#15783)
Diffstat (limited to 'synapse')
-rw-r--r-- | synapse/config/federation.py | 16 | ||||
-rw-r--r-- | synapse/http/matrixfederationclient.py | 59 |
2 files changed, 54 insertions, 21 deletions
diff --git a/synapse/config/federation.py b/synapse/config/federation.py index 336fca578a..0e1cb8b6e3 100644 --- a/synapse/config/federation.py +++ b/synapse/config/federation.py @@ -22,6 +22,8 @@ class FederationConfig(Config): section = "federation" def read_config(self, config: JsonDict, **kwargs: Any) -> None: + federation_config = config.setdefault("federation", {}) + # FIXME: federation_domain_whitelist needs sytests self.federation_domain_whitelist: Optional[dict] = None federation_domain_whitelist = config.get("federation_domain_whitelist", None) @@ -49,5 +51,19 @@ class FederationConfig(Config): "allow_device_name_lookup_over_federation", False ) + # Allow for the configuration of timeout, max request retries + # and min/max retry delays in the matrix federation client. + self.client_timeout_ms = Config.parse_duration( + federation_config.get("client_timeout", "60s") + ) + self.max_long_retry_delay_ms = Config.parse_duration( + federation_config.get("max_long_retry_delay", "60s") + ) + self.max_short_retry_delay_ms = Config.parse_duration( + federation_config.get("max_short_retry_delay", "2s") + ) + self.max_long_retries = federation_config.get("max_long_retries", 10) + self.max_short_retries = federation_config.get("max_short_retries", 3) + _METRICS_FOR_DOMAINS_SCHEMA = {"type": "array", "items": {"type": "string"}} diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index fc0101808d..cc4e258b0f 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -95,8 +95,6 @@ incoming_responses_counter = Counter( ) -MAX_LONG_RETRIES = 10 -MAX_SHORT_RETRIES = 3 MAXINT = sys.maxsize @@ -413,7 +411,16 @@ class MatrixFederationHttpClient: self.clock = hs.get_clock() self._store = hs.get_datastores().main self.version_string_bytes = hs.version_string.encode("ascii") - self.default_timeout = 60 + self.default_timeout_seconds = hs.config.federation.client_timeout_ms / 1000 + + self.max_long_retry_delay_seconds = ( + hs.config.federation.max_long_retry_delay_ms / 1000 + ) + self.max_short_retry_delay_seconds = ( + hs.config.federation.max_short_retry_delay_ms / 1000 + ) + self.max_long_retries = hs.config.federation.max_long_retries + self.max_short_retries = hs.config.federation.max_short_retries self._cooperator = Cooperator(scheduler=_make_scheduler(self.reactor)) @@ -542,10 +549,10 @@ class MatrixFederationHttpClient: logger.exception(f"Invalid destination: {request.destination}.") raise FederationDeniedError(request.destination) - if timeout: + if timeout is not None: _sec_timeout = timeout / 1000 else: - _sec_timeout = self.default_timeout + _sec_timeout = self.default_timeout_seconds if ( self.hs.config.federation.federation_domain_whitelist is not None @@ -590,9 +597,9 @@ class MatrixFederationHttpClient: # XXX: Would be much nicer to retry only at the transaction-layer # (once we have reliable transactions in place) if long_retries: - retries_left = MAX_LONG_RETRIES + retries_left = self.max_long_retries else: - retries_left = MAX_SHORT_RETRIES + retries_left = self.max_short_retries url_bytes = request.uri url_str = url_bytes.decode("ascii") @@ -737,24 +744,34 @@ class MatrixFederationHttpClient: if retries_left and not timeout: if long_retries: - delay = 4 ** (MAX_LONG_RETRIES + 1 - retries_left) - delay = min(delay, 60) - delay *= random.uniform(0.8, 1.4) + delay_seconds = 4 ** ( + self.max_long_retries + 1 - retries_left + ) + delay_seconds = min( + delay_seconds, self.max_long_retry_delay_seconds + ) + delay_seconds *= random.uniform(0.8, 1.4) else: - delay = 0.5 * 2 ** (MAX_SHORT_RETRIES - retries_left) - delay = min(delay, 2) - delay *= random.uniform(0.8, 1.4) + delay_seconds = 0.5 * 2 ** ( + self.max_short_retries - retries_left + ) + delay_seconds = min( + delay_seconds, self.max_short_retry_delay_seconds + ) + delay_seconds *= random.uniform(0.8, 1.4) logger.debug( "{%s} [%s] Waiting %ss before re-sending...", request.txn_id, request.destination, - delay, + delay_seconds, ) # Sleep for the calculated delay, or wake up immediately # if we get notified that the server is back up. - await self._sleeper.sleep(request.destination, delay * 1000) + await self._sleeper.sleep( + request.destination, delay_seconds * 1000 + ) retries_left -= 1 else: raise @@ -953,7 +970,7 @@ class MatrixFederationHttpClient: if timeout is not None: _sec_timeout = timeout / 1000 else: - _sec_timeout = self.default_timeout + _sec_timeout = self.default_timeout_seconds if parser is None: parser = cast(ByteParser[T], JsonParser()) @@ -1031,10 +1048,10 @@ class MatrixFederationHttpClient: ignore_backoff=ignore_backoff, ) - if timeout: + if timeout is not None: _sec_timeout = timeout / 1000 else: - _sec_timeout = self.default_timeout + _sec_timeout = self.default_timeout_seconds body = await _handle_response( self.reactor, _sec_timeout, request, response, start_ms, parser=JsonParser() @@ -1142,7 +1159,7 @@ class MatrixFederationHttpClient: if timeout is not None: _sec_timeout = timeout / 1000 else: - _sec_timeout = self.default_timeout + _sec_timeout = self.default_timeout_seconds if parser is None: parser = cast(ByteParser[T], JsonParser()) @@ -1218,7 +1235,7 @@ class MatrixFederationHttpClient: if timeout is not None: _sec_timeout = timeout / 1000 else: - _sec_timeout = self.default_timeout + _sec_timeout = self.default_timeout_seconds body = await _handle_response( self.reactor, _sec_timeout, request, response, start_ms, parser=JsonParser() @@ -1270,7 +1287,7 @@ class MatrixFederationHttpClient: try: d = read_body_with_max_size(response, output_stream, max_size) - d.addTimeout(self.default_timeout, self.reactor) + d.addTimeout(self.default_timeout_seconds, self.reactor) length = await make_deferred_yieldable(d) except BodyExceededMaxSize: msg = "Requested file is too large > %r bytes" % (max_size,) |