diff --git a/changelog.d/12504.misc b/changelog.d/12504.misc
new file mode 100644
index 0000000000..0bebaa213d
--- /dev/null
+++ b/changelog.d/12504.misc
@@ -0,0 +1 @@
+Allow for the configuration of the timeout, max request retries and max retry delays in the matrix federation client.
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 0cf6e075ff..8426de0417 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -1196,6 +1196,32 @@ Example configuration:
allow_device_name_lookup_over_federation: true
```
---
+### `federation`
+
+The federation section defines some sub-options related to federation.
+
+The following options configure the timeout and retry logic applied to each individual
+request, independently of the others.
+The short retry algorithm is used when something or someone is waiting for the request's
+response, while the long retry algorithm is used for requests that happen in the background,
+like sending a federation transaction.
+
+* `client_timeout`: timeout for the federation requests in seconds. Defaults to 60s.
+* `max_short_retry_delay`: maximum delay to be used for the short retry algo in seconds. Defaults to 2s.
+* `max_long_retry_delay`: maximum delay to be used for the long retry algo in seconds. Defaults to 60s.
+* `max_short_retries`: maximum number of retries for the short retry algo. Defaults to 3 attempts.
+* `max_long_retries`: maximum number of retries for the long retry algo. Defaults to 10 attempts.
+
+Example configuration:
+```yaml
+federation:
+ client_timeout: 180
+ max_short_retry_delay: 7
+ max_long_retry_delay: 100
+ max_short_retries: 5
+ max_long_retries: 20
+```
+---
## Caching
Options related to caching.
diff --git a/synapse/config/federation.py b/synapse/config/federation.py
index 336fca578a..d21f7fd02a 100644
--- a/synapse/config/federation.py
+++ b/synapse/config/federation.py
@@ -22,6 +22,8 @@ class FederationConfig(Config):
section = "federation"
def read_config(self, config: JsonDict, **kwargs: Any) -> None:
+ federation_config = config.setdefault("federation", {})
+
# FIXME: federation_domain_whitelist needs sytests
self.federation_domain_whitelist: Optional[dict] = None
federation_domain_whitelist = config.get("federation_domain_whitelist", None)
@@ -49,5 +51,13 @@ class FederationConfig(Config):
"allow_device_name_lookup_over_federation", False
)
+ # Allow for the configuration of the timeout, max request retries
+ # and max retry delays in the matrix federation client.
+ self.client_timeout = federation_config.get("client_timeout", 60)
+ self.max_long_retry_delay = federation_config.get("max_long_retry_delay", 60)
+ self.max_short_retry_delay = federation_config.get("max_short_retry_delay", 2)
+ self.max_long_retries = federation_config.get("max_long_retries", 10)
+ self.max_short_retries = federation_config.get("max_short_retries", 3)
+
_METRICS_FOR_DOMAINS_SCHEMA = {"type": "array", "items": {"type": "string"}}
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index abb5ae5815..ed36825b67 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -95,8 +95,6 @@ incoming_responses_counter = Counter(
)
-MAX_LONG_RETRIES = 10
-MAX_SHORT_RETRIES = 3
MAXINT = sys.maxsize
@@ -406,7 +404,12 @@ class MatrixFederationHttpClient:
self.clock = hs.get_clock()
self._store = hs.get_datastores().main
self.version_string_bytes = hs.version_string.encode("ascii")
- self.default_timeout = 60
+ self.default_timeout = hs.config.federation.client_timeout
+
+ self.max_long_retry_delay = hs.config.federation.max_long_retry_delay
+ self.max_short_retry_delay = hs.config.federation.max_short_retry_delay
+ self.max_long_retries = hs.config.federation.max_long_retries
+ self.max_short_retries = hs.config.federation.max_short_retries
self._cooperator = Cooperator(scheduler=_make_scheduler(self.reactor))
@@ -583,9 +586,9 @@ class MatrixFederationHttpClient:
# XXX: Would be much nicer to retry only at the transaction-layer
# (once we have reliable transactions in place)
if long_retries:
- retries_left = MAX_LONG_RETRIES
+ retries_left = self.max_long_retries
else:
- retries_left = MAX_SHORT_RETRIES
+ retries_left = self.max_short_retries
url_bytes = request.uri
url_str = url_bytes.decode("ascii")
@@ -730,12 +733,12 @@ class MatrixFederationHttpClient:
if retries_left and not timeout:
if long_retries:
- delay = 4 ** (MAX_LONG_RETRIES + 1 - retries_left)
- delay = min(delay, 60)
+ delay = 4 ** (self.max_long_retries + 1 - retries_left)
+ delay = min(delay, self.max_long_retry_delay)
delay *= random.uniform(0.8, 1.4)
else:
- delay = 0.5 * 2 ** (MAX_SHORT_RETRIES - retries_left)
- delay = min(delay, 2)
+ delay = 0.5 * 2 ** (self.max_short_retries - retries_left)
+ delay = min(delay, self.max_short_retry_delay)
delay *= random.uniform(0.8, 1.4)
logger.debug(
diff --git a/tests/http/test_matrixfederationclient.py b/tests/http/test_matrixfederationclient.py
index 0dfc03ce50..8565f8ac64 100644
--- a/tests/http/test_matrixfederationclient.py
+++ b/tests/http/test_matrixfederationclient.py
@@ -40,7 +40,7 @@ from synapse.server import HomeServer
from synapse.util import Clock
from tests.server import FakeTransport
-from tests.unittest import HomeserverTestCase
+from tests.unittest import HomeserverTestCase, override_config
def check_logcontext(context: LoggingContextOrSentinel) -> None:
@@ -640,3 +640,21 @@ class FederationClientTests(HomeserverTestCase):
self.cl.build_auth_headers(
b"", b"GET", b"https://example.com", destination_is=b""
)
+
+ @override_config(
+ {
+ "federation": {
+ "client_timeout": 180,
+ "max_long_retry_delay": 100,
+ "max_short_retry_delay": 7,
+ "max_long_retries": 20,
+ "max_short_retries": 5,
+ }
+ }
+ )
+ def test_configurable_retry_and_delay_values(self) -> None:
+ self.assertEqual(self.cl.default_timeout, 180)
+ self.assertEqual(self.cl.max_long_retry_delay, 100)
+ self.assertEqual(self.cl.max_short_retry_delay, 7)
+ self.assertEqual(self.cl.max_long_retries, 20)
+ self.assertEqual(self.cl.max_short_retries, 5)
|