summary refs log tree commit diff
path: root/synapse/util/retryutils.py
diff options
context:
space:
mode:
authorMathieu Velten <mathieuv@matrix.org>2023-08-03 20:36:55 +0200
committerGitHub <noreply@github.com>2023-08-03 14:36:55 -0400
commitf0a860908ba0309c89c9dba452d99b4f9c6928f7 (patch)
tree0246657c2956d574bbd57dcaae6965f41f9ac3de /synapse/util/retryutils.py
parentAllow modules to check whether the current worker is configured to run backgr... (diff)
downloadsynapse-f0a860908ba0309c89c9dba452d99b4f9c6928f7.tar.xz
Allow config of the backoff algorithm for the federation client. (#15754)
Adds three new configuration variables:

* destination_min_retry_interval is identical to before (10mn).
* destination_retry_multiplier is now 2 instead of 5, the maximum value will
  be reached slower.
* destination_max_retry_interval is one day instead of (essentially) infinity.

Capping this will cause destinations to continue to be retried sometimes instead
of being lost forever. The previous value was 2 ^ 62 milliseconds.
Diffstat (limited to 'synapse/util/retryutils.py')
-rw-r--r--synapse/util/retryutils.py29
1 files changed, 16 insertions, 13 deletions
diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py
index dcc037b982..27e9fc976c 100644
--- a/synapse/util/retryutils.py
+++ b/synapse/util/retryutils.py
@@ -27,15 +27,6 @@ if TYPE_CHECKING:
 
 logger = logging.getLogger(__name__)
 
-# the initial backoff, after the first transaction fails
-MIN_RETRY_INTERVAL = 10 * 60 * 1000
-
-# how much we multiply the backoff by after each subsequent fail
-RETRY_MULTIPLIER = 5
-
-# a cap on the backoff. (Essentially none)
-MAX_RETRY_INTERVAL = 2**62
-
 
 class NotRetryingDestination(Exception):
     def __init__(self, retry_last_ts: int, retry_interval: int, destination: str):
@@ -169,6 +160,16 @@ class RetryDestinationLimiter:
         self.notifier = notifier
         self.replication_client = replication_client
 
+        self.destination_min_retry_interval_ms = (
+            self.store.hs.config.federation.destination_min_retry_interval_ms
+        )
+        self.destination_retry_multiplier = (
+            self.store.hs.config.federation.destination_retry_multiplier
+        )
+        self.destination_max_retry_interval_ms = (
+            self.store.hs.config.federation.destination_max_retry_interval_ms
+        )
+
     def __enter__(self) -> None:
         pass
 
@@ -220,13 +221,15 @@ class RetryDestinationLimiter:
             # We couldn't connect.
             if self.retry_interval:
                 self.retry_interval = int(
-                    self.retry_interval * RETRY_MULTIPLIER * random.uniform(0.8, 1.4)
+                    self.retry_interval
+                    * self.destination_retry_multiplier
+                    * random.uniform(0.8, 1.4)
                 )
 
-                if self.retry_interval >= MAX_RETRY_INTERVAL:
-                    self.retry_interval = MAX_RETRY_INTERVAL
+                if self.retry_interval >= self.destination_max_retry_interval_ms:
+                    self.retry_interval = self.destination_max_retry_interval_ms
             else:
-                self.retry_interval = MIN_RETRY_INTERVAL
+                self.retry_interval = self.destination_min_retry_interval_ms
 
             logger.info(
                 "Connection to %s was unsuccessful (%s(%s)); backoff now %i",