summary refs log tree commit diff
path: root/synapse/util
diff options
context:
space:
mode:
authorRichard van der Hoff <1389908+richvdh@users.noreply.github.com>2019-09-17 11:41:54 +0100
committerGitHub <noreply@github.com>2019-09-17 11:41:54 +0100
commit1e19ce00bff8d67168d39201cdf9424f7b2f22f6 (patch)
tree7258ff60d38719b8c3dcda2249e7ce8169d5417f /synapse/util
parentFix well-known lookups with the federation certificate whitelist (#5997) (diff)
downloadsynapse-1e19ce00bff8d67168d39201cdf9424f7b2f22f6.tar.xz
Add 'failure_ts' column to 'destinations' table (#6016)
Track the time that a server started failing at, for general analysis purposes.
Diffstat (limited to 'synapse/util')
-rw-r--r--synapse/util/retryutils.py16
1 files changed, 15 insertions, 1 deletions
diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py
index b740913b58..a5f2fbef5c 100644
--- a/synapse/util/retryutils.py
+++ b/synapse/util/retryutils.py
@@ -80,11 +80,13 @@ def get_retry_limiter(destination, clock, store, ignore_backoff=False, **kwargs)
             # We aren't ready to retry that destination.
             raise
     """
+    failure_ts = None
     retry_last_ts, retry_interval = (0, 0)
 
     retry_timings = yield store.get_destination_retry_timings(destination)
 
     if retry_timings:
+        failure_ts = retry_timings["failure_ts"]
         retry_last_ts, retry_interval = (
             retry_timings["retry_last_ts"],
             retry_timings["retry_interval"],
@@ -108,6 +110,7 @@ def get_retry_limiter(destination, clock, store, ignore_backoff=False, **kwargs)
         destination,
         clock,
         store,
+        failure_ts,
         retry_interval,
         backoff_on_failure=backoff_on_failure,
         **kwargs
@@ -120,6 +123,7 @@ class RetryDestinationLimiter(object):
         destination,
         clock,
         store,
+        failure_ts,
         retry_interval,
         backoff_on_404=False,
         backoff_on_failure=True,
@@ -133,6 +137,8 @@ class RetryDestinationLimiter(object):
             destination (str)
             clock (Clock)
             store (DataStore)
+            failure_ts (int|None): when this destination started failing (in ms since
+                the epoch), or zero if the last request was successful
             retry_interval (int): The next retry interval taken from the
                 database in milliseconds, or zero if the last request was
                 successful.
@@ -145,6 +151,7 @@ class RetryDestinationLimiter(object):
         self.store = store
         self.destination = destination
 
+        self.failure_ts = failure_ts
         self.retry_interval = retry_interval
         self.backoff_on_404 = backoff_on_404
         self.backoff_on_failure = backoff_on_failure
@@ -186,6 +193,7 @@ class RetryDestinationLimiter(object):
             logger.debug(
                 "Connection to %s was successful; clearing backoff", self.destination
             )
+            self.failure_ts = None
             retry_last_ts = 0
             self.retry_interval = 0
         elif not self.backoff_on_failure:
@@ -211,11 +219,17 @@ class RetryDestinationLimiter(object):
             )
             retry_last_ts = int(self.clock.time_msec())
 
+            if self.failure_ts is None:
+                self.failure_ts = retry_last_ts
+
         @defer.inlineCallbacks
         def store_retry_timings():
             try:
                 yield self.store.set_destination_retry_timings(
-                    self.destination, retry_last_ts, self.retry_interval
+                    self.destination,
+                    self.failure_ts,
+                    retry_last_ts,
+                    self.retry_interval,
                 )
             except Exception:
                 logger.exception("Failed to store destination_retry_timings")