Immediately retry any requests that have backed off when a server comes back online. (#12500)

Otherwise it can take up to a minute for any in-flight `/send` requests to be retried.
author: Erik Johnston <erik@matrix.org> 2022-05-10 10:39:54 +0100
committer: GitHub <noreply@github.com> 2022-05-10 10:39:54 +0100
commit: 8dd3e0e084304dfc02ff072a1beaed5266cf4e33 (patch)
tree: b546dc87747960e269c98276115f18c4842e2d92 /synapse/util/retryutils.py
parent: Implement MSC3786: Add a default push rule to ignore m.room.server_acl events... (diff)
download: synapse-8dd3e0e084304dfc02ff072a1beaed5266cf4e33.tar.xz
1 files changed, 23 insertions, 1 deletions
diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py
index d81f2527d7..81bfed268e 100644
--- a/synapse/util/retryutils.py
+++ b/synapse/util/retryutils.py
@@ -14,13 +14,17 @@
 import logging
 import random
 from types import TracebackType
-from typing import Any, Optional, Type
+from typing import TYPE_CHECKING, Any, Optional, Type
 
 import synapse.logging.context
 from synapse.api.errors import CodeMessageException
 from synapse.storage import DataStore
 from synapse.util import Clock
 
+if TYPE_CHECKING:
+    from synapse.notifier import Notifier
+    from synapse.replication.tcp.handler import ReplicationCommandHandler
+
 logger = logging.getLogger(__name__)
 
 # the initial backoff, after the first transaction fails
@@ -131,6 +135,8 @@ class RetryDestinationLimiter:
         retry_interval: int,
         backoff_on_404: bool = False,
         backoff_on_failure: bool = True,
+        notifier: Optional["Notifier"] = None,
+        replication_client: Optional["ReplicationCommandHandler"] = None,
     ):
         """Marks the destination as "down" if an exception is thrown in the
         context, except for CodeMessageException with code < 500.
@@ -160,6 +166,9 @@ class RetryDestinationLimiter:
         self.backoff_on_404 = backoff_on_404
         self.backoff_on_failure = backoff_on_failure
 
+        self.notifier = notifier
+        self.replication_client = replication_client
+
     def __enter__(self) -> None:
         pass
 
@@ -239,6 +248,19 @@ class RetryDestinationLimiter:
                     retry_last_ts,
                     self.retry_interval,
                 )
+
+                if self.notifier:
+                    # Inform the relevant places that the remote server is back up.
+                    self.notifier.notify_remote_server_up(self.destination)
+
+                if self.replication_client:
+                    # If we're on a worker we try and inform master about this. The
+                    # replication client doesn't hook into the notifier to avoid
+                    # infinite loops where we send a `REMOTE_SERVER_UP` command to
+                    # master, which then echoes it back to us which in turn pokes
+                    # the notifier.
+                    self.replication_client.send_remote_server_up(self.destination)
+
             except Exception:
                 logger.exception("Failed to store destination_retry_timings")
author	Erik Johnston <erik@matrix.org>	2022-05-10 10:39:54 +0100
committer	GitHub <noreply@github.com>	2022-05-10 10:39:54 +0100
commit	8dd3e0e084304dfc02ff072a1beaed5266cf4e33 (patch)
tree	b546dc87747960e269c98276115f18c4842e2d92 /synapse/util/retryutils.py
parent	Implement MSC3786: Add a default push rule to ignore m.room.server_acl events... (diff)
download	synapse-8dd3e0e084304dfc02ff072a1beaed5266cf4e33.tar.xz