summary refs log tree commit diff
diff options
context:
space:
mode:
author reivilibre <38398653+reivilibre@users.noreply.github.com> 2020-08-13 12:35:04 +0100
committer GitHub <noreply@github.com> 2020-08-13 12:35:04 +0100
commit ff0e8946569a2627e3b0e06aae7e7612fcc726c1 (patch)
tree 909b9f2a4903f3dd3f359be5d964deb5fe4528ed
parent Fix unawaited coroutine error in tests. (#8072) (diff)
download synapse-ff0e8946569a2627e3b0e06aae7e7612fcc726c1.tar.xz
Drop federation transmission queues during a significant remote outage. (#7864)
* Empty federation transmission queues when we are backing off.

Fixes #7828.

Signed-off-by: Olivier Wilkinson (reivilibre) <olivier@librepush.net>

* Address feedback

Signed-off-by: Olivier Wilkinson (reivilibre) <olivier@librepush.net>

* Reword newsfile
-rw-r--r-- changelog.d/7864.bugfix 1
-rw-r--r-- synapse/federation/sender/per_destination_queue.py 22
2 files changed, 23 insertions, 0 deletions
diff --git a/changelog.d/7864.bugfix b/changelog.d/7864.bugfix
new file mode 100644
index 0000000000..8623355fe9
--- /dev/null
+++ b/changelog.d/7864.bugfix
@@ -0,0 +1 @@
+Fix a memory leak by limiting the length of time that messages will be queued for a remote server that has been unreachable.
diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py
index dd150f89a6..8cbc23d901 100644
--- a/synapse/federation/sender/per_destination_queue.py
+++ b/synapse/federation/sender/per_destination_queue.py
@@ -337,6 +337,28 @@ class PerDestinationQueue(object):
                     (e.retry_last_ts + e.retry_interval) / 1000.0
                 ),
             )
+
+            if e.retry_interval > 60 * 60 * 1000:
+                # we won't retry for another hour!
+                # (this suggests a significant outage)
+                # We drop pending PDUs and EDUs because otherwise they will
+                # rack up indefinitely.
+                # Note that:
+                # - the EDUs that are being dropped here are those that we can
+                #   afford to drop (specifically, only typing notifications,
+                #   read receipts and presence updates are being dropped here)
+                # - Other EDUs such as to_device messages are queued with a
+                #   different mechanism
+                # - this is all volatile state that would be lost if the
+                #   federation sender restarted anyway
+
+                # dropping read receipts is a bit sad but should be solved
+                # through another mechanism, because this is all volatile!
+                self._pending_pdus = []
+                self._pending_edus = []
+                self._pending_edus_keyed = {}
+                self._pending_presence = {}
+                self._pending_rrs = {}
         except FederationDeniedError as e:
             logger.info(e)
         except HttpResponseException as e: