summary refs log tree commit diff
path: root/synapse/federation
diff options
context:
space:
mode:
authorRichard van der Hoff <richard@matrix.org>2018-04-27 11:07:40 +0100
committerRichard van der Hoff <richard@matrix.org>2018-04-27 11:07:40 +0100
commit9255a6cb17716c022ebae1dbe9c142b78ca86ea7 (patch)
tree185b1b00af216695daca391eb62a871e78d32e49 /synapse/federation
parentMerge pull request #3134 from matrix-org/erikj/fix_admin_media_api (diff)
downloadsynapse-9255a6cb17716c022ebae1dbe9c142b78ca86ea7.tar.xz
Improve exception handling for background processes
There were a bunch of places where we fire off a process to happen in the
background, but don't have any exception handling on it - instead relying on
the unhandled error being logged when the relevent deferred gets
garbage-collected.

This is unsatisfactory for a number of reasons:
 - logging on garbage collection is best-effort and may happen some time after
   the error, if at all
 - it can be hard to figure out where the error actually happened.
 - it is logged as a scary CRITICAL error which (a) I always forget to grep for
   and (b) it's not really CRITICAL if a background process we don't care about
   fails.

So this is an attempt to add exception handling to everything we fire off into
the background.
Diffstat (limited to 'synapse/federation')
-rw-r--r--synapse/federation/transaction_queue.py2
-rw-r--r--synapse/federation/transport/server.py13
2 files changed, 12 insertions, 3 deletions
diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py
index 963d938edd..ded2b1871a 100644
--- a/synapse/federation/transaction_queue.py
+++ b/synapse/federation/transaction_queue.py
@@ -323,6 +323,8 @@ class TransactionQueue(object):
                     break
 
                 yield self._process_presence_inner(states_map.values())
+        except Exception:
+            logger.exception("Error sending presence states to servers")
         finally:
             self._processing_pending_presence = False
 
diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py
index ff0656df3e..19d09f5422 100644
--- a/synapse/federation/transport/server.py
+++ b/synapse/federation/transport/server.py
@@ -25,7 +25,7 @@ from synapse.http.servlet import (
 )
 from synapse.util.ratelimitutils import FederationRateLimiter
 from synapse.util.versionstring import get_version_string
-from synapse.util.logcontext import preserve_fn
+from synapse.util.logcontext import run_in_background
 from synapse.types import ThirdPartyInstanceID, get_domain_from_id
 
 import functools
@@ -152,11 +152,18 @@ class Authenticator(object):
         # alive
         retry_timings = yield self.store.get_destination_retry_timings(origin)
         if retry_timings and retry_timings["retry_last_ts"]:
-            logger.info("Marking origin %r as up", origin)
-            preserve_fn(self.store.set_destination_retry_timings)(origin, 0, 0)
+            run_in_background(self._reset_retry_timings, origin)
 
         defer.returnValue(origin)
 
+    @defer.inlineCallbacks
+    def _reset_retry_timings(self, origin):
+        try:
+            logger.info("Marking origin %r as up", origin)
+            yield self.store.set_destination_retry_timings(origin, 0, 0)
+        except Exception:
+            logger.exception("Error resetting retry timings on %s", origin)
+
 
 class BaseFederationServlet(object):
     REQUIRE_AUTH = True