From 9255a6cb17716c022ebae1dbe9c142b78ca86ea7 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 27 Apr 2018 11:07:40 +0100 Subject: Improve exception handling for background processes There were a bunch of places where we fire off a process to happen in the background, but don't have any exception handling on it - instead relying on the unhandled error being logged when the relevent deferred gets garbage-collected. This is unsatisfactory for a number of reasons: - logging on garbage collection is best-effort and may happen some time after the error, if at all - it can be hard to figure out where the error actually happened. - it is logged as a scary CRITICAL error which (a) I always forget to grep for and (b) it's not really CRITICAL if a background process we don't care about fails. So this is an attempt to add exception handling to everything we fire off into the background. --- synapse/handlers/message.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'synapse/handlers/message.py') diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 21628a8540..d168ff5b86 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -857,15 +857,25 @@ class EventCreationHandler(object): @defer.inlineCallbacks def _notify(): yield run_on_reactor() - self.notifier.on_new_room_event( - event, event_stream_id, max_stream_id, - extra_users=extra_users - ) + try: + self.notifier.on_new_room_event( + event, event_stream_id, max_stream_id, + extra_users=extra_users + ) + except Exception: + logger.exception("Error notifying about new room event") preserve_fn(_notify)() if event.type == EventTypes.Message: - presence = self.hs.get_presence_handler() # We don't want to block sending messages on any presence code. This # matters as sometimes presence code can take a while. - preserve_fn(presence.bump_presence_active_time)(requester.user) + run_in_background(self._bump_active_time, requester.user) + + @defer.inlineCallbacks + def _bump_active_time(self, user): + try: + presence = self.hs.get_presence_handler() + yield presence.bump_presence_active_time(user) + except Exception: + logger.exception("Error bumping presence active time") -- cgit 1.4.1 From 6493b22b42685a6cc06c1113196d305e4d52deed Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 27 Apr 2018 11:40:06 +0100 Subject: reraise exceptions more carefully We need to be careful (under python 2, at least) that when we reraise an exception after doing some error handling, we actually reraise the original exception rather than anything that might have been raised (and handled) during the error handling. --- synapse/handlers/federation.py | 16 ++++++++++------ synapse/handlers/message.py | 21 ++++++++++++++------- 2 files changed, 24 insertions(+), 13 deletions(-) (limited to 'synapse/handlers/message.py') diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index ae7e0d6da2..260df025f9 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -19,9 +19,11 @@ import httplib import itertools import logging +import sys from signedjson.key import decode_verify_key_bytes from signedjson.sign import verify_signed_json +import six from twisted.internet import defer from unpaddedbase64 import decode_base64 @@ -1513,12 +1515,14 @@ class FederationHandler(BaseHandler): backfilled=backfilled, ) except: # noqa: E722, as we reraise the exception this is fine. - # Ensure that we actually remove the entries in the push actions - # staging area - logcontext.preserve_fn( - self.store.remove_push_actions_from_staging - )(event.event_id) - raise + tp, value, tb = sys.exc_info() + + logcontext.run_in_background( + self.store.remove_push_actions_from_staging, + event.event_id, + ) + + six.reraise(tp, value, tb) if not backfilled: # this intentionally does not yield: we don't care about the result diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 21628a8540..8e2e44bdcd 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -13,6 +13,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import logging +import simplejson +import sys + +from canonicaljson import encode_canonical_json +import six from twisted.internet import defer, reactor from twisted.python.failure import Failure @@ -34,11 +40,6 @@ from synapse.replication.http.send_event import send_event_to_master from ._base import BaseHandler -from canonicaljson import encode_canonical_json - -import logging -import simplejson - logger = logging.getLogger(__name__) @@ -729,8 +730,14 @@ class EventCreationHandler(object): except: # noqa: E722, as we reraise the exception this is fine. # Ensure that we actually remove the entries in the push actions # staging area, if we calculated them. - preserve_fn(self.store.remove_push_actions_from_staging)(event.event_id) - raise + tp, value, tb = sys.exc_info() + + run_in_background( + self.store.remove_push_actions_from_staging, + event.event_id, + ) + + six.reraise(tp, value, tb) @defer.inlineCallbacks def persist_and_notify_client_event( -- cgit 1.4.1