From c7273c11bc73630e3f2ab1a82d20a40d8b17f9a3 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 22 Oct 2018 16:12:11 +0100 Subject: Give pushers their own background logcontext Each pusher has its own loop which runs for as long as it has work to do. This should run in its own background thread with its own logcontext, as other similar loops elsewhere in the system do - which means that CPU usage is consistently attributed to that loop, rather than to whatever request happened to start the loop. --- synapse/push/httppusher.py | 64 ++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 34 deletions(-) (limited to 'synapse/push/httppusher.py') diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index 48abd5e4d6..5f6b21bc67 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -22,9 +22,8 @@ from prometheus_client import Counter from twisted.internet import defer from twisted.internet.error import AlreadyCalled, AlreadyCancelled +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.push import PusherConfigException -from synapse.util.logcontext import LoggingContext -from synapse.util.metrics import Measure from . import push_rule_evaluator, push_tools @@ -92,34 +91,30 @@ class HttpPusher(object): self.data_minus_url.update(self.data) del self.data_minus_url['url'] - @defer.inlineCallbacks def on_started(self): - try: - yield self._process() - except Exception: - logger.exception("Error starting http pusher") + self._start_processing() + return defer.succeed(None) - @defer.inlineCallbacks def on_new_notifications(self, min_stream_ordering, max_stream_ordering): self.max_stream_ordering = max(max_stream_ordering, self.max_stream_ordering or 0) - yield self._process() + self._start_processing() + return defer.suceed(None) - @defer.inlineCallbacks def on_new_receipts(self, min_stream_id, max_stream_id): # Note that the min here shouldn't be relied upon to be accurate. # We could check the receipts are actually m.read receipts here, # but currently that's the only type of receipt anyway... - with LoggingContext("push.on_new_receipts"): - with Measure(self.clock, "push.on_new_receipts"): - badge = yield push_tools.get_badge_count( - self.hs.get_datastore(), self.user_id - ) - yield self._send_badge(badge) + run_as_background_process("http_pusher.on_new_receipts", self._update_badge) + return defer.succeed(None) @defer.inlineCallbacks + def _update_badge(self): + badge = yield push_tools.get_badge_count(self.hs.get_datastore(), self.user_id) + yield self._send_badge(badge) + def on_timer(self): - yield self._process() + self._start_processing() def on_stop(self): if self.timed_call: @@ -129,27 +124,28 @@ class HttpPusher(object): pass self.timed_call = None - @defer.inlineCallbacks - def _process(self): + def _start_processing(self): if self.processing: return - with LoggingContext("push._process"): - with Measure(self.clock, "push._process"): + run_as_background_process("httppush.process", self._process) + + @defer.inlineCallbacks + def _process(self): + try: + self.processing = True + # if the max ordering changes while we're running _unsafe_process, + # call it again, and so on until we've caught up. + while True: + starting_max_ordering = self.max_stream_ordering try: - self.processing = True - # if the max ordering changes while we're running _unsafe_process, - # call it again, and so on until we've caught up. - while True: - starting_max_ordering = self.max_stream_ordering - try: - yield self._unsafe_process() - except Exception: - logger.exception("Exception processing notifs") - if self.max_stream_ordering == starting_max_ordering: - break - finally: - self.processing = False + yield self._unsafe_process() + except Exception: + logger.exception("Exception processing notifs") + if self.max_stream_ordering == starting_max_ordering: + break + finally: + self.processing = False @defer.inlineCallbacks def _unsafe_process(self): -- cgit 1.4.1 From e7a16c6210191e9556fb1c11cbff2d98f0a5206c Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 22 Oct 2018 16:12:11 +0100 Subject: Remove redundant run_as_background_process() from pusherpool `on_new_notifications` and `on_new_receipts` in `HttpPusher` and `EmailPusher` now always return synchronously, so we can remove the `defer.gatherResults` on their results, and the `run_as_background_process` wrappers can be removed too because the PusherPool methods will now complete quickly enough. --- synapse/app/pusher.py | 4 ++-- synapse/handlers/federation.py | 4 ++-- synapse/handlers/message.py | 2 +- synapse/handlers/receipts.py | 2 +- synapse/push/emailpusher.py | 3 +-- synapse/push/httppusher.py | 2 -- synapse/push/pusherpool.py | 47 +++++++----------------------------------- 7 files changed, 14 insertions(+), 50 deletions(-) (limited to 'synapse/push/httppusher.py') diff --git a/synapse/app/pusher.py b/synapse/app/pusher.py index e06b70894e..83b0863f00 100644 --- a/synapse/app/pusher.py +++ b/synapse/app/pusher.py @@ -161,11 +161,11 @@ class PusherReplicationHandler(ReplicationClientHandler): else: yield self.start_pusher(row.user_id, row.app_id, row.pushkey) elif stream_name == "events": - self.pusher_pool.on_new_notifications( + yield self.pusher_pool.on_new_notifications( token, token, ) elif stream_name == "receipts": - self.pusher_pool.on_new_receipts( + yield self.pusher_pool.on_new_receipts( token, token, set(row.room_id for row in rows) ) except Exception: diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index cab57a8849..63e495e3f8 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2520,7 +2520,7 @@ class FederationHandler(BaseHandler): if not backfilled: # Never notify for backfilled events for event, _ in event_and_contexts: - self._notify_persisted_event(event, max_stream_id) + yield self._notify_persisted_event(event, max_stream_id) def _notify_persisted_event(self, event, max_stream_id): """Checks to see if notifier/pushers should be notified about the @@ -2553,7 +2553,7 @@ class FederationHandler(BaseHandler): extra_users=extra_users ) - self.pusher_pool.on_new_notifications( + return self.pusher_pool.on_new_notifications( event_stream_id, max_stream_id, ) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 4954b23a0d..6c4fcfb10a 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -779,7 +779,7 @@ class EventCreationHandler(object): event, context=context ) - self.pusher_pool.on_new_notifications( + yield self.pusher_pool.on_new_notifications( event_stream_id, max_stream_id, ) diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index a6f3181f09..4c2690ba26 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -119,7 +119,7 @@ class ReceiptsHandler(BaseHandler): "receipt_key", max_batch_id, rooms=affected_room_ids ) # Note that the min here shouldn't be relied upon to be accurate. - self.hs.get_pusherpool().on_new_receipts( + yield self.hs.get_pusherpool().on_new_receipts( min_batch_id, max_batch_id, affected_room_ids, ) diff --git a/synapse/push/emailpusher.py b/synapse/push/emailpusher.py index 0c9c0201e8..d5a99b838c 100644 --- a/synapse/push/emailpusher.py +++ b/synapse/push/emailpusher.py @@ -94,13 +94,12 @@ class EmailPusher(object): def on_new_notifications(self, min_stream_ordering, max_stream_ordering): self.max_stream_ordering = max(max_stream_ordering, self.max_stream_ordering) self._start_processing() - return defer.succeed(None) def on_new_receipts(self, min_stream_id, max_stream_id): # We could wake up and cancel the timer but there tend to be quite a # lot of read receipts so it's probably less work to just let the # timer fire - return defer.succeed(None) + pass def on_timer(self): self.timed_call = None diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index 5f6b21bc67..770f55feae 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -98,7 +98,6 @@ class HttpPusher(object): def on_new_notifications(self, min_stream_ordering, max_stream_ordering): self.max_stream_ordering = max(max_stream_ordering, self.max_stream_ordering or 0) self._start_processing() - return defer.suceed(None) def on_new_receipts(self, min_stream_id, max_stream_id): # Note that the min here shouldn't be relied upon to be accurate. @@ -106,7 +105,6 @@ class HttpPusher(object): # We could check the receipts are actually m.read receipts here, # but currently that's the only type of receipt anyway... run_as_background_process("http_pusher.on_new_receipts", self._update_badge) - return defer.succeed(None) @defer.inlineCallbacks def _update_badge(self): diff --git a/synapse/push/pusherpool.py b/synapse/push/pusherpool.py index b9b68ec829..a4d1ce3aad 100644 --- a/synapse/push/pusherpool.py +++ b/synapse/push/pusherpool.py @@ -18,9 +18,8 @@ import logging from twisted.internet import defer -from synapse.metrics.background_process_metrics import run_as_background_process from synapse.push.pusher import PusherFactory -from synapse.util.logcontext import make_deferred_yieldable, run_in_background +from synapse.util.logcontext import run_in_background logger = logging.getLogger(__name__) @@ -122,45 +121,23 @@ class PusherPool: p['app_id'], p['pushkey'], p['user_name'], ) - def on_new_notifications(self, min_stream_id, max_stream_id): - run_as_background_process( - "on_new_notifications", - self._on_new_notifications, min_stream_id, max_stream_id, - ) - @defer.inlineCallbacks - def _on_new_notifications(self, min_stream_id, max_stream_id): + def on_new_notifications(self, min_stream_id, max_stream_id): try: users_affected = yield self.store.get_push_action_users_in_range( min_stream_id, max_stream_id ) - deferreds = [] - for u in users_affected: if u in self.pushers: for p in self.pushers[u].values(): - deferreds.append( - run_in_background( - p.on_new_notifications, - min_stream_id, max_stream_id, - ) - ) - - yield make_deferred_yieldable( - defer.gatherResults(deferreds, consumeErrors=True), - ) + p.on_new_notifications(min_stream_id, max_stream_id) + except Exception: logger.exception("Exception in pusher on_new_notifications") - def on_new_receipts(self, min_stream_id, max_stream_id, affected_room_ids): - run_as_background_process( - "on_new_receipts", - self._on_new_receipts, min_stream_id, max_stream_id, affected_room_ids, - ) - @defer.inlineCallbacks - def _on_new_receipts(self, min_stream_id, max_stream_id, affected_room_ids): + def on_new_receipts(self, min_stream_id, max_stream_id, affected_room_ids): try: # Need to subtract 1 from the minimum because the lower bound here # is not inclusive @@ -170,21 +147,11 @@ class PusherPool: # This returns a tuple, user_id is at index 3 users_affected = set([r[3] for r in updated_receipts]) - deferreds = [] - for u in users_affected: if u in self.pushers: for p in self.pushers[u].values(): - deferreds.append( - run_in_background( - p.on_new_receipts, - min_stream_id, max_stream_id, - ) - ) - - yield make_deferred_yieldable( - defer.gatherResults(deferreds, consumeErrors=True), - ) + p.on_new_receipts(min_stream_id, max_stream_id) + except Exception: logger.exception("Exception in pusher on_new_receipts") -- cgit 1.4.1 From f749607c911a82e64685f1cfaeee892c49a1b606 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 22 Oct 2018 16:12:11 +0100 Subject: Make on_started synchronous too This brings it into line with on_new_notifications and on_new_receipts. It requires a little bit of hoop-jumping in EmailPusher to load the throttle params before the first loop. --- synapse/push/emailpusher.py | 15 +++++++-------- synapse/push/httppusher.py | 1 - synapse/push/pusherpool.py | 16 ++++++++++++++-- 3 files changed, 21 insertions(+), 11 deletions(-) (limited to 'synapse/push/httppusher.py') diff --git a/synapse/push/emailpusher.py b/synapse/push/emailpusher.py index d5a99b838c..0bc548ae7a 100644 --- a/synapse/push/emailpusher.py +++ b/synapse/push/emailpusher.py @@ -72,16 +72,9 @@ class EmailPusher(object): self.processing = False - @defer.inlineCallbacks def on_started(self): if self.mailer is not None: - try: - self.throttle_params = yield self.store.get_throttle_params_by_room( - self.pusher_id - ) - self._start_processing() - except Exception: - logger.exception("Error starting email pusher") + self._start_processing() def on_stop(self): if self.timed_call: @@ -116,6 +109,12 @@ class EmailPusher(object): try: self.processing = True + if self.throttle_params is None: + # this is our first loop: load up the throttle params + self.throttle_params = yield self.store.get_throttle_params_by_room( + self.pusher_id + ) + # if the max ordering changes while we're running _unsafe_process, # call it again, and so on until we've caught up. while True: diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index 770f55feae..33034d44da 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -93,7 +93,6 @@ class HttpPusher(object): def on_started(self): self._start_processing() - return defer.succeed(None) def on_new_notifications(self, min_stream_ordering, max_stream_ordering): self.max_stream_ordering = max(max_stream_ordering, self.max_stream_ordering or 0) diff --git a/synapse/push/pusherpool.py b/synapse/push/pusherpool.py index a4d1ce3aad..695e582dce 100644 --- a/synapse/push/pusherpool.py +++ b/synapse/push/pusherpool.py @@ -19,12 +19,24 @@ import logging from twisted.internet import defer from synapse.push.pusher import PusherFactory -from synapse.util.logcontext import run_in_background logger = logging.getLogger(__name__) class PusherPool: + """ + The pusher pool. This is responsible for dispatching notifications of new events to + the http and email pushers. + + It provides three methods which are designed to be called by the rest of the + application: `start`, `on_new_notifications`, and `on_new_receipts`: each of these + delegates to each of the relevant pushers. + + Note that it is expected that each pusher will have its own 'processing' loop which + will send out the notifications in the background, rather than blocking until the + notifications are sent; accordingly Pusher.on_started, Pusher.on_new_notifications and + Pusher.on_new_receipts are not expected to return deferreds. + """ def __init__(self, _hs): self.hs = _hs self.pusher_factory = PusherFactory(_hs) @@ -216,7 +228,7 @@ class PusherPool: if appid_pushkey in byuser: byuser[appid_pushkey].on_stop() byuser[appid_pushkey] = p - run_in_background(p.on_started) + p.on_started() @defer.inlineCallbacks def remove_pusher(self, app_id, pushkey, user_id): -- cgit 1.4.1 From e564306e315fc3dfd37e5fed495ae300fbb58c8a Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 24 Oct 2018 09:23:33 +0100 Subject: sanity-check the is_processing flag ... and rename it, for even more sanity --- synapse/push/emailpusher.py | 11 +++++++---- synapse/push/httppusher.py | 11 +++++++---- 2 files changed, 14 insertions(+), 8 deletions(-) (limited to 'synapse/push/httppusher.py') diff --git a/synapse/push/emailpusher.py b/synapse/push/emailpusher.py index 0bc548ae7a..f369124258 100644 --- a/synapse/push/emailpusher.py +++ b/synapse/push/emailpusher.py @@ -70,7 +70,7 @@ class EmailPusher(object): # See httppusher self.max_stream_ordering = None - self.processing = False + self._is_processing = False def on_started(self): if self.mailer is not None: @@ -99,15 +99,18 @@ class EmailPusher(object): self._start_processing() def _start_processing(self): - if self.processing: + if self._is_processing: return run_as_background_process("emailpush.process", self._process) @defer.inlineCallbacks def _process(self): + # we should never get here if we are already processing + assert not self._is_processing + try: - self.processing = True + self._is_processing = True if self.throttle_params is None: # this is our first loop: load up the throttle params @@ -126,7 +129,7 @@ class EmailPusher(object): if self.max_stream_ordering == starting_max_ordering: break finally: - self.processing = False + self._is_processing = False @defer.inlineCallbacks def _unsafe_process(self): diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index 33034d44da..6bd703632d 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -60,7 +60,7 @@ class HttpPusher(object): self.backoff_delay = HttpPusher.INITIAL_BACKOFF_SEC self.failing_since = pusherdict['failing_since'] self.timed_call = None - self.processing = False + self._is_processing = False # This is the highest stream ordering we know it's safe to process. # When new events arrive, we'll be given a window of new events: we @@ -122,15 +122,18 @@ class HttpPusher(object): self.timed_call = None def _start_processing(self): - if self.processing: + if self._is_processing: return run_as_background_process("httppush.process", self._process) @defer.inlineCallbacks def _process(self): + # we should never get here if we are already processing + assert not self._is_processing + try: - self.processing = True + self._is_processing = True # if the max ordering changes while we're running _unsafe_process, # call it again, and so on until we've caught up. while True: @@ -142,7 +145,7 @@ class HttpPusher(object): if self.max_stream_ordering == starting_max_ordering: break finally: - self.processing = False + self._is_processing = False @defer.inlineCallbacks def _unsafe_process(self): -- cgit 1.4.1