From e5082494ebb4b880643021eb94c848b15ae16f2c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 9 Apr 2018 12:07:39 +0100 Subject: Limit concurrent event sends for a room --- synapse/handlers/message.py | 99 +++++++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 49 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 6de6e13b7b..1814b205da 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -454,40 +454,39 @@ class EventCreationHandler(object): """ builder = self.event_builder_factory.new(event_dict) - with (yield self.limiter.queue(builder.room_id)): - self.validator.validate_new(builder) - - if builder.type == EventTypes.Member: - membership = builder.content.get("membership", None) - target = UserID.from_string(builder.state_key) - - if membership in {Membership.JOIN, Membership.INVITE}: - # If event doesn't include a display name, add one. - profile = self.profile_handler - content = builder.content - - try: - if "displayname" not in content: - content["displayname"] = yield profile.get_displayname(target) - if "avatar_url" not in content: - content["avatar_url"] = yield profile.get_avatar_url(target) - except Exception as e: - logger.info( - "Failed to get profile information for %r: %s", - target, e - ) + self.validator.validate_new(builder) + + if builder.type == EventTypes.Member: + membership = builder.content.get("membership", None) + target = UserID.from_string(builder.state_key) + + if membership in {Membership.JOIN, Membership.INVITE}: + # If event doesn't include a display name, add one. + profile = self.profile_handler + content = builder.content + + try: + if "displayname" not in content: + content["displayname"] = yield profile.get_displayname(target) + if "avatar_url" not in content: + content["avatar_url"] = yield profile.get_avatar_url(target) + except Exception as e: + logger.info( + "Failed to get profile information for %r: %s", + target, e + ) - if token_id is not None: - builder.internal_metadata.token_id = token_id + if token_id is not None: + builder.internal_metadata.token_id = token_id - if txn_id is not None: - builder.internal_metadata.txn_id = txn_id + if txn_id is not None: + builder.internal_metadata.txn_id = txn_id - event, context = yield self.create_new_client_event( - builder=builder, - requester=requester, - prev_event_ids=prev_event_ids, - ) + event, context = yield self.create_new_client_event( + builder=builder, + requester=requester, + prev_event_ids=prev_event_ids, + ) defer.returnValue((event, context)) @@ -557,27 +556,29 @@ class EventCreationHandler(object): See self.create_event and self.send_nonmember_event. """ - event, context = yield self.create_event( - requester, - event_dict, - token_id=requester.access_token_id, - txn_id=txn_id - ) - spam_error = self.spam_checker.check_event_for_spam(event) - if spam_error: - if not isinstance(spam_error, basestring): - spam_error = "Spam is not permitted here" - raise SynapseError( - 403, spam_error, Codes.FORBIDDEN + with (yield self.limiter.queue(event_dict["room_id"])): + event, context = yield self.create_event( + requester, + event_dict, + token_id=requester.access_token_id, + txn_id=txn_id ) - yield self.send_nonmember_event( - requester, - event, - context, - ratelimit=ratelimit, - ) + spam_error = self.spam_checker.check_event_for_spam(event) + if spam_error: + if not isinstance(spam_error, basestring): + spam_error = "Spam is not permitted here" + raise SynapseError( + 403, spam_error, Codes.FORBIDDEN + ) + + yield self.send_nonmember_event( + requester, + event, + context, + ratelimit=ratelimit, + ) defer.returnValue(event) @measure_func("create_new_client_event") -- cgit 1.5.1 From f8e8ec013bad0afb010d8cfda3da63e36d37fea5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 10 Apr 2018 14:00:24 +0100 Subject: Note why we're limiting concurrent event sends --- synapse/handlers/message.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'synapse/handlers') diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 1814b205da..54cd691f91 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -557,6 +557,11 @@ class EventCreationHandler(object): See self.create_event and self.send_nonmember_event. """ + # We limit the number of concurrent event sends in a room so that we + # don't fork the DAG too much. If we don't limit then we can end up in + # a situation where event persistence can't keep up, causing + # extremities to pile up, which in turn leads to state resolution + # taking longer. with (yield self.limiter.queue(event_dict["room_id"])): event, context = yield self.create_event( requester, -- cgit 1.5.1 From b3384232a031cc209fb5f0e085bc073a220448be Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 10 Apr 2018 23:14:47 +0100 Subject: Add metrics for ResponseCache --- synapse/appservice/api.py | 3 ++- synapse/federation/federation_server.py | 2 +- synapse/handlers/room_list.py | 5 +++-- synapse/handlers/sync.py | 2 +- synapse/replication/http/send_event.py | 2 +- synapse/util/caches/response_cache.py | 14 +++++++++++++- 6 files changed, 21 insertions(+), 7 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py index 40c433d7ae..11e9c37c63 100644 --- a/synapse/appservice/api.py +++ b/synapse/appservice/api.py @@ -73,7 +73,8 @@ class ApplicationServiceApi(SimpleHttpClient): super(ApplicationServiceApi, self).__init__(hs) self.clock = hs.get_clock() - self.protocol_meta_cache = ResponseCache(hs, timeout_ms=HOUR_IN_MS) + self.protocol_meta_cache = ResponseCache(hs, "as_protocol_meta", + timeout_ms=HOUR_IN_MS) @defer.inlineCallbacks def query_user(self, service, user_id): diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index bea7fd0b71..e4ce037acf 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -65,7 +65,7 @@ class FederationServer(FederationBase): # We cache responses to state queries, as they take a while and often # come in waves. - self._state_resp_cache = ResponseCache(hs, timeout_ms=30000) + self._state_resp_cache = ResponseCache(hs, "state_resp", timeout_ms=30000) @defer.inlineCallbacks @log_function diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index 5d81f59b44..8028d793c2 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -44,8 +44,9 @@ EMTPY_THIRD_PARTY_ID = ThirdPartyInstanceID(None, None) class RoomListHandler(BaseHandler): def __init__(self, hs): super(RoomListHandler, self).__init__(hs) - self.response_cache = ResponseCache(hs) - self.remote_response_cache = ResponseCache(hs, timeout_ms=30 * 1000) + self.response_cache = ResponseCache(hs, "room_list") + self.remote_response_cache = ResponseCache(hs, "remote_room_list", + timeout_ms=30 * 1000) def get_local_public_room_list(self, limit=None, since_token=None, search_filter=None, diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 0f713ce038..06d17ab20c 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -169,7 +169,7 @@ class SyncHandler(object): self.presence_handler = hs.get_presence_handler() self.event_sources = hs.get_event_sources() self.clock = hs.get_clock() - self.response_cache = ResponseCache(hs) + self.response_cache = ResponseCache(hs, "sync") self.state = hs.get_state_handler() def wait_for_sync_for_user(self, sync_config, since_token=None, timeout=0, diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index bbe2f967b7..c6a6551d24 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -115,7 +115,7 @@ class ReplicationSendEventRestServlet(RestServlet): self.clock = hs.get_clock() # The responses are tiny, so we may as well cache them for a while - self.response_cache = ResponseCache(hs, timeout_ms=30 * 60 * 1000) + self.response_cache = ResponseCache(hs, "send_event", timeout_ms=30 * 60 * 1000) def on_PUT(self, request, event_id): result = self.response_cache.get(event_id) diff --git a/synapse/util/caches/response_cache.py b/synapse/util/caches/response_cache.py index 00af539880..7f68289723 100644 --- a/synapse/util/caches/response_cache.py +++ b/synapse/util/caches/response_cache.py @@ -14,6 +14,7 @@ # limitations under the License. from synapse.util.async import ObservableDeferred +from synapse.util.caches import metrics as cache_metrics class ResponseCache(object): @@ -24,17 +25,28 @@ class ResponseCache(object): used rather than trying to compute a new response. """ - def __init__(self, hs, timeout_ms=0): + def __init__(self, hs, name, timeout_ms=0): self.pending_result_cache = {} # Requests that haven't finished yet. self.clock = hs.get_clock() self.timeout_sec = timeout_ms / 1000. + self._metrics = cache_metrics.register_cache( + "response_cache", + size_callback=lambda: self.size(), + cache_name=name, + ) + + def size(self): + return len(self.pending_result_cache) + def get(self, key): result = self.pending_result_cache.get(key) if result is not None: + self._metrics.inc_hits() return result.observe() else: + self._metrics.inc_misses() return None def set(self, key, deferred): -- cgit 1.5.1 From 121591568bdc6f9abde0c09cd80e4a3aa74ee109 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 11 Apr 2018 09:56:00 +0100 Subject: Send events to ASes concurrently --- synapse/handlers/appservice.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index 3dd3fa2a27..69eacff949 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -18,7 +18,9 @@ from twisted.internet import defer import synapse from synapse.api.constants import EventTypes from synapse.util.metrics import Measure -from synapse.util.logcontext import make_deferred_yieldable, preserve_fn +from synapse.util.logcontext import ( + make_deferred_yieldable, preserve_fn, run_in_background, +) import logging @@ -84,11 +86,16 @@ class ApplicationServicesHandler(object): if not events: break + events_by_room = {} for event in events: + events_by_room.setdefault(event.room_id, []).append(event) + + @defer.inlineCallbacks + def handle_event(event): # Gather interested services services = yield self._get_services_for_event(event) if len(services) == 0: - continue # no services need notifying + return # no services need notifying # Do we know this user exists? If not, poke the user # query API for all services which match that user regex. @@ -108,6 +115,16 @@ class ApplicationServicesHandler(object): service, event ) + @defer.inlineCallbacks + def handle_room_events(events): + for event in events: + yield handle_event(event) + + yield make_deferred_yieldable(defer.gatherResults([ + run_in_background(handle_room_events, evs) + for evs in events_by_room.itervalues() + ], consumeErrors=True)) + events_processed_counter.inc_by(len(events)) yield self.store.set_appservice_last_pos(upper_bound) -- cgit 1.5.1 From 92e34615c5c9ed5df2b0072a2686d3e42239e420 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 11 Apr 2018 11:07:51 +0100 Subject: Track where event stream processing have gotten up to --- synapse/federation/transaction_queue.py | 4 ++++ synapse/handlers/appservice.py | 4 ++++ synapse/metrics/__init__.py | 13 +++++++++++++ synapse/storage/events.py | 3 +++ 4 files changed, 24 insertions(+) (limited to 'synapse/handlers') diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 5b0b798e57..d68f0d8950 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -239,6 +239,10 @@ class TransactionQueue(object): "events", next_token ) + synapse.metrics.event_processing_positions.set( + next_token, "federation_sender", + ) + finally: self._is_processing = False diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index 3dd3fa2a27..2a7e31e711 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -111,6 +111,10 @@ class ApplicationServicesHandler(object): events_processed_counter.inc_by(len(events)) yield self.store.set_appservice_last_pos(upper_bound) + + synapse.metrics.event_processing_positions.set( + upper_bound, "appservice_sender", + ) finally: self.is_processing = False diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 2666f982a6..a6c0d7e1bf 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -151,6 +151,19 @@ reactor_metrics = get_metrics_for("python.twisted.reactor") tick_time = reactor_metrics.register_distribution("tick_time") pending_calls_metric = reactor_metrics.register_distribution("pending_calls") +synapse_metrics = get_metrics_for("synapse") + +# Used to track where various components have processed in the event stream, +# e.g. federation sending, appservice sending, etc. +event_processing_positions = synapse_metrics.register_gauge( + "event_processing_positions", labels=["name"], +) + +# Used to track the current max events stream position +event_persisted_position = synapse_metrics.register_gauge( + "event_persisted_position", +) + def runUntilCurrentTimer(func): diff --git a/synapse/storage/events.py b/synapse/storage/events.py index ece5e6c41f..da44b52fd6 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -444,6 +444,9 @@ class EventsStore(EventsWorkerStore): new_forward_extremeties=new_forward_extremeties, ) persist_event_counter.inc_by(len(chunk)) + synapse.metrics.event_persisted_position.set( + chunk[-1][0].internal_metadata.stream_ordering, + ) for event, context in chunk: if context.app_service: origin_type = "local" -- cgit 1.5.1 From 4dae4a97ed0e0b2cc9b5493172670ec7353ded2e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 11 Apr 2018 11:52:19 +0100 Subject: Track last processed event received_ts --- synapse/federation/transaction_queue.py | 11 +++++++++++ synapse/handlers/appservice.py | 10 ++++++++++ synapse/metrics/__init__.py | 13 +++++++++++++ synapse/storage/events_worker.py | 18 ++++++++++++++++++ 4 files changed, 52 insertions(+) (limited to 'synapse/handlers') diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index d68f0d8950..1c466cbded 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -243,6 +243,17 @@ class TransactionQueue(object): next_token, "federation_sender", ) + if events: + now = self.clock.time_msec() + ts = yield self.store.get_received_ts(events[-1].event_id) + + synapse.metrics.event_processing_lag.set( + now - ts, "federation_sender", + ) + synapse.metrics.event_processing_last_ts.set( + ts, "federation_sender", + ) + finally: self._is_processing = False diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index 2a7e31e711..fcc1246bee 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -115,6 +115,16 @@ class ApplicationServicesHandler(object): synapse.metrics.event_processing_positions.set( upper_bound, "appservice_sender", ) + + now = self.clock.time_msec() + ts = yield self.store.get_received_ts(events[-1].event_id) + + synapse.metrics.event_processing_lag.set( + now - ts, "appservice_sender", + ) + synapse.metrics.event_processing_last_ts.set( + ts, "appservice_sender", + ) finally: self.is_processing = False diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index a6c0d7e1bf..e3b831db67 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -164,6 +164,19 @@ event_persisted_position = synapse_metrics.register_gauge( "event_persisted_position", ) +# Used to track the received_ts of the last event processed by various +# components +event_processing_last_ts = synapse_metrics.register_gauge( + "event_processing_last_ts", labels=["name"], +) + +# Used to track the lag processing events. This is the time difference +# between the last processed event's received_ts and the time it was +# finished being processed. +event_processing_lag = synapse_metrics.register_gauge( + "event_processing_lag", labels=["name"], +) + def runUntilCurrentTimer(func): diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 2e23dd78ba..769eb51489 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -51,6 +51,24 @@ _EventCacheEntry = namedtuple("_EventCacheEntry", ("event", "redacted_event")) class EventsWorkerStore(SQLBaseStore): + def get_received_ts(self, event_id): + """Get received_ts (when it was persisted) for the event + + Args: + event_id (str) + + Returns: + Deferred[int|None]: Timstamp in milliseconds, or None for events + that were persisted before received_ts was implemented. + """ + return self._simple_select_one_onecol( + table="events", + keyvalues={ + "event_id": event_id, + }, + retcol="received_ts", + desc="get_received_ts", + ) @defer.inlineCallbacks def get_event(self, event_id, check_redacted=True, -- cgit 1.5.1 From f67e906e189cb528cab18cb5190be352dcc8914b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Apr 2018 11:18:19 +0100 Subject: Set all metrics at the same time --- synapse/federation/transaction_queue.py | 12 ++++++------ synapse/handlers/appservice.py | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'synapse/handlers') diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 1c466cbded..963d938edd 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -233,16 +233,10 @@ class TransactionQueue(object): consumeErrors=True )) - events_processed_counter.inc_by(len(events)) - yield self.store.update_federation_out_pos( "events", next_token ) - synapse.metrics.event_processing_positions.set( - next_token, "federation_sender", - ) - if events: now = self.clock.time_msec() ts = yield self.store.get_received_ts(events[-1].event_id) @@ -254,6 +248,12 @@ class TransactionQueue(object): ts, "federation_sender", ) + events_processed_counter.inc_by(len(events)) + + synapse.metrics.event_processing_positions.set( + next_token, "federation_sender", + ) + finally: self._is_processing = False diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index fcc1246bee..ce0814bc25 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -108,16 +108,16 @@ class ApplicationServicesHandler(object): service, event ) - events_processed_counter.inc_by(len(events)) - yield self.store.set_appservice_last_pos(upper_bound) + now = self.clock.time_msec() + ts = yield self.store.get_received_ts(events[-1].event_id) + synapse.metrics.event_processing_positions.set( upper_bound, "appservice_sender", ) - now = self.clock.time_msec() - ts = yield self.store.get_received_ts(events[-1].event_id) + events_processed_counter.inc_by(len(events)) synapse.metrics.event_processing_lag.set( now - ts, "appservice_sender", -- cgit 1.5.1