From b3384232a031cc209fb5f0e085bc073a220448be Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 10 Apr 2018 23:14:47 +0100 Subject: Add metrics for ResponseCache --- synapse/federation/federation_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/federation/federation_server.py') diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index bea7fd0b71..e4ce037acf 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -65,7 +65,7 @@ class FederationServer(FederationBase): # We cache responses to state queries, as they take a while and often # come in waves. - self._state_resp_cache = ResponseCache(hs, timeout_ms=30000) + self._state_resp_cache = ResponseCache(hs, "state_resp", timeout_ms=30000) @defer.inlineCallbacks @log_function -- cgit 1.4.1 From b78395b7fe449d59a5c46c81a869f9f191cd934f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 12 Apr 2018 12:08:59 +0100 Subject: Refactor ResponseCache usage Adds a `.wrap` method to ResponseCache which wraps up the boilerplate of a (get, set) pair, and then use it throughout the codebase. This will be largely non-functional, but does include the following functional changes: * federation_server.on_context_state_request: drops use of _server_linearizer which looked redundant and could cause incorrect cache misses by yielding between the get and the set. * RoomListHandler.get_remote_public_room_list(): fixes logcontext leaks * the wrap function includes some logging. I'm hoping this won't be too noisy on production. --- synapse/appservice/api.py | 8 +---- synapse/federation/federation_server.py | 16 +++------ synapse/handlers/room_list.py | 38 ++++++++------------- synapse/handlers/sync.py | 16 ++++----- synapse/replication/http/send_event.py | 18 ++++------ synapse/util/caches/response_cache.py | 58 +++++++++++++++++++++++++++++++-- 6 files changed, 87 insertions(+), 67 deletions(-) (limited to 'synapse/federation/federation_server.py') diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py index 11e9c37c63..00efff1464 100644 --- a/synapse/appservice/api.py +++ b/synapse/appservice/api.py @@ -18,7 +18,6 @@ from synapse.api.constants import ThirdPartyEntityKind from synapse.api.errors import CodeMessageException from synapse.http.client import SimpleHttpClient from synapse.events.utils import serialize_event -from synapse.util.logcontext import preserve_fn, make_deferred_yieldable from synapse.util.caches.response_cache import ResponseCache from synapse.types import ThirdPartyInstanceID @@ -194,12 +193,7 @@ class ApplicationServiceApi(SimpleHttpClient): defer.returnValue(None) key = (service.id, protocol) - result = self.protocol_meta_cache.get(key) - if not result: - result = self.protocol_meta_cache.set( - key, preserve_fn(_get)() - ) - return make_deferred_yieldable(result) + return self.protocol_meta_cache.wrap(key, _get) @defer.inlineCallbacks def push_bulk(self, service, events, txn_id=None): diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index e4ce037acf..d1611f39a9 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -30,7 +30,6 @@ import synapse.metrics from synapse.types import get_domain_from_id from synapse.util import async from synapse.util.caches.response_cache import ResponseCache -from synapse.util.logcontext import make_deferred_yieldable, preserve_fn from synapse.util.logutils import log_function # when processing incoming transactions, we try to handle multiple rooms in @@ -212,16 +211,11 @@ class FederationServer(FederationBase): if not in_room: raise AuthError(403, "Host not in room.") - result = self._state_resp_cache.get((room_id, event_id)) - if not result: - with (yield self._server_linearizer.queue((origin, room_id))): - d = self._state_resp_cache.set( - (room_id, event_id), - preserve_fn(self._on_context_state_request_compute)(room_id, event_id) - ) - resp = yield make_deferred_yieldable(d) - else: - resp = yield make_deferred_yieldable(result) + resp = yield self._state_resp_cache.wrap( + (room_id, event_id), + self._on_context_state_request_compute, + room_id, event_id, + ) defer.returnValue((200, resp)) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index 8028d793c2..add3f9b009 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -20,7 +20,6 @@ from ._base import BaseHandler from synapse.api.constants import ( EventTypes, JoinRules, ) -from synapse.util.logcontext import make_deferred_yieldable, preserve_fn from synapse.util.async import concurrently_execute from synapse.util.caches.descriptors import cachedInlineCallbacks from synapse.util.caches.response_cache import ResponseCache @@ -78,18 +77,11 @@ class RoomListHandler(BaseHandler): ) key = (limit, since_token, network_tuple) - result = self.response_cache.get(key) - if not result: - logger.info("No cached result, calculating one.") - result = self.response_cache.set( - key, - preserve_fn(self._get_public_room_list)( - limit, since_token, network_tuple=network_tuple - ) - ) - else: - logger.info("Using cached deferred result.") - return make_deferred_yieldable(result) + return self.response_cache.wrap( + key, + self._get_public_room_list, + limit, since_token, network_tuple=network_tuple, + ) @defer.inlineCallbacks def _get_public_room_list(self, limit=None, since_token=None, @@ -423,18 +415,14 @@ class RoomListHandler(BaseHandler): server_name, limit, since_token, include_all_networks, third_party_instance_id, ) - result = self.remote_response_cache.get(key) - if not result: - result = self.remote_response_cache.set( - key, - repl_layer.get_public_rooms( - server_name, limit=limit, since_token=since_token, - search_filter=search_filter, - include_all_networks=include_all_networks, - third_party_instance_id=third_party_instance_id, - ) - ) - return result + return self.remote_response_cache.wrap( + key, + repl_layer.get_public_rooms, + server_name, limit=limit, since_token=since_token, + search_filter=search_filter, + include_all_networks=include_all_networks, + third_party_instance_id=third_party_instance_id, + ) class RoomListNextBatch(namedtuple("RoomListNextBatch", ( diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 06d17ab20c..c6946831ab 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -15,7 +15,7 @@ from synapse.api.constants import Membership, EventTypes from synapse.util.async import concurrently_execute -from synapse.util.logcontext import LoggingContext, make_deferred_yieldable, preserve_fn +from synapse.util.logcontext import LoggingContext from synapse.util.metrics import Measure, measure_func from synapse.util.caches.response_cache import ResponseCache from synapse.push.clientformat import format_push_rules_for_user @@ -180,15 +180,11 @@ class SyncHandler(object): Returns: A Deferred SyncResult. """ - result = self.response_cache.get(sync_config.request_key) - if not result: - result = self.response_cache.set( - sync_config.request_key, - preserve_fn(self._wait_for_sync_for_user)( - sync_config, since_token, timeout, full_state - ) - ) - return make_deferred_yieldable(result) + return self.response_cache.wrap( + sync_config.request_key, + self._wait_for_sync_for_user, + sync_config, since_token, timeout, full_state, + ) @defer.inlineCallbacks def _wait_for_sync_for_user(self, sync_config, since_token, timeout, diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index c6a6551d24..a9baa2c1c3 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -23,7 +23,6 @@ from synapse.events.snapshot import EventContext from synapse.http.servlet import RestServlet, parse_json_object_from_request from synapse.util.async import sleep from synapse.util.caches.response_cache import ResponseCache -from synapse.util.logcontext import make_deferred_yieldable, preserve_fn from synapse.util.metrics import Measure from synapse.types import Requester, UserID @@ -118,17 +117,12 @@ class ReplicationSendEventRestServlet(RestServlet): self.response_cache = ResponseCache(hs, "send_event", timeout_ms=30 * 60 * 1000) def on_PUT(self, request, event_id): - result = self.response_cache.get(event_id) - if not result: - result = self.response_cache.set( - event_id, - self._handle_request(request) - ) - else: - logger.warn("Returning cached response") - return make_deferred_yieldable(result) - - @preserve_fn + return self.response_cache.wrap( + event_id, + self._handle_request, + request + ) + @defer.inlineCallbacks def _handle_request(self, request): with Measure(self.clock, "repl_send_event_parse"): diff --git a/synapse/util/caches/response_cache.py b/synapse/util/caches/response_cache.py index 066fa423fd..0c2c347953 100644 --- a/synapse/util/caches/response_cache.py +++ b/synapse/util/caches/response_cache.py @@ -12,9 +12,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import logging from synapse.util.async import ObservableDeferred from synapse.util.caches import metrics as cache_metrics +from synapse.util.logcontext import make_deferred_yieldable, run_in_background + +logger = logging.getLogger(__name__) class ResponseCache(object): @@ -31,6 +35,7 @@ class ResponseCache(object): self.clock = hs.get_clock() self.timeout_sec = timeout_ms / 1000. + self._name = name self._metrics = cache_metrics.register_cache( "response_cache", size_callback=lambda: self.size(), @@ -47,7 +52,7 @@ class ResponseCache(object): so you'll probably want to make_deferred_yieldable it. Args: - key (str): + key (hashable): Returns: twisted.internet.defer.Deferred|None: None if there is no entry @@ -76,7 +81,7 @@ class ResponseCache(object): to do it everywhere ResponseCache is used.) Args: - key (str): + key (hashable): deferred (twisted.internet.defer.Deferred): Returns: @@ -97,3 +102,52 @@ class ResponseCache(object): result.addBoth(remove) return result.observe() + + def wrap(self, key, callback, *args, **kwargs): + """Wrap together a *get* and *set* call, taking care of logcontexts + + First looks up the key in the cache, and if it is present makes it + follow the synapse logcontext rules and returns it. + + Otherwise, makes a call to *callback(*args, **kwargs)*, which should + follow the synapse logcontext rules, and adds the result to the cache. + + Example usage: + + @defer.inlineCallbacks + def handle_request(request): + # etc + defer.returnValue(result) + + result = yield response_cache.wrap( + key, + handle_request, + request, + ) + + Args: + key (hashable): key to get/set in the cache + + callback (callable): function to call if the key is not found in + the cache + + *args: positional parameters to pass to the callback, if it is used + + **kwargs: named paramters to pass to the callback, if it is used + + Returns: + twisted.internet.defer.Deferred: yieldable result + """ + result = self.get(key) + if not result: + logger.info("[%s]: no cached result for [%s], calculating new one", + self._name, key) + d = run_in_background(callback, *args, **kwargs) + result = self.set(key, d) + elif result.called: + logger.info("[%s]: using completed cached result for [%s]", + self._name, key) + else: + logger.info("[%s]: using incomplete cached result for [%s]", + self._name, key) + return make_deferred_yieldable(result) -- cgit 1.4.1 From 36c59ce66908a770b580c95bee0dd3aaf9906f0e Mon Sep 17 00:00:00 2001 From: Adrian Tschira Date: Sun, 15 Apr 2018 16:51:07 +0200 Subject: Use six.itervalues in some places There's more where that came from Signed-off-by: Adrian Tschira --- synapse/api/errors.py | 3 ++- synapse/app/synchrotron.py | 4 +++- synapse/federation/federation_server.py | 8 +++++--- synapse/federation/send_queue.py | 14 ++++++++------ 4 files changed, 18 insertions(+), 11 deletions(-) (limited to 'synapse/federation/federation_server.py') diff --git a/synapse/api/errors.py b/synapse/api/errors.py index bee59e80dd..a9ff5576f3 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -18,6 +18,7 @@ import logging import simplejson as json +from six import iteritems logger = logging.getLogger(__name__) @@ -297,7 +298,7 @@ def cs_error(msg, code=Codes.UNKNOWN, **kwargs): A dict representing the error response JSON. """ err = {"error": msg, "errcode": code} - for key, value in kwargs.iteritems(): + for key, value in iteritems(kwargs): err[key] = value return err diff --git a/synapse/app/synchrotron.py b/synapse/app/synchrotron.py index 508b66613d..2fddcd935a 100644 --- a/synapse/app/synchrotron.py +++ b/synapse/app/synchrotron.py @@ -58,6 +58,8 @@ from synapse.util.versionstring import get_version_string from twisted.internet import defer, reactor from twisted.web.resource import NoResource +from six import iteritems + logger = logging.getLogger("synapse.app.synchrotron") @@ -211,7 +213,7 @@ class SynchrotronPresence(object): def get_currently_syncing_users(self): return [ - user_id for user_id, count in self.user_to_num_current_syncs.iteritems() + user_id for user_id, count in iteritems(self.user_to_num_current_syncs) if count > 0 ] diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index e4ce037acf..2dadf7deab 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -33,6 +33,8 @@ from synapse.util.caches.response_cache import ResponseCache from synapse.util.logcontext import make_deferred_yieldable, preserve_fn from synapse.util.logutils import log_function +from six import iteritems + # when processing incoming transactions, we try to handle multiple rooms in # parallel, up to this limit. TRANSACTION_CONCURRENCY_LIMIT = 10 @@ -425,9 +427,9 @@ class FederationServer(FederationBase): "Claimed one-time-keys: %s", ",".join(( "%s for %s:%s" % (key_id, user_id, device_id) - for user_id, user_keys in json_result.iteritems() - for device_id, device_keys in user_keys.iteritems() - for key_id, _ in device_keys.iteritems() + for user_id, user_keys in iteritems(json_result) + for device_id, device_keys in iteritems(user_keys) + for key_id, _ in iteritems(device_keys) )), ) diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py index 93e5acebc1..0f0c687b37 100644 --- a/synapse/federation/send_queue.py +++ b/synapse/federation/send_queue.py @@ -40,6 +40,8 @@ from collections import namedtuple import logging +from six import itervalues, iteritems + logger = logging.getLogger(__name__) @@ -122,7 +124,7 @@ class FederationRemoteSendQueue(object): user_ids = set( user_id - for uids in self.presence_changed.itervalues() + for uids in itervalues(self.presence_changed) for user_id in uids ) @@ -276,7 +278,7 @@ class FederationRemoteSendQueue(object): # stream position. keyed_edus = {self.keyed_edu_changed[k]: k for k in keys[i:j]} - for ((destination, edu_key), pos) in keyed_edus.iteritems(): + for ((destination, edu_key), pos) in iteritems(keyed_edus): rows.append((pos, KeyedEduRow( key=edu_key, edu=self.keyed_edu[(destination, edu_key)], @@ -309,7 +311,7 @@ class FederationRemoteSendQueue(object): j = keys.bisect_right(to_token) + 1 device_messages = {self.device_messages[k]: k for k in keys[i:j]} - for (destination, pos) in device_messages.iteritems(): + for (destination, pos) in iteritems(device_messages): rows.append((pos, DeviceRow( destination=destination, ))) @@ -528,19 +530,19 @@ def process_rows_for_federation(transaction_queue, rows): if buff.presence: transaction_queue.send_presence(buff.presence) - for destination, edu_map in buff.keyed_edus.iteritems(): + for destination, edu_map in iteritems(buff.keyed_edus): for key, edu in edu_map.items(): transaction_queue.send_edu( edu.destination, edu.edu_type, edu.content, key=key, ) - for destination, edu_list in buff.edus.iteritems(): + for destination, edu_list in iteritems(buff.edus): for edu in edu_list: transaction_queue.send_edu( edu.destination, edu.edu_type, edu.content, key=None, ) - for destination, failure_list in buff.failures.iteritems(): + for destination, failure_list in iteritems(buff.failures): for failure in failure_list: transaction_queue.send_failure(destination, failure) -- cgit 1.4.1 From 1f4b498b7313b9bff5bfc3eeeed25c7659276e82 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 17 Apr 2018 23:41:36 +0100 Subject: Add some comments --- synapse/federation/federation_server.py | 25 +++++++++++++++++++++++-- synapse/handlers/federation.py | 15 ++++++++++++--- 2 files changed, 35 insertions(+), 5 deletions(-) (limited to 'synapse/federation/federation_server.py') diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index e4ce037acf..682721cec7 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2015, 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -494,13 +495,33 @@ class FederationServer(FederationBase): def _handle_received_pdu(self, origin, pdu): """ Process a PDU received in a federation /send/ transaction. + If the event is invalid, then this method throws a FederationError. + (The error will then be logged and sent back to the sender (which + probably won't do anything with it), and other events in the + transaction will be processed as normal). + + It is likely that we'll then receive other events which refer to + this rejected_event in their prev_events, etc. When that happens, + we'll attempt to fetch the rejected event again, which will presumably + fail, so those second-generation events will also get rejected. + + Eventually, we get to the point where there are more than 10 events + between any new events and the original rejected event. Since we + only try to backfill 10 events deep on received pdu, we then accept the + new event, possibly introducing a discontinuity in the DAG, with new + forward extremities, so normal service is approximately returned, + until we try to backfill across the discontinuity. + Args: origin (str): server which sent the pdu pdu (FrozenEvent): received pdu Returns (Deferred): completes with None - Raises: FederationError if the signatures / hash do not match - """ + + Raises: FederationError if the signatures / hash do not match, or + if the event was unacceptable for any other reason (eg, too large, + too many prev_events, couldn't find the prev_events) + """ # check that it's actually being sent from a valid destination to # workaround bug #1753 in 0.18.5 and 0.18.6 if origin != get_domain_from_id(pdu.event_id): diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 310db3fb46..1296e22af6 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -542,9 +542,16 @@ class FederationHandler(BaseHandler): def backfill(self, dest, room_id, limit, extremities): """ Trigger a backfill request to `dest` for the given `room_id` - This will attempt to get more events from the remote. This may return - be successfull and still return no events if the other side has no new - events to offer. + This will attempt to get more events from the remote. If the other side + has no new events to offer, this will return an empty list. + + As the events are received, we check their signatures, and also do some + sanity-checking on them. If any of the backfilled events are invalid, + this method throws a SynapseError. + + TODO: make this more useful to distinguish failures of the remote + server from invalid events (there is probably no point in trying to + re-fetch invalid events from every other HS in the room.) """ if dest == self.server_name: raise SynapseError(400, "Can't backfill from self.") @@ -556,6 +563,8 @@ class FederationHandler(BaseHandler): extremities=extremities, ) + # do some sanity-checking of the received events, before we go and + # do state resolution across 1000 events. for ev in events: self._sanity_check_event(ev) -- cgit 1.4.1 From 0c280d4d99e93d2fd9120a7ec93c0997d372adb4 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 20 Apr 2018 11:10:04 +0100 Subject: Reinstate linearizer for federation_server.on_context_state_request --- synapse/federation/federation_server.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'synapse/federation/federation_server.py') diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index d1611f39a9..12843fe179 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -211,11 +211,17 @@ class FederationServer(FederationBase): if not in_room: raise AuthError(403, "Host not in room.") - resp = yield self._state_resp_cache.wrap( - (room_id, event_id), - self._on_context_state_request_compute, - room_id, event_id, - ) + # we grab the linearizer to protect ourselves from servers which hammer + # us. In theory we might already have the response to this query + # in the cache so we could return it without waiting for the linearizer + # - but that's non-trivial to get right, and anyway somewhat defeats + # the point of the linearizer. + with (yield self._server_linearizer.queue((origin, room_id))): + resp = yield self._state_resp_cache.wrap( + (room_id, event_id), + self._on_context_state_request_compute, + room_id, event_id, + ) defer.returnValue((200, resp)) -- cgit 1.4.1