diff options
Diffstat (limited to 'synapse/federation')
-rw-r--r-- | synapse/federation/federation_client.py | 300 | ||||
-rw-r--r-- | synapse/federation/federation_server.py | 5 | ||||
-rw-r--r-- | synapse/federation/send_queue.py | 63 | ||||
-rw-r--r-- | synapse/federation/transaction_queue.py | 32 | ||||
-rw-r--r-- | synapse/federation/transport/server.py | 5 | ||||
-rw-r--r-- | synapse/federation/units.py | 1 |
6 files changed, 171 insertions, 235 deletions
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 62d7ed13cf..7550e11b6e 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -48,6 +48,13 @@ sent_queries_counter = Counter("synapse_federation_client_sent_queries", "", ["t PDU_RETRY_TIME_MS = 1 * 60 * 1000 +class InvalidResponseError(RuntimeError): + """Helper for _try_destination_list: indicates that the server returned a response + we couldn't parse + """ + pass + + class FederationClient(FederationBase): def __init__(self, hs): super(FederationClient, self).__init__(hs) @@ -458,6 +465,61 @@ class FederationClient(FederationBase): defer.returnValue(signed_auth) @defer.inlineCallbacks + def _try_destination_list(self, description, destinations, callback): + """Try an operation on a series of servers, until it succeeds + + Args: + description (unicode): description of the operation we're doing, for logging + + destinations (Iterable[unicode]): list of server_names to try + + callback (callable): Function to run for each server. Passed a single + argument: the server_name to try. May return a deferred. + + If the callback raises a CodeMessageException with a 300/400 code, + attempts to perform the operation stop immediately and the exception is + reraised. + + Otherwise, if the callback raises an Exception the error is logged and the + next server tried. Normally the stacktrace is logged but this is + suppressed if the exception is an InvalidResponseError. + + Returns: + The [Deferred] result of callback, if it succeeds + + Raises: + SynapseError if the chosen remote server returns a 300/400 code. + + RuntimeError if no servers were reachable. + """ + for destination in destinations: + if destination == self.server_name: + continue + + try: + res = yield callback(destination) + defer.returnValue(res) + except InvalidResponseError as e: + logger.warn( + "Failed to %s via %s: %s", + description, destination, e, + ) + except HttpResponseException as e: + if not 500 <= e.code < 600: + raise e.to_synapse_error() + else: + logger.warn( + "Failed to %s via %s: %i %s", + description, destination, e.code, e.message, + ) + except Exception: + logger.warn( + "Failed to %s via %s", + description, destination, exc_info=1, + ) + + raise RuntimeError("Failed to %s via any server", description) + def make_membership_event(self, destinations, room_id, user_id, membership, content={},): """ @@ -481,7 +543,7 @@ class FederationClient(FederationBase): Deferred: resolves to a tuple of (origin (str), event (object)) where origin is the remote homeserver which generated the event. - Fails with a ``CodeMessageException`` if the chosen remote server + Fails with a ``SynapseError`` if the chosen remote server returns a 300/400 code. Fails with a ``RuntimeError`` if no servers were reachable. @@ -492,50 +554,35 @@ class FederationClient(FederationBase): "make_membership_event called with membership='%s', must be one of %s" % (membership, ",".join(valid_memberships)) ) - for destination in destinations: - if destination == self.server_name: - continue - try: - ret = yield self.transport_layer.make_membership_event( - destination, room_id, user_id, membership - ) + @defer.inlineCallbacks + def send_request(destination): + ret = yield self.transport_layer.make_membership_event( + destination, room_id, user_id, membership + ) - pdu_dict = ret["event"] + pdu_dict = ret["event"] - logger.debug("Got response to make_%s: %s", membership, pdu_dict) + logger.debug("Got response to make_%s: %s", membership, pdu_dict) - pdu_dict["content"].update(content) + pdu_dict["content"].update(content) - # The protoevent received over the JSON wire may not have all - # the required fields. Lets just gloss over that because - # there's some we never care about - if "prev_state" not in pdu_dict: - pdu_dict["prev_state"] = [] + # The protoevent received over the JSON wire may not have all + # the required fields. Lets just gloss over that because + # there's some we never care about + if "prev_state" not in pdu_dict: + pdu_dict["prev_state"] = [] - ev = builder.EventBuilder(pdu_dict) + ev = builder.EventBuilder(pdu_dict) - defer.returnValue( - (destination, ev) - ) - break - except CodeMessageException as e: - if not 500 <= e.code < 600: - raise - else: - logger.warn( - "Failed to make_%s via %s: %s", - membership, destination, e.message - ) - except Exception as e: - logger.warn( - "Failed to make_%s via %s: %s", - membership, destination, e.message - ) + defer.returnValue( + (destination, ev) + ) - raise RuntimeError("Failed to send to any server.") + return self._try_destination_list( + "make_" + membership, destinations, send_request, + ) - @defer.inlineCallbacks def send_join(self, destinations, pdu): """Sends a join event to one of a list of homeservers. @@ -552,103 +599,91 @@ class FederationClient(FederationBase): giving the serer the event was sent to, ``state`` (?) and ``auth_chain``. - Fails with a ``CodeMessageException`` if the chosen remote server + Fails with a ``SynapseError`` if the chosen remote server returns a 300/400 code. Fails with a ``RuntimeError`` if no servers were reachable. """ - for destination in destinations: - if destination == self.server_name: - continue - - try: - time_now = self._clock.time_msec() - _, content = yield self.transport_layer.send_join( - destination=destination, - room_id=pdu.room_id, - event_id=pdu.event_id, - content=pdu.get_pdu_json(time_now), - ) + @defer.inlineCallbacks + def send_request(destination): + time_now = self._clock.time_msec() + _, content = yield self.transport_layer.send_join( + destination=destination, + room_id=pdu.room_id, + event_id=pdu.event_id, + content=pdu.get_pdu_json(time_now), + ) - logger.debug("Got content: %s", content) + logger.debug("Got content: %s", content) - state = [ - event_from_pdu_json(p, outlier=True) - for p in content.get("state", []) - ] + state = [ + event_from_pdu_json(p, outlier=True) + for p in content.get("state", []) + ] - auth_chain = [ - event_from_pdu_json(p, outlier=True) - for p in content.get("auth_chain", []) - ] + auth_chain = [ + event_from_pdu_json(p, outlier=True) + for p in content.get("auth_chain", []) + ] - pdus = { - p.event_id: p - for p in itertools.chain(state, auth_chain) - } + pdus = { + p.event_id: p + for p in itertools.chain(state, auth_chain) + } - valid_pdus = yield self._check_sigs_and_hash_and_fetch( - destination, list(pdus.values()), - outlier=True, - ) + valid_pdus = yield self._check_sigs_and_hash_and_fetch( + destination, list(pdus.values()), + outlier=True, + ) - valid_pdus_map = { - p.event_id: p - for p in valid_pdus - } - - # NB: We *need* to copy to ensure that we don't have multiple - # references being passed on, as that causes... issues. - signed_state = [ - copy.copy(valid_pdus_map[p.event_id]) - for p in state - if p.event_id in valid_pdus_map - ] + valid_pdus_map = { + p.event_id: p + for p in valid_pdus + } - signed_auth = [ - valid_pdus_map[p.event_id] - for p in auth_chain - if p.event_id in valid_pdus_map - ] + # NB: We *need* to copy to ensure that we don't have multiple + # references being passed on, as that causes... issues. + signed_state = [ + copy.copy(valid_pdus_map[p.event_id]) + for p in state + if p.event_id in valid_pdus_map + ] - # NB: We *need* to copy to ensure that we don't have multiple - # references being passed on, as that causes... issues. - for s in signed_state: - s.internal_metadata = copy.deepcopy(s.internal_metadata) + signed_auth = [ + valid_pdus_map[p.event_id] + for p in auth_chain + if p.event_id in valid_pdus_map + ] - auth_chain.sort(key=lambda e: e.depth) + # NB: We *need* to copy to ensure that we don't have multiple + # references being passed on, as that causes... issues. + for s in signed_state: + s.internal_metadata = copy.deepcopy(s.internal_metadata) - defer.returnValue({ - "state": signed_state, - "auth_chain": signed_auth, - "origin": destination, - }) - except CodeMessageException as e: - if not 500 <= e.code < 600: - raise - else: - logger.exception( - "Failed to send_join via %s: %s", - destination, e.message - ) - except Exception as e: - logger.exception( - "Failed to send_join via %s: %s", - destination, e.message - ) + auth_chain.sort(key=lambda e: e.depth) - raise RuntimeError("Failed to send to any server.") + defer.returnValue({ + "state": signed_state, + "auth_chain": signed_auth, + "origin": destination, + }) + return self._try_destination_list("send_join", destinations, send_request) @defer.inlineCallbacks def send_invite(self, destination, room_id, event_id, pdu): time_now = self._clock.time_msec() - code, content = yield self.transport_layer.send_invite( - destination=destination, - room_id=room_id, - event_id=event_id, - content=pdu.get_pdu_json(time_now), - ) + try: + code, content = yield self.transport_layer.send_invite( + destination=destination, + room_id=room_id, + event_id=event_id, + content=pdu.get_pdu_json(time_now), + ) + except HttpResponseException as e: + if e.code == 403: + raise e.to_synapse_error() + raise pdu_dict = content["event"] @@ -663,7 +698,6 @@ class FederationClient(FederationBase): defer.returnValue(pdu) - @defer.inlineCallbacks def send_leave(self, destinations, pdu): """Sends a leave event to one of a list of homeservers. @@ -680,35 +714,25 @@ class FederationClient(FederationBase): Return: Deferred: resolves to None. - Fails with a ``CodeMessageException`` if the chosen remote server - returns a non-200 code. + Fails with a ``SynapseError`` if the chosen remote server + returns a 300/400 code. Fails with a ``RuntimeError`` if no servers were reachable. """ - for destination in destinations: - if destination == self.server_name: - continue - - try: - time_now = self._clock.time_msec() - _, content = yield self.transport_layer.send_leave( - destination=destination, - room_id=pdu.room_id, - event_id=pdu.event_id, - content=pdu.get_pdu_json(time_now), - ) + @defer.inlineCallbacks + def send_request(destination): + time_now = self._clock.time_msec() + _, content = yield self.transport_layer.send_leave( + destination=destination, + room_id=pdu.room_id, + event_id=pdu.event_id, + content=pdu.get_pdu_json(time_now), + ) - logger.debug("Got content: %s", content) - defer.returnValue(None) - except CodeMessageException: - raise - except Exception as e: - logger.exception( - "Failed to send_leave via %s: %s", - destination, e.message - ) + logger.debug("Got content: %s", content) + defer.returnValue(None) - raise RuntimeError("Failed to send to any server.") + return self._try_destination_list("send_leave", destinations, send_request) def get_public_rooms(self, destination, limit=None, since_token=None, search_filter=None, include_all_networks=False, diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index e501251b6e..bf89d568af 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -207,10 +207,6 @@ class FederationServer(FederationBase): edu.content ) - pdu_failures = getattr(transaction, "pdu_failures", []) - for fail in pdu_failures: - logger.info("Got failure %r", fail) - response = { "pdus": pdu_results, } @@ -430,6 +426,7 @@ class FederationServer(FederationBase): ret = yield self.handler.on_query_auth( origin, event_id, + room_id, signed_auth, content.get("rejects", []), content.get("missing", []), diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py index 5157c3860d..0bb468385d 100644 --- a/synapse/federation/send_queue.py +++ b/synapse/federation/send_queue.py @@ -62,8 +62,6 @@ class FederationRemoteSendQueue(object): self.edus = SortedDict() # stream position -> Edu - self.failures = SortedDict() # stream position -> (destination, Failure) - self.device_messages = SortedDict() # stream position -> destination self.pos = 1 @@ -79,7 +77,7 @@ class FederationRemoteSendQueue(object): for queue_name in [ "presence_map", "presence_changed", "keyed_edu", "keyed_edu_changed", - "edus", "failures", "device_messages", "pos_time", + "edus", "device_messages", "pos_time", ]: register(queue_name, getattr(self, queue_name)) @@ -149,12 +147,6 @@ class FederationRemoteSendQueue(object): for key in keys[:i]: del self.edus[key] - # Delete things out of failure map - keys = self.failures.keys() - i = self.failures.bisect_left(position_to_delete) - for key in keys[:i]: - del self.failures[key] - # Delete things out of device map keys = self.device_messages.keys() i = self.device_messages.bisect_left(position_to_delete) @@ -204,13 +196,6 @@ class FederationRemoteSendQueue(object): self.notifier.on_new_replication_data() - def send_failure(self, failure, destination): - """As per TransactionQueue""" - pos = self._next_pos() - - self.failures[pos] = (destination, str(failure)) - self.notifier.on_new_replication_data() - def send_device_messages(self, destination): """As per TransactionQueue""" pos = self._next_pos() @@ -285,17 +270,6 @@ class FederationRemoteSendQueue(object): for (pos, edu) in edus: rows.append((pos, EduRow(edu))) - # Fetch changed failures - i = self.failures.bisect_right(from_token) - j = self.failures.bisect_right(to_token) + 1 - failures = self.failures.items()[i:j] - - for (pos, (destination, failure)) in failures: - rows.append((pos, FailureRow( - destination=destination, - failure=failure, - ))) - # Fetch changed device messages i = self.device_messages.bisect_right(from_token) j = self.device_messages.bisect_right(to_token) + 1 @@ -417,34 +391,6 @@ class EduRow(BaseFederationRow, namedtuple("EduRow", ( buff.edus.setdefault(self.edu.destination, []).append(self.edu) -class FailureRow(BaseFederationRow, namedtuple("FailureRow", ( - "destination", # str - "failure", -))): - """Streams failures to a remote server. Failures are issued when there was - something wrong with a transaction the remote sent us, e.g. it included - an event that was invalid. - """ - - TypeId = "f" - - @staticmethod - def from_data(data): - return FailureRow( - destination=data["destination"], - failure=data["failure"], - ) - - def to_data(self): - return { - "destination": self.destination, - "failure": self.failure, - } - - def add_to_buffer(self, buff): - buff.failures.setdefault(self.destination, []).append(self.failure) - - class DeviceRow(BaseFederationRow, namedtuple("DeviceRow", ( "destination", # str ))): @@ -471,7 +417,6 @@ TypeToRow = { PresenceRow, KeyedEduRow, EduRow, - FailureRow, DeviceRow, ) } @@ -481,7 +426,6 @@ ParsedFederationStreamData = namedtuple("ParsedFederationStreamData", ( "presence", # list(UserPresenceState) "keyed_edus", # dict of destination -> { key -> Edu } "edus", # dict of destination -> [Edu] - "failures", # dict of destination -> [failures] "device_destinations", # set of destinations )) @@ -503,7 +447,6 @@ def process_rows_for_federation(transaction_queue, rows): presence=[], keyed_edus={}, edus={}, - failures={}, device_destinations=set(), ) @@ -532,9 +475,5 @@ def process_rows_for_federation(transaction_queue, rows): edu.destination, edu.edu_type, edu.content, key=None, ) - for destination, failure_list in iteritems(buff.failures): - for failure in failure_list: - transaction_queue.send_failure(destination, failure) - for destination in buff.device_destinations: transaction_queue.send_device_messages(destination) diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 6996d6b695..78f9d40a3a 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -116,9 +116,6 @@ class TransactionQueue(object): ), ) - # destination -> list of tuple(failure, deferred) - self.pending_failures_by_dest = {} - # destination -> stream_id of last successfully sent to-device message. # NB: may be a long or an int. self.last_device_stream_id_by_dest = {} @@ -382,19 +379,6 @@ class TransactionQueue(object): self._attempt_new_transaction(destination) - def send_failure(self, failure, destination): - if destination == self.server_name or destination == "localhost": - return - - if not self.can_send_to(destination): - return - - self.pending_failures_by_dest.setdefault( - destination, [] - ).append(failure) - - self._attempt_new_transaction(destination) - def send_device_messages(self, destination): if destination == self.server_name or destination == "localhost": return @@ -469,7 +453,6 @@ class TransactionQueue(object): pending_pdus = self.pending_pdus_by_dest.pop(destination, []) pending_edus = self.pending_edus_by_dest.pop(destination, []) pending_presence = self.pending_presence_by_dest.pop(destination, {}) - pending_failures = self.pending_failures_by_dest.pop(destination, []) pending_edus.extend( self.pending_edus_keyed_by_dest.pop(destination, {}).values() @@ -497,7 +480,7 @@ class TransactionQueue(object): logger.debug("TX [%s] len(pending_pdus_by_dest[dest]) = %d", destination, len(pending_pdus)) - if not pending_pdus and not pending_edus and not pending_failures: + if not pending_pdus and not pending_edus: logger.debug("TX [%s] Nothing to send", destination) self.last_device_stream_id_by_dest[destination] = ( device_stream_id @@ -507,7 +490,7 @@ class TransactionQueue(object): # END CRITICAL SECTION success = yield self._send_new_transaction( - destination, pending_pdus, pending_edus, pending_failures, + destination, pending_pdus, pending_edus, ) if success: sent_transactions_counter.inc() @@ -584,14 +567,12 @@ class TransactionQueue(object): @measure_func("_send_new_transaction") @defer.inlineCallbacks - def _send_new_transaction(self, destination, pending_pdus, pending_edus, - pending_failures): + def _send_new_transaction(self, destination, pending_pdus, pending_edus): # Sort based on the order field pending_pdus.sort(key=lambda t: t[1]) pdus = [x[0] for x in pending_pdus] edus = pending_edus - failures = [x.get_dict() for x in pending_failures] success = True @@ -601,11 +582,10 @@ class TransactionQueue(object): logger.debug( "TX [%s] {%s} Attempting new transaction" - " (pdus: %d, edus: %d, failures: %d)", + " (pdus: %d, edus: %d)", destination, txn_id, len(pdus), len(edus), - len(failures) ) logger.debug("TX [%s] Persisting transaction...", destination) @@ -617,7 +597,6 @@ class TransactionQueue(object): destination=destination, pdus=pdus, edus=edus, - pdu_failures=failures, ) self._next_txn_id += 1 @@ -627,12 +606,11 @@ class TransactionQueue(object): logger.debug("TX [%s] Persisted transaction", destination) logger.info( "TX [%s] {%s} Sending transaction [%s]," - " (PDUs: %d, EDUs: %d, failures: %d)", + " (PDUs: %d, EDUs: %d)", destination, txn_id, transaction.transaction_id, len(pdus), len(edus), - len(failures), ) # Actually send the transaction diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 8574898f0c..eae5f2b427 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -165,7 +165,7 @@ def _parse_auth_header(header_bytes): param_dict = dict(kv.split("=") for kv in params) def strip_quotes(value): - if value.startswith(b"\""): + if value.startswith("\""): return value[1:-1] else: return value @@ -283,11 +283,10 @@ class FederationSendServlet(BaseFederationServlet): ) logger.info( - "Received txn %s from %s. (PDUs: %d, EDUs: %d, failures: %d)", + "Received txn %s from %s. (PDUs: %d, EDUs: %d)", transaction_id, origin, len(transaction_data.get("pdus", [])), len(transaction_data.get("edus", [])), - len(transaction_data.get("failures", [])), ) # We should ideally be getting this from the security layer. diff --git a/synapse/federation/units.py b/synapse/federation/units.py index bb1b3b13f7..c5ab14314e 100644 --- a/synapse/federation/units.py +++ b/synapse/federation/units.py @@ -73,7 +73,6 @@ class Transaction(JsonEncodedObject): "previous_ids", "pdus", "edus", - "pdu_failures", ] internal_keys = [ |