summary refs log tree commit diff
path: root/synapse/federation
diff options
context:
space:
mode:
authorErik Johnston <erik@matrix.org>2018-08-03 09:25:15 +0100
committerErik Johnston <erik@matrix.org>2018-08-03 09:25:15 +0100
commitcb298ff623c1ad375084f7687b7f6e546c4c1c1f (patch)
tree81854fc4ae0a45bed487a58f2c40974b683adb92 /synapse/federation
parentNewsfile (diff)
parentMerge pull request #3645 from matrix-org/michaelkaye/mention_newsfragment (diff)
downloadsynapse-cb298ff623c1ad375084f7687b7f6e546c4c1c1f.tar.xz
Merge branch 'develop' of github.com:matrix-org/synapse into erikj/refactor_repl_servlet
Diffstat (limited to 'synapse/federation')
-rw-r--r--synapse/federation/federation_client.py300
-rw-r--r--synapse/federation/federation_server.py5
-rw-r--r--synapse/federation/send_queue.py63
-rw-r--r--synapse/federation/transaction_queue.py32
-rw-r--r--synapse/federation/transport/server.py5
-rw-r--r--synapse/federation/units.py1
6 files changed, 171 insertions, 235 deletions
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index 62d7ed13cf..7550e11b6e 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -48,6 +48,13 @@ sent_queries_counter = Counter("synapse_federation_client_sent_queries", "", ["t
 PDU_RETRY_TIME_MS = 1 * 60 * 1000
 
 
+class InvalidResponseError(RuntimeError):
+    """Helper for _try_destination_list: indicates that the server returned a response
+    we couldn't parse
+    """
+    pass
+
+
 class FederationClient(FederationBase):
     def __init__(self, hs):
         super(FederationClient, self).__init__(hs)
@@ -458,6 +465,61 @@ class FederationClient(FederationBase):
         defer.returnValue(signed_auth)
 
     @defer.inlineCallbacks
+    def _try_destination_list(self, description, destinations, callback):
+        """Try an operation on a series of servers, until it succeeds
+
+        Args:
+            description (unicode): description of the operation we're doing, for logging
+
+            destinations (Iterable[unicode]): list of server_names to try
+
+            callback (callable):  Function to run for each server. Passed a single
+                argument: the server_name to try. May return a deferred.
+
+                If the callback raises a CodeMessageException with a 300/400 code,
+                attempts to perform the operation stop immediately and the exception is
+                reraised.
+
+                Otherwise, if the callback raises an Exception the error is logged and the
+                next server tried. Normally the stacktrace is logged but this is
+                suppressed if the exception is an InvalidResponseError.
+
+        Returns:
+            The [Deferred] result of callback, if it succeeds
+
+        Raises:
+            SynapseError if the chosen remote server returns a 300/400 code.
+
+            RuntimeError if no servers were reachable.
+        """
+        for destination in destinations:
+            if destination == self.server_name:
+                continue
+
+            try:
+                res = yield callback(destination)
+                defer.returnValue(res)
+            except InvalidResponseError as e:
+                logger.warn(
+                    "Failed to %s via %s: %s",
+                    description, destination, e,
+                )
+            except HttpResponseException as e:
+                if not 500 <= e.code < 600:
+                    raise e.to_synapse_error()
+                else:
+                    logger.warn(
+                        "Failed to %s via %s: %i %s",
+                        description, destination, e.code, e.message,
+                    )
+            except Exception:
+                logger.warn(
+                    "Failed to %s via %s",
+                    description, destination, exc_info=1,
+                )
+
+        raise RuntimeError("Failed to %s via any server", description)
+
     def make_membership_event(self, destinations, room_id, user_id, membership,
                               content={},):
         """
@@ -481,7 +543,7 @@ class FederationClient(FederationBase):
             Deferred: resolves to a tuple of (origin (str), event (object))
             where origin is the remote homeserver which generated the event.
 
-            Fails with a ``CodeMessageException`` if the chosen remote server
+            Fails with a ``SynapseError`` if the chosen remote server
             returns a 300/400 code.
 
             Fails with a ``RuntimeError`` if no servers were reachable.
@@ -492,50 +554,35 @@ class FederationClient(FederationBase):
                 "make_membership_event called with membership='%s', must be one of %s" %
                 (membership, ",".join(valid_memberships))
             )
-        for destination in destinations:
-            if destination == self.server_name:
-                continue
 
-            try:
-                ret = yield self.transport_layer.make_membership_event(
-                    destination, room_id, user_id, membership
-                )
+        @defer.inlineCallbacks
+        def send_request(destination):
+            ret = yield self.transport_layer.make_membership_event(
+                destination, room_id, user_id, membership
+            )
 
-                pdu_dict = ret["event"]
+            pdu_dict = ret["event"]
 
-                logger.debug("Got response to make_%s: %s", membership, pdu_dict)
+            logger.debug("Got response to make_%s: %s", membership, pdu_dict)
 
-                pdu_dict["content"].update(content)
+            pdu_dict["content"].update(content)
 
-                # The protoevent received over the JSON wire may not have all
-                # the required fields. Lets just gloss over that because
-                # there's some we never care about
-                if "prev_state" not in pdu_dict:
-                    pdu_dict["prev_state"] = []
+            # The protoevent received over the JSON wire may not have all
+            # the required fields. Lets just gloss over that because
+            # there's some we never care about
+            if "prev_state" not in pdu_dict:
+                pdu_dict["prev_state"] = []
 
-                ev = builder.EventBuilder(pdu_dict)
+            ev = builder.EventBuilder(pdu_dict)
 
-                defer.returnValue(
-                    (destination, ev)
-                )
-                break
-            except CodeMessageException as e:
-                if not 500 <= e.code < 600:
-                    raise
-                else:
-                    logger.warn(
-                        "Failed to make_%s via %s: %s",
-                        membership, destination, e.message
-                    )
-            except Exception as e:
-                logger.warn(
-                    "Failed to make_%s via %s: %s",
-                    membership, destination, e.message
-                )
+            defer.returnValue(
+                (destination, ev)
+            )
 
-        raise RuntimeError("Failed to send to any server.")
+        return self._try_destination_list(
+            "make_" + membership, destinations, send_request,
+        )
 
-    @defer.inlineCallbacks
     def send_join(self, destinations, pdu):
         """Sends a join event to one of a list of homeservers.
 
@@ -552,103 +599,91 @@ class FederationClient(FederationBase):
             giving the serer the event was sent to, ``state`` (?) and
             ``auth_chain``.
 
-            Fails with a ``CodeMessageException`` if the chosen remote server
+            Fails with a ``SynapseError`` if the chosen remote server
             returns a 300/400 code.
 
             Fails with a ``RuntimeError`` if no servers were reachable.
         """
 
-        for destination in destinations:
-            if destination == self.server_name:
-                continue
-
-            try:
-                time_now = self._clock.time_msec()
-                _, content = yield self.transport_layer.send_join(
-                    destination=destination,
-                    room_id=pdu.room_id,
-                    event_id=pdu.event_id,
-                    content=pdu.get_pdu_json(time_now),
-                )
+        @defer.inlineCallbacks
+        def send_request(destination):
+            time_now = self._clock.time_msec()
+            _, content = yield self.transport_layer.send_join(
+                destination=destination,
+                room_id=pdu.room_id,
+                event_id=pdu.event_id,
+                content=pdu.get_pdu_json(time_now),
+            )
 
-                logger.debug("Got content: %s", content)
+            logger.debug("Got content: %s", content)
 
-                state = [
-                    event_from_pdu_json(p, outlier=True)
-                    for p in content.get("state", [])
-                ]
+            state = [
+                event_from_pdu_json(p, outlier=True)
+                for p in content.get("state", [])
+            ]
 
-                auth_chain = [
-                    event_from_pdu_json(p, outlier=True)
-                    for p in content.get("auth_chain", [])
-                ]
+            auth_chain = [
+                event_from_pdu_json(p, outlier=True)
+                for p in content.get("auth_chain", [])
+            ]
 
-                pdus = {
-                    p.event_id: p
-                    for p in itertools.chain(state, auth_chain)
-                }
+            pdus = {
+                p.event_id: p
+                for p in itertools.chain(state, auth_chain)
+            }
 
-                valid_pdus = yield self._check_sigs_and_hash_and_fetch(
-                    destination, list(pdus.values()),
-                    outlier=True,
-                )
+            valid_pdus = yield self._check_sigs_and_hash_and_fetch(
+                destination, list(pdus.values()),
+                outlier=True,
+            )
 
-                valid_pdus_map = {
-                    p.event_id: p
-                    for p in valid_pdus
-                }
-
-                # NB: We *need* to copy to ensure that we don't have multiple
-                # references being passed on, as that causes... issues.
-                signed_state = [
-                    copy.copy(valid_pdus_map[p.event_id])
-                    for p in state
-                    if p.event_id in valid_pdus_map
-                ]
+            valid_pdus_map = {
+                p.event_id: p
+                for p in valid_pdus
+            }
 
-                signed_auth = [
-                    valid_pdus_map[p.event_id]
-                    for p in auth_chain
-                    if p.event_id in valid_pdus_map
-                ]
+            # NB: We *need* to copy to ensure that we don't have multiple
+            # references being passed on, as that causes... issues.
+            signed_state = [
+                copy.copy(valid_pdus_map[p.event_id])
+                for p in state
+                if p.event_id in valid_pdus_map
+            ]
 
-                # NB: We *need* to copy to ensure that we don't have multiple
-                # references being passed on, as that causes... issues.
-                for s in signed_state:
-                    s.internal_metadata = copy.deepcopy(s.internal_metadata)
+            signed_auth = [
+                valid_pdus_map[p.event_id]
+                for p in auth_chain
+                if p.event_id in valid_pdus_map
+            ]
 
-                auth_chain.sort(key=lambda e: e.depth)
+            # NB: We *need* to copy to ensure that we don't have multiple
+            # references being passed on, as that causes... issues.
+            for s in signed_state:
+                s.internal_metadata = copy.deepcopy(s.internal_metadata)
 
-                defer.returnValue({
-                    "state": signed_state,
-                    "auth_chain": signed_auth,
-                    "origin": destination,
-                })
-            except CodeMessageException as e:
-                if not 500 <= e.code < 600:
-                    raise
-                else:
-                    logger.exception(
-                        "Failed to send_join via %s: %s",
-                        destination, e.message
-                    )
-            except Exception as e:
-                logger.exception(
-                    "Failed to send_join via %s: %s",
-                    destination, e.message
-                )
+            auth_chain.sort(key=lambda e: e.depth)
 
-        raise RuntimeError("Failed to send to any server.")
+            defer.returnValue({
+                "state": signed_state,
+                "auth_chain": signed_auth,
+                "origin": destination,
+            })
+        return self._try_destination_list("send_join", destinations, send_request)
 
     @defer.inlineCallbacks
     def send_invite(self, destination, room_id, event_id, pdu):
         time_now = self._clock.time_msec()
-        code, content = yield self.transport_layer.send_invite(
-            destination=destination,
-            room_id=room_id,
-            event_id=event_id,
-            content=pdu.get_pdu_json(time_now),
-        )
+        try:
+            code, content = yield self.transport_layer.send_invite(
+                destination=destination,
+                room_id=room_id,
+                event_id=event_id,
+                content=pdu.get_pdu_json(time_now),
+            )
+        except HttpResponseException as e:
+            if e.code == 403:
+                raise e.to_synapse_error()
+            raise
 
         pdu_dict = content["event"]
 
@@ -663,7 +698,6 @@ class FederationClient(FederationBase):
 
         defer.returnValue(pdu)
 
-    @defer.inlineCallbacks
     def send_leave(self, destinations, pdu):
         """Sends a leave event to one of a list of homeservers.
 
@@ -680,35 +714,25 @@ class FederationClient(FederationBase):
         Return:
             Deferred: resolves to None.
 
-            Fails with a ``CodeMessageException`` if the chosen remote server
-            returns a non-200 code.
+            Fails with a ``SynapseError`` if the chosen remote server
+            returns a 300/400 code.
 
             Fails with a ``RuntimeError`` if no servers were reachable.
         """
-        for destination in destinations:
-            if destination == self.server_name:
-                continue
-
-            try:
-                time_now = self._clock.time_msec()
-                _, content = yield self.transport_layer.send_leave(
-                    destination=destination,
-                    room_id=pdu.room_id,
-                    event_id=pdu.event_id,
-                    content=pdu.get_pdu_json(time_now),
-                )
+        @defer.inlineCallbacks
+        def send_request(destination):
+            time_now = self._clock.time_msec()
+            _, content = yield self.transport_layer.send_leave(
+                destination=destination,
+                room_id=pdu.room_id,
+                event_id=pdu.event_id,
+                content=pdu.get_pdu_json(time_now),
+            )
 
-                logger.debug("Got content: %s", content)
-                defer.returnValue(None)
-            except CodeMessageException:
-                raise
-            except Exception as e:
-                logger.exception(
-                    "Failed to send_leave via %s: %s",
-                    destination, e.message
-                )
+            logger.debug("Got content: %s", content)
+            defer.returnValue(None)
 
-        raise RuntimeError("Failed to send to any server.")
+        return self._try_destination_list("send_leave", destinations, send_request)
 
     def get_public_rooms(self, destination, limit=None, since_token=None,
                          search_filter=None, include_all_networks=False,
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index e501251b6e..bf89d568af 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -207,10 +207,6 @@ class FederationServer(FederationBase):
                     edu.content
                 )
 
-        pdu_failures = getattr(transaction, "pdu_failures", [])
-        for fail in pdu_failures:
-            logger.info("Got failure %r", fail)
-
         response = {
             "pdus": pdu_results,
         }
@@ -430,6 +426,7 @@ class FederationServer(FederationBase):
             ret = yield self.handler.on_query_auth(
                 origin,
                 event_id,
+                room_id,
                 signed_auth,
                 content.get("rejects", []),
                 content.get("missing", []),
diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py
index 5157c3860d..0bb468385d 100644
--- a/synapse/federation/send_queue.py
+++ b/synapse/federation/send_queue.py
@@ -62,8 +62,6 @@ class FederationRemoteSendQueue(object):
 
         self.edus = SortedDict()  # stream position -> Edu
 
-        self.failures = SortedDict()  # stream position -> (destination, Failure)
-
         self.device_messages = SortedDict()  # stream position -> destination
 
         self.pos = 1
@@ -79,7 +77,7 @@ class FederationRemoteSendQueue(object):
 
         for queue_name in [
             "presence_map", "presence_changed", "keyed_edu", "keyed_edu_changed",
-            "edus", "failures", "device_messages", "pos_time",
+            "edus", "device_messages", "pos_time",
         ]:
             register(queue_name, getattr(self, queue_name))
 
@@ -149,12 +147,6 @@ class FederationRemoteSendQueue(object):
             for key in keys[:i]:
                 del self.edus[key]
 
-            # Delete things out of failure map
-            keys = self.failures.keys()
-            i = self.failures.bisect_left(position_to_delete)
-            for key in keys[:i]:
-                del self.failures[key]
-
             # Delete things out of device map
             keys = self.device_messages.keys()
             i = self.device_messages.bisect_left(position_to_delete)
@@ -204,13 +196,6 @@ class FederationRemoteSendQueue(object):
 
         self.notifier.on_new_replication_data()
 
-    def send_failure(self, failure, destination):
-        """As per TransactionQueue"""
-        pos = self._next_pos()
-
-        self.failures[pos] = (destination, str(failure))
-        self.notifier.on_new_replication_data()
-
     def send_device_messages(self, destination):
         """As per TransactionQueue"""
         pos = self._next_pos()
@@ -285,17 +270,6 @@ class FederationRemoteSendQueue(object):
         for (pos, edu) in edus:
             rows.append((pos, EduRow(edu)))
 
-        # Fetch changed failures
-        i = self.failures.bisect_right(from_token)
-        j = self.failures.bisect_right(to_token) + 1
-        failures = self.failures.items()[i:j]
-
-        for (pos, (destination, failure)) in failures:
-            rows.append((pos, FailureRow(
-                destination=destination,
-                failure=failure,
-            )))
-
         # Fetch changed device messages
         i = self.device_messages.bisect_right(from_token)
         j = self.device_messages.bisect_right(to_token) + 1
@@ -417,34 +391,6 @@ class EduRow(BaseFederationRow, namedtuple("EduRow", (
         buff.edus.setdefault(self.edu.destination, []).append(self.edu)
 
 
-class FailureRow(BaseFederationRow, namedtuple("FailureRow", (
-    "destination",  # str
-    "failure",
-))):
-    """Streams failures to a remote server. Failures are issued when there was
-    something wrong with a transaction the remote sent us, e.g. it included
-    an event that was invalid.
-    """
-
-    TypeId = "f"
-
-    @staticmethod
-    def from_data(data):
-        return FailureRow(
-            destination=data["destination"],
-            failure=data["failure"],
-        )
-
-    def to_data(self):
-        return {
-            "destination": self.destination,
-            "failure": self.failure,
-        }
-
-    def add_to_buffer(self, buff):
-        buff.failures.setdefault(self.destination, []).append(self.failure)
-
-
 class DeviceRow(BaseFederationRow, namedtuple("DeviceRow", (
     "destination",  # str
 ))):
@@ -471,7 +417,6 @@ TypeToRow = {
         PresenceRow,
         KeyedEduRow,
         EduRow,
-        FailureRow,
         DeviceRow,
     )
 }
@@ -481,7 +426,6 @@ ParsedFederationStreamData = namedtuple("ParsedFederationStreamData", (
     "presence",  # list(UserPresenceState)
     "keyed_edus",  # dict of destination -> { key -> Edu }
     "edus",  # dict of destination -> [Edu]
-    "failures",  # dict of destination -> [failures]
     "device_destinations",  # set of destinations
 ))
 
@@ -503,7 +447,6 @@ def process_rows_for_federation(transaction_queue, rows):
         presence=[],
         keyed_edus={},
         edus={},
-        failures={},
         device_destinations=set(),
     )
 
@@ -532,9 +475,5 @@ def process_rows_for_federation(transaction_queue, rows):
                 edu.destination, edu.edu_type, edu.content, key=None,
             )
 
-    for destination, failure_list in iteritems(buff.failures):
-        for failure in failure_list:
-            transaction_queue.send_failure(destination, failure)
-
     for destination in buff.device_destinations:
         transaction_queue.send_device_messages(destination)
diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py
index 6996d6b695..78f9d40a3a 100644
--- a/synapse/federation/transaction_queue.py
+++ b/synapse/federation/transaction_queue.py
@@ -116,9 +116,6 @@ class TransactionQueue(object):
             ),
         )
 
-        # destination -> list of tuple(failure, deferred)
-        self.pending_failures_by_dest = {}
-
         # destination -> stream_id of last successfully sent to-device message.
         # NB: may be a long or an int.
         self.last_device_stream_id_by_dest = {}
@@ -382,19 +379,6 @@ class TransactionQueue(object):
 
         self._attempt_new_transaction(destination)
 
-    def send_failure(self, failure, destination):
-        if destination == self.server_name or destination == "localhost":
-            return
-
-        if not self.can_send_to(destination):
-            return
-
-        self.pending_failures_by_dest.setdefault(
-            destination, []
-        ).append(failure)
-
-        self._attempt_new_transaction(destination)
-
     def send_device_messages(self, destination):
         if destination == self.server_name or destination == "localhost":
             return
@@ -469,7 +453,6 @@ class TransactionQueue(object):
                 pending_pdus = self.pending_pdus_by_dest.pop(destination, [])
                 pending_edus = self.pending_edus_by_dest.pop(destination, [])
                 pending_presence = self.pending_presence_by_dest.pop(destination, {})
-                pending_failures = self.pending_failures_by_dest.pop(destination, [])
 
                 pending_edus.extend(
                     self.pending_edus_keyed_by_dest.pop(destination, {}).values()
@@ -497,7 +480,7 @@ class TransactionQueue(object):
                     logger.debug("TX [%s] len(pending_pdus_by_dest[dest]) = %d",
                                  destination, len(pending_pdus))
 
-                if not pending_pdus and not pending_edus and not pending_failures:
+                if not pending_pdus and not pending_edus:
                     logger.debug("TX [%s] Nothing to send", destination)
                     self.last_device_stream_id_by_dest[destination] = (
                         device_stream_id
@@ -507,7 +490,7 @@ class TransactionQueue(object):
                 # END CRITICAL SECTION
 
                 success = yield self._send_new_transaction(
-                    destination, pending_pdus, pending_edus, pending_failures,
+                    destination, pending_pdus, pending_edus,
                 )
                 if success:
                     sent_transactions_counter.inc()
@@ -584,14 +567,12 @@ class TransactionQueue(object):
 
     @measure_func("_send_new_transaction")
     @defer.inlineCallbacks
-    def _send_new_transaction(self, destination, pending_pdus, pending_edus,
-                              pending_failures):
+    def _send_new_transaction(self, destination, pending_pdus, pending_edus):
 
         # Sort based on the order field
         pending_pdus.sort(key=lambda t: t[1])
         pdus = [x[0] for x in pending_pdus]
         edus = pending_edus
-        failures = [x.get_dict() for x in pending_failures]
 
         success = True
 
@@ -601,11 +582,10 @@ class TransactionQueue(object):
 
         logger.debug(
             "TX [%s] {%s} Attempting new transaction"
-            " (pdus: %d, edus: %d, failures: %d)",
+            " (pdus: %d, edus: %d)",
             destination, txn_id,
             len(pdus),
             len(edus),
-            len(failures)
         )
 
         logger.debug("TX [%s] Persisting transaction...", destination)
@@ -617,7 +597,6 @@ class TransactionQueue(object):
             destination=destination,
             pdus=pdus,
             edus=edus,
-            pdu_failures=failures,
         )
 
         self._next_txn_id += 1
@@ -627,12 +606,11 @@ class TransactionQueue(object):
         logger.debug("TX [%s] Persisted transaction", destination)
         logger.info(
             "TX [%s] {%s} Sending transaction [%s],"
-            " (PDUs: %d, EDUs: %d, failures: %d)",
+            " (PDUs: %d, EDUs: %d)",
             destination, txn_id,
             transaction.transaction_id,
             len(pdus),
             len(edus),
-            len(failures),
         )
 
         # Actually send the transaction
diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py
index 8574898f0c..eae5f2b427 100644
--- a/synapse/federation/transport/server.py
+++ b/synapse/federation/transport/server.py
@@ -165,7 +165,7 @@ def _parse_auth_header(header_bytes):
         param_dict = dict(kv.split("=") for kv in params)
 
         def strip_quotes(value):
-            if value.startswith(b"\""):
+            if value.startswith("\""):
                 return value[1:-1]
             else:
                 return value
@@ -283,11 +283,10 @@ class FederationSendServlet(BaseFederationServlet):
             )
 
             logger.info(
-                "Received txn %s from %s. (PDUs: %d, EDUs: %d, failures: %d)",
+                "Received txn %s from %s. (PDUs: %d, EDUs: %d)",
                 transaction_id, origin,
                 len(transaction_data.get("pdus", [])),
                 len(transaction_data.get("edus", [])),
-                len(transaction_data.get("failures", [])),
             )
 
             # We should ideally be getting this from the security layer.
diff --git a/synapse/federation/units.py b/synapse/federation/units.py
index bb1b3b13f7..c5ab14314e 100644
--- a/synapse/federation/units.py
+++ b/synapse/federation/units.py
@@ -73,7 +73,6 @@ class Transaction(JsonEncodedObject):
         "previous_ids",
         "pdus",
         "edus",
-        "pdu_failures",
     ]
 
     internal_keys = [