summary refs log tree commit diff
path: root/synapse/federation
diff options
context:
space:
mode:
Diffstat (limited to 'synapse/federation')
-rw-r--r-- synapse/federation/federation_client.py 42
-rw-r--r-- synapse/federation/federation_server.py 21
-rw-r--r-- synapse/federation/transaction_queue.py 22
3 files changed, 78 insertions, 7 deletions
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py
index 70c9a6f46b..6042e366bd 100644
--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
@@ -19,7 +19,8 @@ from twisted.internet import defer
 from .federation_base import FederationBase
 from .units import Edu
 
-from synapse.api.errors import CodeMessageException
+from synapse.api.errors import CodeMessageException, SynapseError
+from synapse.util.expiringcache import ExpiringCache
 from synapse.util.logutils import log_function
 from synapse.events import FrozenEvent
 
@@ -30,6 +31,20 @@ logger = logging.getLogger(__name__)
 
 
 class FederationClient(FederationBase):
+    def __init__(self):
+        self._get_pdu_cache = None  # no PDU cache until start_get_pdu_cache() is called
+
+    def start_get_pdu_cache(self):  # creates and starts the cache of fetched PDUs (keyed by event_id)
+        self._get_pdu_cache = ExpiringCache(
+            cache_name="get_pdu_cache",
+            clock=self._clock,
+            max_len=1000,
+            expiry_ms=120*1000,  # 120 seconds, expressed in milliseconds
+            reset_expiry_on_get=False,  # presumably: reads do not extend an entry's lifetime -- confirm
+        )
+
+        self._get_pdu_cache.start()
+
     @log_function
     def send_pdu(self, pdu, destinations):
         """Informs the replication layer about a new PDU generated within the
@@ -160,6 +175,11 @@ class FederationClient(FederationBase):
 
         # TODO: Rate limit the number of times we try and get the same event.
 
+        if self._get_pdu_cache:
+            e = self._get_pdu_cache.get(event_id)
+            if e:
+                defer.returnValue(e)
+
         pdu = None
         for destination in destinations:
             try:
@@ -181,8 +201,21 @@ class FederationClient(FederationBase):
                     pdu = yield self._check_sigs_and_hash(pdu)
 
                     break
-            except CodeMessageException:
-                raise
+            except SynapseError:
+                logger.info(
+                    "Failed to get PDU %s from %s because %s",
+                    event_id, destination, e,
+                )
+                continue
+            except CodeMessageException as e:
+                if 400 <= e.code < 500:
+                    raise
+
+                logger.info(
+                    "Failed to get PDU %s from %s because %s",
+                    event_id, destination, e,
+                )
+                continue
             except Exception as e:
                 logger.info(
                     "Failed to get PDU %s from %s because %s",
@@ -190,6 +223,9 @@ class FederationClient(FederationBase):
                 )
                 continue
 
+        if self._get_pdu_cache is not None:
+            self._get_pdu_cache[event_id] = pdu
+
         defer.returnValue(pdu)
 
     @defer.inlineCallbacks
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 078ad0626d..22b9663831 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -114,7 +114,15 @@ class FederationServer(FederationBase):
         with PreserveLoggingContext():
             dl = []
             for pdu in pdu_list:
-                dl.append(self._handle_new_pdu(transaction.origin, pdu))
+                d = self._handle_new_pdu(transaction.origin, pdu)
+
+                def handle_failure(failure):  # errback attached to each PDU's deferred
+                    failure.trap(FederationError)  # other failure types propagate (Twisted Failure.trap)
+                    self.send_failure(failure.value, transaction.origin)  # presumably reports the error to the origin -- confirm
+
+                d.addErrback(handle_failure)
+
+                dl.append(d)
 
             if hasattr(transaction, "edus"):
                 for edu in [Edu(**x) for x in transaction.edus]:
@@ -124,6 +132,9 @@ class FederationServer(FederationBase):
                         edu.content
                     )
 
+            for failure in getattr(transaction, "pdu_failures", []):
+                logger.info("Got failure %r", failure)
+
             results = yield defer.DeferredList(dl, consumeErrors=True)
 
         ret = []
@@ -132,10 +143,16 @@ class FederationServer(FederationBase):
                 ret.append({})
             else:
                 logger.exception(r[1])
-                ret.append({"error": str(r[1])})
+                ret.append({"error": str(r[1].value)})
 
         logger.debug("Returning: %s", str(ret))
 
+        response = {
+            "pdus": dict(zip(
+                (p.event_id for p in pdu_list), ret
+            )),
+        }
+
         yield self.transaction_actions.set_response(
             transaction,
             200, response
diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py
index bb20f2ebab..6faaa066fb 100644
--- a/synapse/federation/transaction_queue.py
+++ b/synapse/federation/transaction_queue.py
@@ -91,7 +91,7 @@ class TransactionQueue(object):
                 if not deferred.called:
                     deferred.errback(failure)
                 else:
-                    logger.warn("Failed to send pdu", failure)
+                    logger.warn("Failed to send pdu", failure.value)
 
             with PreserveLoggingContext():
                 self._attempt_new_transaction(destination).addErrback(eb)
@@ -116,7 +116,7 @@ class TransactionQueue(object):
             if not deferred.called:
                 deferred.errback(failure)
             else:
-                logger.warn("Failed to send edu", failure)
+                logger.warn("Failed to send edu", failure.value)
 
         with PreserveLoggingContext():
             self._attempt_new_transaction(destination).addErrback(eb)
@@ -133,6 +133,15 @@ class TransactionQueue(object):
             (failure, deferred)
         )
 
+        def eb(failure):  # errback for the transaction attempt started below
+            if not deferred.called:
+                deferred.errback(failure)  # hand the failure to whoever yields on `deferred`
+            else:
+                logger.warn("Failed to send failure: %s", failure.value)  # %s needed: logging formats msg % args
+
+        with PreserveLoggingContext():
+            self._attempt_new_transaction(destination).addErrback(eb)
+
         yield deferred
 
     @defer.inlineCallbacks
@@ -249,6 +258,15 @@ class TransactionQueue(object):
                     transaction, json_data_cb
                 )
                 code = 200
+
+                if response:
+                    for e_id, r in getattr(response, "pdus", {}).items():
+                        if "error" in r:
+                            logger.warn(
+                                "Transaction returned error for %s: %s",
+                                e_id, r,
+                            )
+
             except HttpResponseException as e:
                 code = e.code
                 response = e.response