summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--CHANGES.rst6
-rw-r--r--synapse/__init__.py2
-rw-r--r--synapse/federation/transport/client.py1
-rw-r--r--synapse/http/matrixfederationclient.py31
-rw-r--r--tests/federation/test_federation.py2
-rw-r--r--tests/handlers/test_presence.py13
-rw-r--r--tests/handlers/test_typing.py2
7 files changed, 49 insertions, 8 deletions
diff --git a/CHANGES.rst b/CHANGES.rst
index 068ed9f2ff..5bea8e82d5 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -1,3 +1,9 @@
+Changes in synapse v0.11.0-r1 (2015-11-18)
+==========================================
+
+* Retry and fail federation requests more aggressively for requests that block
+  client side requests (PR #384)
+
 Changes in synapse v0.11.0 (2015-11-17)
 =======================================
 
diff --git a/synapse/__init__.py b/synapse/__init__.py
index f0eac97bab..5a83f5f3ec 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -16,4 +16,4 @@
 """ This is a reference implementation of a Matrix home server.
 """
 
-__version__ = "0.11.0"
+__version__ = "0.11.0-r1"
diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py
index 3d59e1c650..0e0cb7ebc6 100644
--- a/synapse/federation/transport/client.py
+++ b/synapse/federation/transport/client.py
@@ -136,6 +136,7 @@ class TransportLayerClient(object):
             path=PREFIX + "/send/%s/" % transaction.transaction_id,
             data=json_data,
             json_data_callback=json_data_callback,
+            long_retries=True,
         )
 
         logger.debug(
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index ca9591556d..614c06a6d7 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -56,7 +56,8 @@ incoming_responses_counter = metrics.register_counter(
 )
 
 
-MAX_RETRIES = 10
+MAX_LONG_RETRIES = 10
+MAX_SHORT_RETRIES = 3
 
 
 class MatrixFederationEndpointFactory(object):
@@ -103,7 +104,7 @@ class MatrixFederationHttpClient(object):
     def _create_request(self, destination, method, path_bytes,
                         body_callback, headers_dict={}, param_bytes=b"",
                         query_bytes=b"", retry_on_dns_fail=True,
-                        timeout=None):
+                        timeout=None, long_retries=False):
         """ Creates and sends a request to the given url
         """
         headers_dict[b"User-Agent"] = [self.version_string]
@@ -123,7 +124,10 @@ class MatrixFederationHttpClient(object):
 
         # XXX: Would be much nicer to retry only at the transaction-layer
         # (once we have reliable transactions in place)
-        retries_left = MAX_RETRIES
+        if long_retries:
+            retries_left = MAX_LONG_RETRIES
+        else:
+            retries_left = MAX_SHORT_RETRIES
 
         http_url_bytes = urlparse.urlunparse(
             ("", "", path_bytes, param_bytes, query_bytes, "")
@@ -184,9 +188,15 @@ class MatrixFederationHttpClient(object):
                     )
 
                     if retries_left and not timeout:
-                        delay = 4 ** (MAX_RETRIES + 1 - retries_left)
-                        delay = max(delay, 60)
-                        delay *= random.uniform(0.8, 1.4)
+                        if long_retries:
+                            delay = 4 ** (MAX_LONG_RETRIES + 1 - retries_left)
+                            delay = max(delay, 60)
+                            delay *= random.uniform(0.8, 1.4)
+                        else:
+                            delay = 0.5 * 2 ** (MAX_SHORT_RETRIES - retries_left)
+                            delay = max(delay, 2)
+                            delay *= random.uniform(0.8, 1.4)
+
                         yield sleep(delay)
                         retries_left -= 1
                     else:
@@ -237,7 +247,8 @@ class MatrixFederationHttpClient(object):
         headers_dict[b"Authorization"] = auth_headers
 
     @defer.inlineCallbacks
-    def put_json(self, destination, path, data={}, json_data_callback=None):
+    def put_json(self, destination, path, data={}, json_data_callback=None,
+                 long_retries=False):
         """ Sends the specifed json data using PUT
 
         Args:
@@ -248,6 +259,8 @@ class MatrixFederationHttpClient(object):
                 the request body. This will be encoded as JSON.
             json_data_callback (callable): A callable returning the dict to
                 use as the request body.
+            long_retries (bool): A boolean that indicates whether we should
+                retry for a short or long time.
 
         Returns:
             Deferred: Succeeds when we get a 2xx HTTP response. The result
@@ -273,6 +286,7 @@ class MatrixFederationHttpClient(object):
             path.encode("ascii"),
             body_callback=body_callback,
             headers_dict={"Content-Type": ["application/json"]},
+            long_retries=long_retries,
         )
 
         if 200 <= response.code < 300:
@@ -491,6 +505,9 @@ class _JsonProducer(object):
     def stopProducing(self):
         pass
 
+    def resumeProducing(self):
+        pass
+
 
 def _flatten_response_never_received(e):
     if hasattr(e, "reasons"):
diff --git a/tests/federation/test_federation.py b/tests/federation/test_federation.py
index a4ef60b911..96570f9072 100644
--- a/tests/federation/test_federation.py
+++ b/tests/federation/test_federation.py
@@ -197,6 +197,7 @@ class FederationTestCase(unittest.TestCase):
                 'pdu_failures': [],
             },
             json_data_callback=ANY,
+            long_retries=True,
         )
 
     @defer.inlineCallbacks
@@ -228,6 +229,7 @@ class FederationTestCase(unittest.TestCase):
                 'pdu_failures': [],
             },
             json_data_callback=ANY,
+            long_retries=True,
         )
 
     @defer.inlineCallbacks
diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py
index 10d4482cce..1172ceae8b 100644
--- a/tests/handlers/test_presence.py
+++ b/tests/handlers/test_presence.py
@@ -409,6 +409,7 @@ class PresenceInvitesTestCase(PresenceTestCase):
                     }
                 ),
                 json_data_callback=ANY,
+                long_retries=True,
             ),
             defer.succeed((200, "OK"))
         )
@@ -443,6 +444,7 @@ class PresenceInvitesTestCase(PresenceTestCase):
                     }
                 ),
                 json_data_callback=ANY,
+                long_retries=True,
             ),
             defer.succeed((200, "OK"))
         )
@@ -483,6 +485,7 @@ class PresenceInvitesTestCase(PresenceTestCase):
                     }
                 ),
                 json_data_callback=ANY,
+                long_retries=True,
             ),
             defer.succeed((200, "OK"))
         )
@@ -827,6 +830,7 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase):
                     }
                 ),
                 json_data_callback=ANY,
+                long_retries=True,
             ),
             defer.succeed((200, "OK"))
         )
@@ -843,6 +847,7 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase):
                     }
                 ),
                 json_data_callback=ANY,
+                long_retries=True,
             ),
             defer.succeed((200, "OK"))
         )
@@ -1033,6 +1038,7 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase):
                     }
                 ),
                 json_data_callback=ANY,
+                long_retries=True,
             ),
             defer.succeed((200, "OK"))
         )
@@ -1048,6 +1054,7 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase):
                     }
                 ),
                 json_data_callback=ANY,
+                long_retries=True,
             ),
             defer.succeed((200, "OK"))
         )
@@ -1078,6 +1085,7 @@ class PresencePushTestCase(MockedDatastorePresenceTestCase):
                     }
                 ),
                 json_data_callback=ANY,
+                long_retries=True,
             ),
             defer.succeed((200, "OK"))
         )
@@ -1184,6 +1192,7 @@ class PresencePollingTestCase(MockedDatastorePresenceTestCase):
                     },
                 ),
                 json_data_callback=ANY,
+                long_retries=True,
             ),
             defer.succeed((200, "OK"))
         )
@@ -1200,6 +1209,7 @@ class PresencePollingTestCase(MockedDatastorePresenceTestCase):
                     },
                 ),
                 json_data_callback=ANY,
+                long_retries=True,
             ),
             defer.succeed((200, "OK"))
         )
@@ -1232,6 +1242,7 @@ class PresencePollingTestCase(MockedDatastorePresenceTestCase):
                     },
                 ),
                 json_data_callback=ANY,
+                long_retries=True,
             ),
             defer.succeed((200, "OK"))
         )
@@ -1265,6 +1276,7 @@ class PresencePollingTestCase(MockedDatastorePresenceTestCase):
                     },
                 ),
                 json_data_callback=ANY,
+                long_retries=True,
             ),
             defer.succeed((200, "OK"))
         )
@@ -1297,6 +1309,7 @@ class PresencePollingTestCase(MockedDatastorePresenceTestCase):
                     },
                 ),
                 json_data_callback=ANY,
+                long_retries=True,
             ),
             defer.succeed((200, "OK"))
         )
diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py
index 2d7ba43561..5b1feeb45b 100644
--- a/tests/handlers/test_typing.py
+++ b/tests/handlers/test_typing.py
@@ -218,6 +218,7 @@ class TypingNotificationsTestCase(unittest.TestCase):
                     }
                 ),
                 json_data_callback=ANY,
+                long_retries=True,
             ),
             defer.succeed((200, "OK"))
         )
@@ -284,6 +285,7 @@ class TypingNotificationsTestCase(unittest.TestCase):
                     }
                 ),
                 json_data_callback=ANY,
+                long_retries=True,
             ),
             defer.succeed((200, "OK"))
         )