summary refs log tree commit diff
diff options
context:
space:
mode:
author Erik Johnston <erikj@jki.re> 2018-09-14 19:56:11 +0100
committer GitHub <noreply@github.com> 2018-09-14 19:56:11 +0100
commit c30cfff5723a123d9abd5c86300d278882dcb385 (patch)
tree 47f652b55ca626f0027bb827bc99faf38f7188e7
parent don't filter membership events based on history visibility (#3874) (diff)
parent Newsfile (diff)
download synapse-c30cfff5723a123d9abd5c86300d278882dcb385.tar.xz
Merge pull request #3875 from matrix-org/erikj/extra_timeouts
Add an awful secondary timeout to fix wedged requests
-rw-r--r-- changelog.d/3875.bugfix 1
-rw-r--r-- synapse/http/matrixfederationclient.py 11
-rw-r--r-- synapse/util/async_helpers.py 51
3 files changed, 63 insertions, 0 deletions
diff --git a/changelog.d/3875.bugfix b/changelog.d/3875.bugfix
new file mode 100644

index 0000000000..2d2147dd4b --- /dev/null +++ b/changelog.d/3875.bugfix
@@ -0,0 +1 @@ +Mitigate outbound federation randomly becoming wedged diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index da16b5dd8c..13b19f7626 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py
@@ -42,6 +42,7 @@ from synapse.api.errors import ( ) from synapse.http.endpoint import matrix_federation_endpoint from synapse.util import logcontext +from synapse.util.async_helpers import timeout_no_seriously from synapse.util.logcontext import make_deferred_yieldable from synapse.util.metrics import Measure @@ -228,6 +229,16 @@ class MatrixFederationHttpClient(object): ) request_deferred.addTimeout(_sec_timeout, self.hs.get_reactor()) + # Sometimes the timeout above doesn't work, so lets hack yet + # another layer of timeouts in in the vain hope that at some + # point the world made sense and this really really really + # should work. + request_deferred = timeout_no_seriously( + request_deferred, + timeout=_sec_timeout * 2, + reactor=self.hs.get_reactor(), + ) + with Measure(self.clock, "outbound_request"): response = yield make_deferred_yieldable( request_deferred, diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py
index 9b3f2f4b96..083e4f4128 100644 --- a/synapse/util/async_helpers.py +++ b/synapse/util/async_helpers.py
@@ -438,3 +438,54 @@ def _cancelled_to_timed_out_error(value, timeout): value.trap(CancelledError) raise DeferredTimeoutError(timeout, "Deferred") return value + + +def timeout_no_seriously(deferred, timeout, reactor): + """The in build twisted deferred addTimeout (and the method above) + completely fail to time things out under some unknown circumstances. + + Lets try a different way of timing things out and maybe that will make + things work?! + + TODO: Kill this with fire. + """ + + new_d = defer.Deferred() + + timed_out = [False] + + def time_it_out(): + timed_out[0] = True + deferred.cancel() + + if not new_d.called: + new_d.errback(DeferredTimeoutError(timeout, "Deferred")) + + delayed_call = reactor.callLater(timeout, time_it_out) + + def convert_cancelled(value): + if timed_out[0]: + return _cancelled_to_timed_out_error(value, timeout) + return value + + deferred.addBoth(convert_cancelled) + + def cancel_timeout(result): + # stop the pending call to cancel the deferred if it's been fired + if delayed_call.active(): + delayed_call.cancel() + return result + + deferred.addBoth(cancel_timeout) + + def success_cb(val): + if not new_d.called: + new_d.callback(val) + + def failure_cb(val): + if not new_d.called: + new_d.errback(val) + + deferred.addCallbacks(success_cb, failure_cb) + + return new_d