From 0a81038ea0e308d2944b7deae43060e4982b0abe Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 14 Sep 2018 14:39:59 +0100 Subject: Add in flight real time metrics for Measure blocks --- synapse/util/metrics.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'synapse/util') diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index 97f1267380..4b4ac5f6c7 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -20,6 +20,7 @@ from prometheus_client import Counter from twisted.internet import defer +from synapse.metrics import InFlightGauge from synapse.util.logcontext import LoggingContext logger = logging.getLogger(__name__) @@ -45,6 +46,13 @@ block_db_txn_duration = Counter( block_db_sched_duration = Counter( "synapse_util_metrics_block_db_sched_duration_seconds", "", ["block_name"]) +# Tracks the number of blocks currently active +in_flight = InFlightGauge( + "synapse_util_metrics_block_in_flight", "", + labels=["block_name"], + sub_metrics=["real_time_max", "real_time_sum"], +) + def measure_func(name): def wrapper(func): @@ -82,10 +90,14 @@ class Measure(object): self.start_usage = self.start_context.get_resource_usage() + in_flight.register((self.name,), self._update_in_flight) + def __exit__(self, exc_type, exc_val, exc_tb): if isinstance(exc_type, Exception) or not self.start_context: return + in_flight.unregister((self.name,), self._update_in_flight) + duration = self.clock.time() - self.start block_counter.labels(self.name).inc() @@ -120,3 +132,13 @@ class Measure(object): if self.created_context: self.start_context.__exit__(exc_type, exc_val, exc_tb) + + def _update_in_flight(self, metrics): + """Gets called when processing in flight metrics + """ + duration = self.clock.time() - self.start + + metrics.real_time_max = max(metrics.real_time_max, duration) + metrics.real_time_sum += duration + + # TODO: Add other in flight metrics. -- cgit 1.5.1 From fcfe7a850dada2e65982f2bc805d8bc409f07512 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 14 Sep 2018 19:23:07 +0100 Subject: Add an awful secondary timeout to fix wedged requests This is an attempt to mitigate #3842 by adding yet-another-timeout --- synapse/http/matrixfederationclient.py | 11 ++++++++ synapse/util/async_helpers.py | 51 ++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) (limited to 'synapse/util') diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index da16b5dd8c..13b19f7626 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -42,6 +42,7 @@ from synapse.api.errors import ( ) from synapse.http.endpoint import matrix_federation_endpoint from synapse.util import logcontext +from synapse.util.async_helpers import timeout_no_seriously from synapse.util.logcontext import make_deferred_yieldable from synapse.util.metrics import Measure @@ -228,6 +229,16 @@ class MatrixFederationHttpClient(object): ) request_deferred.addTimeout(_sec_timeout, self.hs.get_reactor()) + # Sometimes the timeout above doesn't work, so lets hack yet + # another layer of timeouts in in the vain hope that at some + # point the world made sense and this really really really + # should work. + request_deferred = timeout_no_seriously( + request_deferred, + timeout=_sec_timeout * 2, + reactor=self.hs.get_reactor(), + ) + with Measure(self.clock, "outbound_request"): response = yield make_deferred_yieldable( request_deferred, diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py index 9b3f2f4b96..083e4f4128 100644 --- a/synapse/util/async_helpers.py +++ b/synapse/util/async_helpers.py @@ -438,3 +438,54 @@ def _cancelled_to_timed_out_error(value, timeout): value.trap(CancelledError) raise DeferredTimeoutError(timeout, "Deferred") return value + + +def timeout_no_seriously(deferred, timeout, reactor): + """The in build twisted deferred addTimeout (and the method above) + completely fail to time things out under some unknown circumstances. + + Lets try a different way of timing things out and maybe that will make + things work?! + + TODO: Kill this with fire. + """ + + new_d = defer.Deferred() + + timed_out = [False] + + def time_it_out(): + timed_out[0] = True + deferred.cancel() + + if not new_d.called: + new_d.errback(DeferredTimeoutError(timeout, "Deferred")) + + delayed_call = reactor.callLater(timeout, time_it_out) + + def convert_cancelled(value): + if timed_out[0]: + return _cancelled_to_timed_out_error(value, timeout) + return value + + deferred.addBoth(convert_cancelled) + + def cancel_timeout(result): + # stop the pending call to cancel the deferred if it's been fired + if delayed_call.active(): + delayed_call.cancel() + return result + + deferred.addBoth(cancel_timeout) + + def success_cb(val): + if not new_d.called: + new_d.callback(val) + + def failure_cb(val): + if not new_d.called: + new_d.errback(val) + + deferred.addCallbacks(success_cb, failure_cb) + + return new_d -- cgit 1.5.1 From 24efb2a70dfd8aa100507612836601991e11affc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Sat, 15 Sep 2018 11:38:39 +0100 Subject: Fix timeout function Turns out deferred.cancel sometimes throws, so we do that last to ensure that we always do resolve the new deferred. --- synapse/util/async_helpers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'synapse/util') diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py index 083e4f4128..40c2946129 100644 --- a/synapse/util/async_helpers.py +++ b/synapse/util/async_helpers.py @@ -456,11 +456,12 @@ def timeout_no_seriously(deferred, timeout, reactor): def time_it_out(): timed_out[0] = True - deferred.cancel() if not new_d.called: new_d.errback(DeferredTimeoutError(timeout, "Deferred")) + deferred.cancel() + delayed_call = reactor.callLater(timeout, time_it_out) def convert_cancelled(value): -- cgit 1.5.1