From ac80cb08fe247812b46c70843b2d5c9764cca619 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Mon, 17 Sep 2018 17:16:50 +0100 Subject: Fix more b'abcd' noise in metrics --- synapse/http/request_metrics.py | 22 +++++++++++----------- synapse/http/site.py | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'synapse/http') diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py index 72c2654678..fedb4e6b18 100644 --- a/synapse/http/request_metrics.py +++ b/synapse/http/request_metrics.py @@ -162,7 +162,7 @@ class RequestMetrics(object): with _in_flight_requests_lock: _in_flight_requests.add(self) - def stop(self, time_sec, request): + def stop(self, time_sec, response_code, sent_bytes): with _in_flight_requests_lock: _in_flight_requests.discard(self) @@ -179,35 +179,35 @@ class RequestMetrics(object): ) return - response_code = str(request.code) + response_code = str(response_code) - outgoing_responses_counter.labels(request.method, response_code).inc() + outgoing_responses_counter.labels(self.method, response_code).inc() - response_count.labels(request.method, self.name, tag).inc() + response_count.labels(self.method, self.name, tag).inc() - response_timer.labels(request.method, self.name, tag, response_code).observe( + response_timer.labels(self.method, self.name, tag, response_code).observe( time_sec - self.start ) resource_usage = context.get_resource_usage() - response_ru_utime.labels(request.method, self.name, tag).inc( + response_ru_utime.labels(self.method, self.name, tag).inc( resource_usage.ru_utime, ) - response_ru_stime.labels(request.method, self.name, tag).inc( + response_ru_stime.labels(self.method, self.name, tag).inc( resource_usage.ru_stime, ) - response_db_txn_count.labels(request.method, self.name, tag).inc( + response_db_txn_count.labels(self.method, self.name, tag).inc( resource_usage.db_txn_count ) - response_db_txn_duration.labels(request.method, self.name, tag).inc( + response_db_txn_duration.labels(self.method, self.name, tag).inc( resource_usage.db_txn_duration_sec ) - response_db_sched_duration.labels(request.method, self.name, tag).inc( + response_db_sched_duration.labels(self.method, self.name, tag).inc( resource_usage.db_sched_duration_sec ) - response_size.labels(request.method, self.name, tag).inc(request.sentLength) + response_size.labels(self.method, self.name, tag).inc(sent_bytes) # We always call this at the end to ensure that we update the metrics # regardless of whether a call to /metrics while the request was in diff --git a/synapse/http/site.py b/synapse/http/site.py index 2191de9836..9579e8cd0d 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -288,7 +288,7 @@ class SynapseRequest(Request): ) try: - self.request_metrics.stop(self.finish_time, self) + self.request_metrics.stop(self.finish_time, self.code, self.sentLength) except Exception as e: logger.warn("Failed to stop metrics: %r", e) -- cgit 1.5.1 From b3097396e7cf8d29255b4855e9bf06c6b98a549e Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 18 Sep 2018 17:01:12 +0100 Subject: Set SNI to the server_name, not whatever was in the SRV record Fixes #3843 --- synapse/http/endpoint.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'synapse/http') diff --git a/synapse/http/endpoint.py b/synapse/http/endpoint.py index b0c9369519..91025037a3 100644 --- a/synapse/http/endpoint.py +++ b/synapse/http/endpoint.py @@ -108,7 +108,7 @@ def matrix_federation_endpoint(reactor, destination, tls_client_options_factory= Args: reactor: Twisted reactor. - destination (bytes): The name of the server to connect to. + destination (unicode): The name of the server to connect to. tls_client_options_factory (synapse.crypto.context_factory.ClientTLSOptionsFactory): Factory which generates TLS options for client connections. @@ -126,10 +126,17 @@ def matrix_federation_endpoint(reactor, destination, tls_client_options_factory= transport_endpoint = HostnameEndpoint default_port = 8008 else: + # the SNI string should be the same as the Host header, minus the port. + # as per https://github.com/matrix-org/synapse/issues/2525#issuecomment-336896777, + # the Host header and SNI should therefore be the server_name of the remote + # server. + tls_options = tls_client_options_factory.get_options(domain) + def transport_endpoint(reactor, host, port, timeout): return wrapClientTLS( - tls_client_options_factory.get_options(host), - HostnameEndpoint(reactor, host, port, timeout=timeout)) + tls_options, + HostnameEndpoint(reactor, host, port, timeout=timeout), + ) default_port = 8448 if port is None: -- cgit 1.5.1 From 31c15dcb80c8f11fd03dbb9b0ccff4777dc8e457 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 18 Sep 2018 18:17:15 +0100 Subject: Refactor matrixfederationclient to fix logging (#3906) We want to wait until we have read the response body before we log the request as complete, otherwise a confusing thing happens where the request appears to have completed, but we later fail it. To do this, we factor the salient details of a request out to a separate object, which can then keep track of the txn_id, so that it can be logged. --- changelog.d/3906.misc | 1 + synapse/http/matrixfederationclient.py | 385 +++++++++++++++++++------------ tests/http/test_fedclient.py | 43 +++- tests/replication/slave/storage/_base.py | 35 +-- tests/server.py | 81 +++++++ 5 files changed, 359 insertions(+), 186 deletions(-) create mode 100644 changelog.d/3906.misc (limited to 'synapse/http') diff --git a/changelog.d/3906.misc b/changelog.d/3906.misc new file mode 100644 index 0000000000..11709186d3 --- /dev/null +++ b/changelog.d/3906.misc @@ -0,0 +1 @@ +Improve logging of outbound federation requests \ No newline at end of file diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 083484a687..6a2d447289 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -17,10 +17,12 @@ import cgi import logging import random import sys +from io import BytesIO from six import PY3, string_types from six.moves import urllib +import attr import treq from canonicaljson import encode_canonical_json from prometheus_client import Counter @@ -28,8 +30,9 @@ from signedjson.sign import sign_json from twisted.internet import defer, protocol from twisted.internet.error import DNSLookupError +from twisted.internet.task import _EPSILON, Cooperator from twisted.web._newclient import ResponseDone -from twisted.web.client import Agent, HTTPConnectionPool +from twisted.web.client import Agent, FileBodyProducer, HTTPConnectionPool from twisted.web.http_headers import Headers import synapse.metrics @@ -41,13 +44,11 @@ from synapse.api.errors import ( SynapseError, ) from synapse.http.endpoint import matrix_federation_endpoint -from synapse.util import logcontext from synapse.util.async_helpers import timeout_no_seriously from synapse.util.logcontext import make_deferred_yieldable from synapse.util.metrics import Measure logger = logging.getLogger(__name__) -outbound_logger = logging.getLogger("synapse.http.outbound") outgoing_requests_counter = Counter("synapse_http_matrixfederationclient_requests", "", ["method"]) @@ -78,6 +79,93 @@ class MatrixFederationEndpointFactory(object): ) +_next_id = 1 + + +@attr.s +class MatrixFederationRequest(object): + method = attr.ib() + """HTTP method + :type: str + """ + + path = attr.ib() + """HTTP path + :type: str + """ + + destination = attr.ib() + """The remote server to send the HTTP request to. + :type: str""" + + json = attr.ib(default=None) + """JSON to send in the body. + :type: dict|None + """ + + json_callback = attr.ib(default=None) + """A callback to generate the JSON. + :type: func|None + """ + + query = attr.ib(default=None) + """Query arguments. + :type: dict|None + """ + + txn_id = attr.ib(default=None) + """Unique ID for this request (for logging) + :type: str|None + """ + + def __attrs_post_init__(self): + global _next_id + self.txn_id = "%s-O-%s" % (self.method, _next_id) + _next_id = (_next_id + 1) % (MAXINT - 1) + + def get_json(self): + if self.json_callback: + return self.json_callback() + return self.json + + +@defer.inlineCallbacks +def _handle_json_response(reactor, timeout_sec, request, response): + """ + Reads the JSON body of a response, with a timeout + + Args: + reactor (IReactor): twisted reactor, for the timeout + timeout_sec (float): number of seconds to wait for response to complete + request (MatrixFederationRequest): the request that triggered the response + response (IResponse): response to the request + + Returns: + dict: parsed JSON response + """ + try: + check_content_type_is_json(response.headers) + d = treq.json_content(response) + d.addTimeout(timeout_sec, reactor) + body = yield make_deferred_yieldable(d) + except Exception as e: + logger.warn( + "{%s} [%d] Error reading response: %s", + request.txn_id, + request.destination, + e, + ) + raise + logger.info( + "{%s} [%d] Completed: %d %s", + request.txn_id, + request.destination, + response.code, + response.phrase.decode('ascii', errors='replace'), + ) + defer.returnValue(body) + + class MatrixFederationHttpClient(object): """HTTP client used to talk to other homeservers over the federation protocol. Send client certificates and signs requests. @@ -102,34 +190,35 @@ class MatrixFederationHttpClient(object): self.clock = hs.get_clock() self._store = hs.get_datastore() self.version_string = hs.version_string.encode('ascii') - self._next_id = 1 self.default_timeout = 60 - def _create_url(self, destination, path_bytes, param_bytes, query_bytes): - return urllib.parse.urlunparse( - (b"matrix", destination, path_bytes, param_bytes, query_bytes, b"") - ) + def schedule(x): + reactor.callLater(_EPSILON, x) + + self._cooperator = Cooperator(scheduler=schedule) @defer.inlineCallbacks - def _request(self, destination, method, path, - json=None, json_callback=None, - param_bytes=b"", - query=None, retry_on_dns_fail=True, - timeout=None, long_retries=False, - ignore_backoff=False, - backoff_on_404=False): + def _send_request( + self, + request, + retry_on_dns_fail=True, + timeout=None, + long_retries=False, + ignore_backoff=False, + backoff_on_404=False + ): """ - Creates and sends a request to the given server. + Sends a request to the given server. Args: - destination (str): The remote server to send the HTTP request to. - method (str): HTTP method - path (str): The HTTP path - json (dict or None): JSON to send in the body. - json_callback (func or None): A callback to generate the JSON. - query (dict or None): Query arguments. + request (MatrixFederationRequest): details of request to be sent + + timeout (int|None): number of milliseconds to wait for the response headers + (including connecting to the server). 60s by default. + ignore_backoff (bool): true to ignore the historical backoff data and try the request anyway. + backoff_on_404 (bool): Back off if we get a 404 Returns: @@ -154,38 +243,32 @@ class MatrixFederationHttpClient(object): if ( self.hs.config.federation_domain_whitelist is not None and - destination not in self.hs.config.federation_domain_whitelist + request.destination not in self.hs.config.federation_domain_whitelist ): - raise FederationDeniedError(destination) + raise FederationDeniedError(request.destination) limiter = yield synapse.util.retryutils.get_retry_limiter( - destination, + request.destination, self.clock, self._store, backoff_on_404=backoff_on_404, ignore_backoff=ignore_backoff, ) - headers_dict = {} - path_bytes = path.encode("ascii") - if query: - query_bytes = encode_query_args(query) + method = request.method + destination = request.destination + path_bytes = request.path.encode("ascii") + if request.query: + query_bytes = encode_query_args(request.query) else: query_bytes = b"" headers_dict = { "User-Agent": [self.version_string], - "Host": [destination], + "Host": [request.destination], } with limiter: - url = self._create_url( - destination.encode("ascii"), path_bytes, param_bytes, query_bytes - ).decode('ascii') - - txn_id = "%s-O-%s" % (method, self._next_id) - self._next_id = (self._next_id + 1) % (MAXINT - 1) - # XXX: Would be much nicer to retry only at the transaction-layer # (once we have reliable transactions in place) if long_retries: @@ -193,16 +276,19 @@ class MatrixFederationHttpClient(object): else: retries_left = MAX_SHORT_RETRIES - http_url = urllib.parse.urlunparse( - (b"", b"", path_bytes, param_bytes, query_bytes, b"") - ).decode('ascii') + url = urllib.parse.urlunparse(( + b"matrix", destination.encode("ascii"), + path_bytes, None, query_bytes, b"", + )).decode('ascii') + + http_url = urllib.parse.urlunparse(( + b"", b"", + path_bytes, None, query_bytes, b"", + )).decode('ascii') - log_result = None while True: try: - if json_callback: - json = json_callback() - + json = request.get_json() if json: data = encode_canonical_json(json) headers_dict["Content-Type"] = ["application/json"] @@ -213,16 +299,24 @@ class MatrixFederationHttpClient(object): data = None self.sign_request(destination, method, http_url, headers_dict) - outbound_logger.info( + logger.info( "{%s} [%s] Sending request: %s %s", - txn_id, destination, method, url + request.txn_id, destination, method, url ) + if data: + producer = FileBodyProducer( + BytesIO(data), + cooperator=self._cooperator + ) + else: + producer = None + request_deferred = treq.request( method, url, headers=Headers(headers_dict), - data=data, + data=producer, agent=self.agent, reactor=self.hs.get_reactor(), unbuffered=True @@ -244,33 +338,19 @@ class MatrixFederationHttpClient(object): request_deferred, ) - log_result = "%d %s" % ( - response.code, - response.phrase.decode('ascii', errors='replace'), - ) break except Exception as e: - if not retry_on_dns_fail and isinstance(e, DNSLookupError): - logger.warn( - "DNS Lookup failed to %s with %s", - destination, - e - ) - log_result = "DNS Lookup failed to %s with %s" % ( - destination, e - ) - raise - logger.warn( - "{%s} Sending request failed to %s: %s %s: %s", - txn_id, + "{%s} [%s] Request failed: %s %s: %s", + request.txn_id, destination, method, url, _flatten_response_never_received(e), ) - log_result = _flatten_response_never_received(e) + if not retry_on_dns_fail and isinstance(e, DNSLookupError): + raise if retries_left and not timeout: if long_retries: @@ -283,33 +363,33 @@ class MatrixFederationHttpClient(object): delay *= random.uniform(0.8, 1.4) logger.debug( - "{%s} Waiting %s before sending to %s...", - txn_id, + "{%s} [%s] Waiting %ss before re-sending...", + request.txn_id, + destination, delay, - destination ) yield self.clock.sleep(delay) retries_left -= 1 else: raise - finally: - outbound_logger.info( - "{%s} [%s] Result: %s", - txn_id, - destination, - log_result, - ) + + logger.info( + "{%s} [%s] Got response headers: %d %s", + request.txn_id, + destination, + response.code, + response.phrase.decode('ascii', errors='replace'), + ) if 200 <= response.code < 300: pass else: # :'( # Update transactions table? - with logcontext.PreserveLoggingContext(): - d = treq.content(response) - d.addTimeout(_sec_timeout, self.hs.get_reactor()) - body = yield make_deferred_yieldable(d) + d = treq.content(response) + d.addTimeout(_sec_timeout, self.hs.get_reactor()) + body = yield make_deferred_yieldable(d) raise HttpResponseException( response.code, response.phrase, body ) @@ -403,29 +483,26 @@ class MatrixFederationHttpClient(object): is not on our federation whitelist """ - if not json_data_callback: - json_data_callback = lambda: data - - response = yield self._request( - destination, - "PUT", - path, - json_callback=json_data_callback, + request = MatrixFederationRequest( + method="PUT", + destination=destination, + path=path, query=args, + json_callback=json_data_callback, + json=data, + ) + + response = yield self._send_request( + request, long_retries=long_retries, timeout=timeout, ignore_backoff=ignore_backoff, backoff_on_404=backoff_on_404, ) - if 200 <= response.code < 300: - # We need to update the transactions table to say it was sent? - check_content_type_is_json(response.headers) - - with logcontext.PreserveLoggingContext(): - d = treq.json_content(response) - d.addTimeout(self.default_timeout, self.hs.get_reactor()) - body = yield make_deferred_yieldable(d) + body = yield _handle_json_response( + self.hs.get_reactor(), self.default_timeout, request, response, + ) defer.returnValue(body) @defer.inlineCallbacks @@ -459,31 +536,30 @@ class MatrixFederationHttpClient(object): Fails with ``FederationDeniedError`` if this destination is not on our federation whitelist """ - response = yield self._request( - destination, - "POST", - path, + + request = MatrixFederationRequest( + method="POST", + destination=destination, + path=path, query=args, json=data, + ) + + response = yield self._send_request( + request, long_retries=long_retries, timeout=timeout, ignore_backoff=ignore_backoff, ) - if 200 <= response.code < 300: - # We need to update the transactions table to say it was sent? - check_content_type_is_json(response.headers) - - with logcontext.PreserveLoggingContext(): - d = treq.json_content(response) - if timeout: - _sec_timeout = timeout / 1000 - else: - _sec_timeout = self.default_timeout - - d.addTimeout(_sec_timeout, self.hs.get_reactor()) - body = yield make_deferred_yieldable(d) + if timeout: + _sec_timeout = timeout / 1000 + else: + _sec_timeout = self.default_timeout + body = yield _handle_json_response( + self.hs.get_reactor(), _sec_timeout, request, response, + ) defer.returnValue(body) @defer.inlineCallbacks @@ -519,25 +595,23 @@ class MatrixFederationHttpClient(object): logger.debug("Query bytes: %s Retry DNS: %s", args, retry_on_dns_fail) - response = yield self._request( - destination, - "GET", - path, + request = MatrixFederationRequest( + method="GET", + destination=destination, + path=path, query=args, + ) + + response = yield self._send_request( + request, retry_on_dns_fail=retry_on_dns_fail, timeout=timeout, ignore_backoff=ignore_backoff, ) - if 200 <= response.code < 300: - # We need to update the transactions table to say it was sent? - check_content_type_is_json(response.headers) - - with logcontext.PreserveLoggingContext(): - d = treq.json_content(response) - d.addTimeout(self.default_timeout, self.hs.get_reactor()) - body = yield make_deferred_yieldable(d) - + body = yield _handle_json_response( + self.hs.get_reactor(), self.default_timeout, request, response, + ) defer.returnValue(body) @defer.inlineCallbacks @@ -568,25 +642,23 @@ class MatrixFederationHttpClient(object): Fails with ``FederationDeniedError`` if this destination is not on our federation whitelist """ - response = yield self._request( - destination, - "DELETE", - path, + request = MatrixFederationRequest( + method="DELETE", + destination=destination, + path=path, query=args, + ) + + response = yield self._send_request( + request, long_retries=long_retries, timeout=timeout, ignore_backoff=ignore_backoff, ) - if 200 <= response.code < 300: - # We need to update the transactions table to say it was sent? - check_content_type_is_json(response.headers) - - with logcontext.PreserveLoggingContext(): - d = treq.json_content(response) - d.addTimeout(self.default_timeout, self.hs.get_reactor()) - body = yield make_deferred_yieldable(d) - + body = yield _handle_json_response( + self.hs.get_reactor(), self.default_timeout, request, response, + ) defer.returnValue(body) @defer.inlineCallbacks @@ -614,11 +686,15 @@ class MatrixFederationHttpClient(object): Fails with ``FederationDeniedError`` if this destination is not on our federation whitelist """ - response = yield self._request( - destination, - "GET", - path, + request = MatrixFederationRequest( + method="GET", + destination=destination, + path=path, query=args, + ) + + response = yield self._send_request( + request, retry_on_dns_fail=retry_on_dns_fail, ignore_backoff=ignore_backoff, ) @@ -626,14 +702,25 @@ class MatrixFederationHttpClient(object): headers = dict(response.headers.getAllRawHeaders()) try: - with logcontext.PreserveLoggingContext(): - d = _readBodyToFile(response, output_stream, max_size) - d.addTimeout(self.default_timeout, self.hs.get_reactor()) - length = yield make_deferred_yieldable(d) - except Exception: - logger.exception("Failed to download body") + d = _readBodyToFile(response, output_stream, max_size) + d.addTimeout(self.default_timeout, self.hs.get_reactor()) + length = yield make_deferred_yieldable(d) + except Exception as e: + logger.warn( + "{%s} [%d] Error reading response: %s", + request.txn_id, + request.destination, + e, + ) raise - + logger.info( + "{%s} [%d] Completed: %d %s [%d bytes]", + request.txn_id, + request.destination, + response.code, + response.phrase.decode('ascii', errors='replace'), + length, + ) defer.returnValue((length, headers)) diff --git a/tests/http/test_fedclient.py b/tests/http/test_fedclient.py index 1c46c9cfeb..66c09f63b6 100644 --- a/tests/http/test_fedclient.py +++ b/tests/http/test_fedclient.py @@ -18,9 +18,14 @@ from mock import Mock from twisted.internet.defer import TimeoutError from twisted.internet.error import ConnectingCancelledError, DNSLookupError from twisted.web.client import ResponseNeverReceived +from twisted.web.http import HTTPChannel -from synapse.http.matrixfederationclient import MatrixFederationHttpClient +from synapse.http.matrixfederationclient import ( + MatrixFederationHttpClient, + MatrixFederationRequest, +) +from tests.server import FakeTransport from tests.unittest import HomeserverTestCase @@ -40,7 +45,7 @@ class FederationClientTests(HomeserverTestCase): """ If the DNS raising returns an error, it will bubble up. """ - d = self.cl._request("testserv2:8008", "GET", "foo/bar", timeout=10000) + d = self.cl.get_json("testserv2:8008", "foo/bar", timeout=10000) self.pump() f = self.failureResultOf(d) @@ -51,7 +56,7 @@ class FederationClientTests(HomeserverTestCase): If the HTTP request is not connected and is timed out, it'll give a ConnectingCancelledError. """ - d = self.cl._request("testserv:8008", "GET", "foo/bar", timeout=10000) + d = self.cl.get_json("testserv:8008", "foo/bar", timeout=10000) self.pump() @@ -78,7 +83,7 @@ class FederationClientTests(HomeserverTestCase): If the HTTP request is connected, but gets no response before being timed out, it'll give a ResponseNeverReceived. """ - d = self.cl._request("testserv:8008", "GET", "foo/bar", timeout=10000) + d = self.cl.get_json("testserv:8008", "foo/bar", timeout=10000) self.pump() @@ -108,7 +113,12 @@ class FederationClientTests(HomeserverTestCase): """ Once the client gets the headers, _request returns successfully. """ - d = self.cl._request("testserv:8008", "GET", "foo/bar", timeout=10000) + request = MatrixFederationRequest( + method="GET", + destination="testserv:8008", + path="foo/bar", + ) + d = self.cl._send_request(request, timeout=10000) self.pump() @@ -155,3 +165,26 @@ class FederationClientTests(HomeserverTestCase): f = self.failureResultOf(d) self.assertIsInstance(f.value, TimeoutError) + + def test_client_sends_body(self): + self.cl.post_json( + "testserv:8008", "foo/bar", timeout=10000, + data={"a": "b"} + ) + + self.pump() + + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + client = clients[0][2].buildProtocol(None) + server = HTTPChannel() + + client.makeConnection(FakeTransport(server, self.reactor)) + server.makeConnection(FakeTransport(client, self.reactor)) + + self.pump(0.1) + + self.assertEqual(len(server.requests), 1) + request = server.requests[0] + content = request.content.read() + self.assertEqual(content, b'{"a":"b"}') diff --git a/tests/replication/slave/storage/_base.py b/tests/replication/slave/storage/_base.py index 089cecfbee..9e9fbbfe93 100644 --- a/tests/replication/slave/storage/_base.py +++ b/tests/replication/slave/storage/_base.py @@ -15,8 +15,6 @@ from mock import Mock, NonCallableMock -import attr - from synapse.replication.tcp.client import ( ReplicationClientFactory, ReplicationClientHandler, @@ -24,6 +22,7 @@ from synapse.replication.tcp.client import ( from synapse.replication.tcp.resource import ReplicationStreamProtocolFactory from tests import unittest +from tests.server import FakeTransport class BaseSlavedStoreTestCase(unittest.HomeserverTestCase): @@ -56,36 +55,8 @@ class BaseSlavedStoreTestCase(unittest.HomeserverTestCase): server = server_factory.buildProtocol(None) client = client_factory.buildProtocol(None) - @attr.s - class FakeTransport(object): - - other = attr.ib() - disconnecting = False - buffer = attr.ib(default=b'') - - def registerProducer(self, producer, streaming): - - self.producer = producer - - def _produce(): - self.producer.resumeProducing() - reactor.callLater(0.1, _produce) - - reactor.callLater(0.0, _produce) - - def write(self, byt): - self.buffer = self.buffer + byt - - if getattr(self.other, "transport") is not None: - self.other.dataReceived(self.buffer) - self.buffer = b"" - - def writeSequence(self, seq): - for x in seq: - self.write(x) - - client.makeConnection(FakeTransport(server)) - server.makeConnection(FakeTransport(client)) + client.makeConnection(FakeTransport(server, reactor)) + server.makeConnection(FakeTransport(client, reactor)) def replicate(self): """Tell the master side of replication that something has happened, and then diff --git a/tests/server.py b/tests/server.py index 420ec4e088..ccea3baa55 100644 --- a/tests/server.py +++ b/tests/server.py @@ -280,3 +280,84 @@ def get_clock(): clock = ThreadedMemoryReactorClock() hs_clock = Clock(clock) return (clock, hs_clock) + + +@attr.s +class FakeTransport(object): + """ + A twisted.internet.interfaces.ITransport implementation which sends all its data + straight into an IProtocol object: it exists to connect two IProtocols together. + + To use it, instantiate it with the receiving IProtocol, and then pass it to the + sending IProtocol's makeConnection method: + + server = HTTPChannel() + client.makeConnection(FakeTransport(server, self.reactor)) + + If you want bidirectional communication, you'll need two instances. + """ + + other = attr.ib() + """The Protocol object which will receive any data written to this transport. + + :type: twisted.internet.interfaces.IProtocol + """ + + _reactor = attr.ib() + """Test reactor + + :type: twisted.internet.interfaces.IReactorTime + """ + + disconnecting = False + buffer = attr.ib(default=b'') + producer = attr.ib(default=None) + + def getPeer(self): + return None + + def getHost(self): + return None + + def loseConnection(self): + self.disconnecting = True + + def abortConnection(self): + self.disconnecting = True + + def pauseProducing(self): + self.producer.pauseProducing() + + def unregisterProducer(self): + if not self.producer: + return + + self.producer = None + + def registerProducer(self, producer, streaming): + self.producer = producer + self.producerStreaming = streaming + + def _produce(): + d = self.producer.resumeProducing() + d.addCallback(lambda x: self._reactor.callLater(0.1, _produce)) + + if not streaming: + self._reactor.callLater(0.0, _produce) + + def write(self, byt): + self.buffer = self.buffer + byt + + def _write(): + if getattr(self.other, "transport") is not None: + self.other.dataReceived(self.buffer) + self.buffer = b"" + return + + self._reactor.callLater(0.0, _write) + + _write() + + def writeSequence(self, seq): + for x in seq: + self.write(x) -- cgit 1.5.1 From 35aec19f0ac88e7b16bc6bcc4f4e546b7fc89589 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Tue, 18 Sep 2018 15:26:08 -0600 Subject: Destination is a string --- synapse/http/matrixfederationclient.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'synapse/http') diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 6a2d447289..530c0245a9 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -150,14 +150,14 @@ def _handle_json_response(reactor, timeout_sec, request, response): body = yield make_deferred_yieldable(d) except Exception as e: logger.warn( - "{%s} [%d] Error reading response: %s", + "{%s} [%s] Error reading response: %s", request.txn_id, request.destination, e, ) raise logger.info( - "{%s} [%d] Completed: %d %s", + "{%s} [%s] Completed: %d %s", request.txn_id, request.destination, response.code, @@ -707,14 +707,14 @@ class MatrixFederationHttpClient(object): length = yield make_deferred_yieldable(d) except Exception as e: logger.warn( - "{%s} [%d] Error reading response: %s", + "{%s} [%s] Error reading response: %s", request.txn_id, request.destination, e, ) raise logger.info( - "{%s} [%d] Completed: %d %s [%d bytes]", + "{%s} [%s] Completed: %d %s [%d bytes]", request.txn_id, request.destination, response.code, -- cgit 1.5.1 From a334e1cacec5dd9c1a983415906d8ffea4a30134 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 19 Sep 2018 10:39:40 +0100 Subject: Update to use new timeout function everywhere. The existing deferred timeout helper function (and the one into twisted) suffer from a bug when a deferred's canceller throws an exception, #3842. The new helper function doesn't suffer from this problem. --- synapse/http/client.py | 4 +- synapse/http/matrixfederationclient.py | 25 +++++++----- synapse/notifier.py | 13 +++--- synapse/util/async_helpers.py | 73 +++++++++------------------------- 4 files changed, 43 insertions(+), 72 deletions(-) (limited to 'synapse/http') diff --git a/synapse/http/client.py b/synapse/http/client.py index ec339a92ad..3d05f83b8c 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -43,7 +43,7 @@ from twisted.web.http_headers import Headers from synapse.api.errors import Codes, HttpResponseException, SynapseError from synapse.http import cancelled_to_request_timed_out_error, redact_uri from synapse.http.endpoint import SpiderEndpoint -from synapse.util.async_helpers import add_timeout_to_deferred +from synapse.util.async_helpers import timeout_deferred from synapse.util.caches import CACHE_SIZE_FACTOR from synapse.util.logcontext import make_deferred_yieldable @@ -99,7 +99,7 @@ class SimpleHttpClient(object): request_deferred = treq.request( method, uri, agent=self.agent, data=data, headers=headers ) - add_timeout_to_deferred( + request_deferred = timeout_deferred( request_deferred, 60, self.hs.get_reactor(), cancelled_to_request_timed_out_error, ) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 530c0245a9..14b12cd1c4 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -44,7 +44,7 @@ from synapse.api.errors import ( SynapseError, ) from synapse.http.endpoint import matrix_federation_endpoint -from synapse.util.async_helpers import timeout_no_seriously +from synapse.util.async_helpers import timeout_deferred from synapse.util.logcontext import make_deferred_yieldable from synapse.util.metrics import Measure @@ -145,8 +145,14 @@ def _handle_json_response(reactor, timeout_sec, request, response): """ try: check_content_type_is_json(response.headers) + d = treq.json_content(response) - d.addTimeout(timeout_sec, reactor) + d = timeout_deferred( + d, + timeout=timeout_sec, + reactor=reactor, + ) + body = yield make_deferred_yieldable(d) except Exception as e: logger.warn( @@ -321,15 +327,10 @@ class MatrixFederationHttpClient(object): reactor=self.hs.get_reactor(), unbuffered=True ) - request_deferred.addTimeout(_sec_timeout, self.hs.get_reactor()) - # Sometimes the timeout above doesn't work, so lets hack yet - # another layer of timeouts in in the vain hope that at some - # point the world made sense and this really really really - # should work. - request_deferred = timeout_no_seriously( + request_deferred = timeout_deferred( request_deferred, - timeout=_sec_timeout * 2, + timeout=_sec_timeout, reactor=self.hs.get_reactor(), ) @@ -388,7 +389,11 @@ class MatrixFederationHttpClient(object): # :'( # Update transactions table? d = treq.content(response) - d.addTimeout(_sec_timeout, self.hs.get_reactor()) + d = timeout_deferred( + d, + timeout=_sec_timeout, + reactor=self.hs.get_reactor(), + ) body = yield make_deferred_yieldable(d) raise HttpResponseException( response.code, response.phrase, body diff --git a/synapse/notifier.py b/synapse/notifier.py index 82f391481c..2d683718fb 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -28,7 +28,7 @@ from synapse.types import StreamToken from synapse.util.async_helpers import ( DeferredTimeoutError, ObservableDeferred, - add_timeout_to_deferred, + timeout_deferred, ) from synapse.util.logcontext import PreserveLoggingContext, run_in_background from synapse.util.logutils import log_function @@ -337,7 +337,7 @@ class Notifier(object): # Now we wait for the _NotifierUserStream to be told there # is a new token. listener = user_stream.new_listener(prev_token) - add_timeout_to_deferred( + listener.deferred = timeout_deferred( listener.deferred, (end_time - now) / 1000., self.hs.get_reactor(), @@ -559,11 +559,12 @@ class Notifier(object): if end_time <= now: break - add_timeout_to_deferred( - listener.deferred.addTimeout, - (end_time - now) / 1000., - self.hs.get_reactor(), + listener.deferred = timeout_deferred( + listener.deferred, + timeout=(end_time - now) / 1000., + reactor=self.hs.get_reactor(), ) + try: with PreserveLoggingContext(): yield listener.deferred diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py index 40c2946129..2e12bcda98 100644 --- a/synapse/util/async_helpers.py +++ b/synapse/util/async_helpers.py @@ -380,23 +380,25 @@ class DeferredTimeoutError(Exception): """ -def add_timeout_to_deferred(deferred, timeout, reactor, on_timeout_cancel=None): - """ - Add a timeout to a deferred by scheduling it to be cancelled after - timeout seconds. +def _cancelled_to_timed_out_error(value, timeout): + if isinstance(value, failure.Failure): + value.trap(CancelledError) + raise DeferredTimeoutError(timeout, "Deferred") + return value - This is essentially a backport of deferred.addTimeout, which was introduced - in twisted 16.5. - If the deferred gets timed out, it errbacks with a DeferredTimeoutError, - unless a cancelable function was passed to its initialization or unless - a different on_timeout_cancel callable is provided. +def timeout_deferred(deferred, timeout, reactor, on_timeout_cancel=None): + """The in built twisted `Deferred.addTimeout` fails to time out deferreds + that have a canceller that throws exceptions. This method creates a new + deferred that wraps and times out the given deferred, correctly handling + the case where the given deferred's canceller throws. - Args: - deferred (defer.Deferred): deferred to be timed out - timeout (Number): seconds to time out after - reactor (twisted.internet.reactor): the Twisted reactor to use + NOTE: Unlike `Deferred.addTimeout`, this function returns a new deferred + Args: + deferred (Deferred) + timeout (float): Timeout in seconds + reactor (twisted.internet.reactor): The twisted reactor to use on_timeout_cancel (callable): A callable which is called immediately after the deferred times out, and not if this deferred is otherwise cancelled before the timeout. @@ -407,47 +409,9 @@ def add_timeout_to_deferred(deferred, timeout, reactor, on_timeout_cancel=None): The default callable (if none is provided) will translate a CancelledError Failure into a DeferredTimeoutError. - """ - timed_out = [False] - def time_it_out(): - timed_out[0] = True - deferred.cancel() - - delayed_call = reactor.callLater(timeout, time_it_out) - - def convert_cancelled(value): - if timed_out[0]: - to_call = on_timeout_cancel or _cancelled_to_timed_out_error - return to_call(value, timeout) - return value - - deferred.addBoth(convert_cancelled) - - def cancel_timeout(result): - # stop the pending call to cancel the deferred if it's been fired - if delayed_call.active(): - delayed_call.cancel() - return result - - deferred.addBoth(cancel_timeout) - - -def _cancelled_to_timed_out_error(value, timeout): - if isinstance(value, failure.Failure): - value.trap(CancelledError) - raise DeferredTimeoutError(timeout, "Deferred") - return value - - -def timeout_no_seriously(deferred, timeout, reactor): - """The in build twisted deferred addTimeout (and the method above) - completely fail to time things out under some unknown circumstances. - - Lets try a different way of timing things out and maybe that will make - things work?! - - TODO: Kill this with fire. + Returns: + Deferred """ new_d = defer.Deferred() @@ -466,7 +430,8 @@ def timeout_no_seriously(deferred, timeout, reactor): def convert_cancelled(value): if timed_out[0]: - return _cancelled_to_timed_out_error(value, timeout) + to_call = on_timeout_cancel or _cancelled_to_timed_out_error + return to_call(value, timeout) return value deferred.addBoth(convert_cancelled) -- cgit 1.5.1 From aeca5a5ed5816a7797a0807142a41e14baac80f6 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Thu, 20 Sep 2018 16:28:18 +1000 Subject: Add a regression test for logging on failed connections (#3912) --- changelog.d/3912.misc | 1 + synapse/http/site.py | 4 +-- tests/test_server.py | 74 +++++++++++++++++++++++++++++++++++++++++++++++++-- tests/unittest.py | 3 ++- 4 files changed, 77 insertions(+), 5 deletions(-) create mode 100644 changelog.d/3912.misc (limited to 'synapse/http') diff --git a/changelog.d/3912.misc b/changelog.d/3912.misc new file mode 100644 index 0000000000..87d73697ea --- /dev/null +++ b/changelog.d/3912.misc @@ -0,0 +1 @@ +Add a regression test for logging failed HTTP requests on Python 3. \ No newline at end of file diff --git a/synapse/http/site.py b/synapse/http/site.py index 9579e8cd0d..d41d672934 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -75,9 +75,9 @@ class SynapseRequest(Request): return '<%s at 0x%x method=%r uri=%r clientproto=%r site=%r>' % ( self.__class__.__name__, id(self), - self.method, + self.method.decode('ascii', errors='replace'), self.get_redacted_uri(), - self.clientproto, + self.clientproto.decode('ascii', errors='replace'), self.site.site_tag, ) diff --git a/tests/test_server.py b/tests/test_server.py index ef74544e93..4045fdadc3 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -1,14 +1,35 @@ +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging import re +from six import StringIO + from twisted.internet.defer import Deferred -from twisted.test.proto_helpers import MemoryReactorClock +from twisted.python.failure import Failure +from twisted.test.proto_helpers import AccumulatingProtocol, MemoryReactorClock +from twisted.web.resource import Resource +from twisted.web.server import NOT_DONE_YET from synapse.api.errors import Codes, SynapseError from synapse.http.server import JsonResource +from synapse.http.site import SynapseSite, logger from synapse.util import Clock from tests import unittest -from tests.server import make_request, render, setup_test_homeserver +from tests.server import FakeTransport, make_request, render, setup_test_homeserver class JsonResourceTests(unittest.TestCase): @@ -121,3 +142,52 @@ class JsonResourceTests(unittest.TestCase): self.assertEqual(channel.result["code"], b'400') self.assertEqual(channel.json_body["error"], "Unrecognized request") self.assertEqual(channel.json_body["errcode"], "M_UNRECOGNIZED") + + +class SiteTestCase(unittest.HomeserverTestCase): + def test_lose_connection(self): + """ + We log the URI correctly redacted when we lose the connection. + """ + + class HangingResource(Resource): + """ + A Resource that strategically hangs, as if it were processing an + answer. + """ + + def render(self, request): + return NOT_DONE_YET + + # Set up a logging handler that we can inspect afterwards + output = StringIO() + handler = logging.StreamHandler(output) + logger.addHandler(handler) + old_level = logger.level + logger.setLevel(10) + self.addCleanup(logger.setLevel, old_level) + self.addCleanup(logger.removeHandler, handler) + + # Make a resource and a Site, the resource will hang and allow us to + # time out the request while it's 'processing' + base_resource = Resource() + base_resource.putChild(b'', HangingResource()) + site = SynapseSite("test", "site_tag", {}, base_resource, "1.0") + + server = site.buildProtocol(None) + client = AccumulatingProtocol() + client.makeConnection(FakeTransport(server, self.reactor)) + server.makeConnection(FakeTransport(client, self.reactor)) + + # Send a request with an access token that will get redacted + server.dataReceived(b"GET /?access_token=bar HTTP/1.0\r\n\r\n") + self.pump() + + # Lose the connection + e = Failure(Exception("Failed123")) + server.connectionLost(e) + handler.flush() + + # Our access token is redacted and the failure reason is logged. + self.assertIn("/?access_token=", output.getvalue()) + self.assertIn("Failed123", output.getvalue()) diff --git a/tests/unittest.py b/tests/unittest.py index a3d39920db..56f3dca394 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -219,7 +219,8 @@ class HomeserverTestCase(TestCase): Function to be overridden in subclasses. """ - raise NotImplementedError() + hs = self.setup_test_homeserver() + return hs def prepare(self, reactor, clock, homeserver): """ -- cgit 1.5.1 From 1f3f5fcf52efdf5215dc6c7cd3805b1357bf8236 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Thu, 20 Sep 2018 20:14:34 +1000 Subject: Fix client IPs being broken on Python 3 (#3908) --- .travis.yml | 5 + changelog.d/3908.bugfix | 1 + synapse/http/site.py | 2 +- synapse/storage/client_ips.py | 34 ++++--- tests/server.py | 8 +- tests/storage/test_client_ips.py | 202 ++++++++++++++++++++++++++++++++------- tests/unittest.py | 7 +- tests/utils.py | 27 +++++- tox.ini | 10 ++ 9 files changed, 238 insertions(+), 58 deletions(-) create mode 100644 changelog.d/3908.bugfix (limited to 'synapse/http') diff --git a/.travis.yml b/.travis.yml index b3ee66da8f..b6faca4b92 100644 --- a/.travis.yml +++ b/.travis.yml @@ -31,6 +31,11 @@ matrix: - python: 3.6 env: TOX_ENV=py36 + - python: 3.6 + env: TOX_ENV=py36-postgres TRIAL_FLAGS="-j 4" + services: + - postgresql + - python: 3.6 env: TOX_ENV=check_isort diff --git a/changelog.d/3908.bugfix b/changelog.d/3908.bugfix new file mode 100644 index 0000000000..518aee6c4d --- /dev/null +++ b/changelog.d/3908.bugfix @@ -0,0 +1 @@ +Fix adding client IPs to the database failing on Python 3. \ No newline at end of file diff --git a/synapse/http/site.py b/synapse/http/site.py index d41d672934..50be2de3bb 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -308,7 +308,7 @@ class XForwardedForRequest(SynapseRequest): C{b"-"}. """ return self.requestHeaders.getRawHeaders( - b"x-forwarded-for", [b"-"])[0].split(b",")[0].strip() + b"x-forwarded-for", [b"-"])[0].split(b",")[0].strip().decode('ascii') class SynapseRequestFactory(object): diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 8fc678fa67..9ad17b7c25 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -119,21 +119,25 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): for entry in iteritems(to_update): (user_id, access_token, ip), (user_agent, device_id, last_seen) = entry - self._simple_upsert_txn( - txn, - table="user_ips", - keyvalues={ - "user_id": user_id, - "access_token": access_token, - "ip": ip, - "user_agent": user_agent, - "device_id": device_id, - }, - values={ - "last_seen": last_seen, - }, - lock=False, - ) + try: + self._simple_upsert_txn( + txn, + table="user_ips", + keyvalues={ + "user_id": user_id, + "access_token": access_token, + "ip": ip, + "user_agent": user_agent, + "device_id": device_id, + }, + values={ + "last_seen": last_seen, + }, + lock=False, + ) + except Exception as e: + # Failed to upsert, log and continue + logger.error("Failed to insert client IP %r: %r", entry, e) @defer.inlineCallbacks def get_last_client_ip_by_device(self, user_id, device_id): diff --git a/tests/server.py b/tests/server.py index ccea3baa55..7bee58dff1 100644 --- a/tests/server.py +++ b/tests/server.py @@ -98,7 +98,7 @@ class FakeSite: return FakeLogger() -def make_request(method, path, content=b"", access_token=None): +def make_request(method, path, content=b"", access_token=None, request=SynapseRequest): """ Make a web request using the given method and path, feed it the content, and return the Request and the Channel underneath. @@ -120,14 +120,16 @@ def make_request(method, path, content=b"", access_token=None): site = FakeSite() channel = FakeChannel() - req = SynapseRequest(site, channel) + req = request(site, channel) req.process = lambda: b"" req.content = BytesIO(content) if access_token: req.requestHeaders.addRawHeader(b"Authorization", b"Bearer " + access_token) - req.requestHeaders.addRawHeader(b"X-Forwarded-For", b"127.0.0.1") + if content: + req.requestHeaders.addRawHeader(b"Content-Type", b"application/json") + req.requestReceived(method, path, b"1.1") return req, channel diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py index c9b02a062b..2ffbb9f14f 100644 --- a/tests/storage/test_client_ips.py +++ b/tests/storage/test_client_ips.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,35 +13,45 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +import hashlib +import hmac +import json + from mock import Mock from twisted.internet import defer -import tests.unittest -import tests.utils +from synapse.http.site import XForwardedForRequest +from synapse.rest.client.v1 import admin, login + +from tests import unittest -class ClientIpStoreTestCase(tests.unittest.TestCase): - def __init__(self, *args, **kwargs): - super(ClientIpStoreTestCase, self).__init__(*args, **kwargs) - self.store = None # type: synapse.storage.DataStore - self.clock = None # type: tests.utils.MockClock +class ClientIpStoreTestCase(unittest.HomeserverTestCase): + def make_homeserver(self, reactor, clock): + hs = self.setup_test_homeserver() + return hs - @defer.inlineCallbacks - def setUp(self): - self.hs = yield tests.utils.setup_test_homeserver(self.addCleanup) + def prepare(self, hs, reactor, clock): self.store = self.hs.get_datastore() - self.clock = self.hs.get_clock() - @defer.inlineCallbacks def test_insert_new_client_ip(self): - self.clock.now = 12345678 + self.reactor.advance(12345678) + user_id = "@user:id" - yield self.store.insert_client_ip( - user_id, "access_token", "ip", "user_agent", "device_id" + self.get_success( + self.store.insert_client_ip( + user_id, "access_token", "ip", "user_agent", "device_id" + ) ) - result = yield self.store.get_last_client_ip_by_device(user_id, "device_id") + # Trigger the storage loop + self.reactor.advance(10) + + result = self.get_success( + self.store.get_last_client_ip_by_device(user_id, "device_id") + ) r = result[(user_id, "device_id")] self.assertDictContainsSubset( @@ -55,18 +66,18 @@ class ClientIpStoreTestCase(tests.unittest.TestCase): r, ) - @defer.inlineCallbacks def test_disabled_monthly_active_user(self): self.hs.config.limit_usage_by_mau = False self.hs.config.max_mau_value = 50 user_id = "@user:server" - yield self.store.insert_client_ip( - user_id, "access_token", "ip", "user_agent", "device_id" + self.get_success( + self.store.insert_client_ip( + user_id, "access_token", "ip", "user_agent", "device_id" + ) ) - active = yield self.store.user_last_seen_monthly_active(user_id) + active = self.get_success(self.store.user_last_seen_monthly_active(user_id)) self.assertFalse(active) - @defer.inlineCallbacks def test_adding_monthly_active_user_when_full(self): self.hs.config.limit_usage_by_mau = True self.hs.config.max_mau_value = 50 @@ -76,38 +87,159 @@ class ClientIpStoreTestCase(tests.unittest.TestCase): self.store.get_monthly_active_count = Mock( return_value=defer.succeed(lots_of_users) ) - yield self.store.insert_client_ip( - user_id, "access_token", "ip", "user_agent", "device_id" + self.get_success( + self.store.insert_client_ip( + user_id, "access_token", "ip", "user_agent", "device_id" + ) ) - active = yield self.store.user_last_seen_monthly_active(user_id) + active = self.get_success(self.store.user_last_seen_monthly_active(user_id)) self.assertFalse(active) - @defer.inlineCallbacks def test_adding_monthly_active_user_when_space(self): self.hs.config.limit_usage_by_mau = True self.hs.config.max_mau_value = 50 user_id = "@user:server" - active = yield self.store.user_last_seen_monthly_active(user_id) + active = self.get_success(self.store.user_last_seen_monthly_active(user_id)) self.assertFalse(active) - yield self.store.insert_client_ip( - user_id, "access_token", "ip", "user_agent", "device_id" + # Trigger the saving loop + self.reactor.advance(10) + + self.get_success( + self.store.insert_client_ip( + user_id, "access_token", "ip", "user_agent", "device_id" + ) ) - active = yield self.store.user_last_seen_monthly_active(user_id) + active = self.get_success(self.store.user_last_seen_monthly_active(user_id)) self.assertTrue(active) - @defer.inlineCallbacks def test_updating_monthly_active_user_when_space(self): self.hs.config.limit_usage_by_mau = True self.hs.config.max_mau_value = 50 user_id = "@user:server" - yield self.store.register(user_id=user_id, token="123", password_hash=None) + self.get_success( + self.store.register(user_id=user_id, token="123", password_hash=None) + ) - active = yield self.store.user_last_seen_monthly_active(user_id) + active = self.get_success(self.store.user_last_seen_monthly_active(user_id)) self.assertFalse(active) - yield self.store.insert_client_ip( - user_id, "access_token", "ip", "user_agent", "device_id" + # Trigger the saving loop + self.reactor.advance(10) + + self.get_success( + self.store.insert_client_ip( + user_id, "access_token", "ip", "user_agent", "device_id" + ) ) - active = yield self.store.user_last_seen_monthly_active(user_id) + active = self.get_success(self.store.user_last_seen_monthly_active(user_id)) self.assertTrue(active) + + +class ClientIpAuthTestCase(unittest.HomeserverTestCase): + + servlets = [admin.register_servlets, login.register_servlets] + + def make_homeserver(self, reactor, clock): + hs = self.setup_test_homeserver() + return hs + + def prepare(self, hs, reactor, clock): + self.hs.config.registration_shared_secret = u"shared" + self.store = self.hs.get_datastore() + + # Create the user + request, channel = self.make_request("GET", "/_matrix/client/r0/admin/register") + self.render(request) + nonce = channel.json_body["nonce"] + + want_mac = hmac.new(key=b"shared", digestmod=hashlib.sha1) + want_mac.update(nonce.encode('ascii') + b"\x00bob\x00abc123\x00admin") + want_mac = want_mac.hexdigest() + + body = json.dumps( + { + "nonce": nonce, + "username": "bob", + "password": "abc123", + "admin": True, + "mac": want_mac, + } + ) + request, channel = self.make_request( + "POST", "/_matrix/client/r0/admin/register", body.encode('utf8') + ) + self.render(request) + + self.assertEqual(channel.code, 200) + self.user_id = channel.json_body["user_id"] + + def test_request_with_xforwarded(self): + """ + The IP in X-Forwarded-For is entered into the client IPs table. + """ + self._runtest( + {b"X-Forwarded-For": b"127.9.0.1"}, + "127.9.0.1", + {"request": XForwardedForRequest}, + ) + + def test_request_from_getPeer(self): + """ + The IP returned by getPeer is entered into the client IPs table, if + there's no X-Forwarded-For header. + """ + self._runtest({}, "127.0.0.1", {}) + + def _runtest(self, headers, expected_ip, make_request_args): + device_id = "bleb" + + body = json.dumps( + { + "type": "m.login.password", + "user": "bob", + "password": "abc123", + "device_id": device_id, + } + ) + request, channel = self.make_request( + "POST", "/_matrix/client/r0/login", body.encode('utf8'), **make_request_args + ) + self.render(request) + self.assertEqual(channel.code, 200) + access_token = channel.json_body["access_token"].encode('ascii') + + # Advance to a known time + self.reactor.advance(123456 - self.reactor.seconds()) + + request, channel = self.make_request( + "GET", + "/_matrix/client/r0/admin/users/" + self.user_id, + body.encode('utf8'), + access_token=access_token, + **make_request_args + ) + request.requestHeaders.addRawHeader(b"User-Agent", b"Mozzila pizza") + + # Add the optional headers + for h, v in headers.items(): + request.requestHeaders.addRawHeader(h, v) + self.render(request) + + # Advance so the save loop occurs + self.reactor.advance(100) + + result = self.get_success( + self.store.get_last_client_ip_by_device(self.user_id, device_id) + ) + r = result[(self.user_id, device_id)] + self.assertDictContainsSubset( + { + "user_id": self.user_id, + "device_id": device_id, + "ip": expected_ip, + "user_agent": "Mozzila pizza", + "last_seen": 123456100, + }, + r, + ) diff --git a/tests/unittest.py b/tests/unittest.py index 56f3dca394..ef905e6389 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -26,6 +26,7 @@ from twisted.internet.defer import Deferred from twisted.trial import unittest from synapse.http.server import JsonResource +from synapse.http.site import SynapseRequest from synapse.server import HomeServer from synapse.types import UserID, create_requester from synapse.util.logcontext import LoggingContextFilter @@ -237,7 +238,9 @@ class HomeserverTestCase(TestCase): Function to optionally be overridden in subclasses. """ - def make_request(self, method, path, content=b""): + def make_request( + self, method, path, content=b"", access_token=None, request=SynapseRequest + ): """ Create a SynapseRequest at the path using the method and containing the given content. @@ -255,7 +258,7 @@ class HomeserverTestCase(TestCase): if isinstance(content, dict): content = json.dumps(content).encode('utf8') - return make_request(method, path, content) + return make_request(method, path, content, access_token, request) def render(self, request): """ diff --git a/tests/utils.py b/tests/utils.py index 215226debf..aaed1149c3 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -16,7 +16,9 @@ import atexit import hashlib import os +import time import uuid +import warnings from inspect import getcallargs from mock import Mock, patch @@ -237,20 +239,41 @@ def setup_test_homeserver( else: # We need to do cleanup on PostgreSQL def cleanup(): + import psycopg2 + # Close all the db pools hs.get_db_pool().close() + dropped = False + # Drop the test database db_conn = db_engine.module.connect( database=POSTGRES_BASE_DB, user=POSTGRES_USER ) db_conn.autocommit = True cur = db_conn.cursor() - cur.execute("DROP DATABASE IF EXISTS %s;" % (test_db,)) - db_conn.commit() + + # Try a few times to drop the DB. Some things may hold on to the + # database for a few more seconds due to flakiness, preventing + # us from dropping it when the test is over. If we can't drop + # it, warn and move on. + for x in range(5): + try: + cur.execute("DROP DATABASE IF EXISTS %s;" % (test_db,)) + db_conn.commit() + dropped = True + except psycopg2.OperationalError as e: + warnings.warn( + "Couldn't drop old db: " + str(e), category=UserWarning + ) + time.sleep(0.5) + cur.close() db_conn.close() + if not dropped: + warnings.warn("Failed to drop old DB.", category=UserWarning) + if not LEAVE_DB: # Register the cleanup hook cleanup_func(cleanup) diff --git a/tox.ini b/tox.ini index 88875c3318..e4db563b4b 100644 --- a/tox.ini +++ b/tox.ini @@ -70,6 +70,16 @@ usedevelop=true [testenv:py36] usedevelop=true +[testenv:py36-postgres] +usedevelop=true +deps = + {[base]deps} + psycopg2 +setenv = + {[base]setenv} + SYNAPSE_POSTGRES = 1 + + [testenv:packaging] deps = check-manifest -- cgit 1.5.1 From b28a7ed5030148139af6c6c03e635b068a6eae15 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 20 Sep 2018 13:44:20 +0100 Subject: Fix spurious exceptions when client closes conncetion If a HTTP handler throws an exception while processing a request we automatically write a JSON error response. If the handler had already started writing a response twisted throws an exception. We should check for this case and simple abort the connection if there was an error after the response had started being written. --- synapse/http/server.py | 49 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 14 deletions(-) (limited to 'synapse/http') diff --git a/synapse/http/server.py b/synapse/http/server.py index 2d5c23e673..b4b25cab19 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -84,10 +84,21 @@ def wrap_json_request_handler(h): logger.info( "%s SynapseError: %s - %s", request, code, e.msg ) - respond_with_json( - request, code, e.error_dict(), send_cors=True, - pretty_print=_request_user_agent_is_curl(request), - ) + + # Only respond with an error response if we haven't already started + # writing, otherwise lets just kill the connection + if request.startedWriting: + if request.transport: + try: + request.transport.abortConnection() + except Exception: + # abortConnection throws if the connection is already closed + pass + else: + respond_with_json( + request, code, e.error_dict(), send_cors=True, + pretty_print=_request_user_agent_is_curl(request), + ) except Exception: # failure.Failure() fishes the original Failure out @@ -100,16 +111,26 @@ def wrap_json_request_handler(h): request, f.getTraceback().rstrip(), ) - respond_with_json( - request, - 500, - { - "error": "Internal server error", - "errcode": Codes.UNKNOWN, - }, - send_cors=True, - pretty_print=_request_user_agent_is_curl(request), - ) + # Only respond with an error response if we haven't already started + # writing, otherwise lets just kill the connection + if request.startedWriting: + if request.transport: + try: + request.transport.abortConnection() + except Exception: + # abortConnection throws if the connection is already closed + pass + else: + respond_with_json( + request, + 500, + { + "error": "Internal server error", + "errcode": Codes.UNKNOWN, + }, + send_cors=True, + pretty_print=_request_user_agent_is_curl(request), + ) return wrap_async_request_handler(wrapped_request_handler) -- cgit 1.5.1 From 219606a6eda4aac01bdb24f69ec0d418689e653e Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Wed, 26 Sep 2018 13:26:27 -0600 Subject: Fix exception documentation in matrixfederationclient.py --- synapse/http/matrixfederationclient.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'synapse/http') diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 14b12cd1c4..1a79f6305b 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -230,7 +230,7 @@ class MatrixFederationHttpClient(object): Returns: Deferred: resolves with the http response object on success. - Fails with ``HTTPRequestException``: if we get an HTTP response + Fails with ``HttpResponseException``: if we get an HTTP response code >= 300. Fails with ``NotRetryingDestination`` if we are not yet ready @@ -478,7 +478,7 @@ class MatrixFederationHttpClient(object): Deferred: Succeeds when we get a 2xx HTTP response. The result will be the decoded JSON body. - Fails with ``HTTPRequestException`` if we get an HTTP response + Fails with ``HttpResponseException`` if we get an HTTP response code >= 300. Fails with ``NotRetryingDestination`` if we are not yet ready @@ -532,7 +532,7 @@ class MatrixFederationHttpClient(object): Deferred: Succeeds when we get a 2xx HTTP response. The result will be the decoded JSON body. - Fails with ``HTTPRequestException`` if we get an HTTP response + Fails with ``HttpResponseException`` if we get an HTTP response code >= 300. Fails with ``NotRetryingDestination`` if we are not yet ready @@ -587,7 +587,7 @@ class MatrixFederationHttpClient(object): Deferred: Succeeds when we get a 2xx HTTP response. The result will be the decoded JSON body. - Fails with ``HTTPRequestException`` if we get an HTTP response + Fails with ``HttpResponseException`` if we get an HTTP response code >= 300. Fails with ``NotRetryingDestination`` if we are not yet ready @@ -638,7 +638,7 @@ class MatrixFederationHttpClient(object): Deferred: Succeeds when we get a 2xx HTTP response. The result will be the decoded JSON body. - Fails with ``HTTPRequestException`` if we get an HTTP response + Fails with ``HttpResponseException`` if we get an HTTP response code >= 300. Fails with ``NotRetryingDestination`` if we are not yet ready @@ -682,7 +682,7 @@ class MatrixFederationHttpClient(object): Deferred: resolves with an (int,dict) tuple of the file length and a dict of the response headers. - Fails with ``HTTPRequestException`` if we get an HTTP response code + Fails with ``HttpResponseException`` if we get an HTTP response code >= 300 Fails with ``NotRetryingDestination`` if we are not yet ready -- cgit 1.5.1 From 334e075dd85a21e859eb89948b606c2710b932ad Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 2 Oct 2018 11:49:39 +0100 Subject: Fix error when logging incomplete requests If a connection is lost before a request is read from Request, Twisted sets `method` (and `uri`) attributes to dummy values. These dummy values have incorrect types (i.e. they're not bytes), and so things like `__repr__` would raise an exception. To fix this we had a helper method to return the method with a consistent type. --- synapse/http/site.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) (limited to 'synapse/http') diff --git a/synapse/http/site.py b/synapse/http/site.py index 50be2de3bb..e508c0bd4f 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -75,14 +75,14 @@ class SynapseRequest(Request): return '<%s at 0x%x method=%r uri=%r clientproto=%r site=%r>' % ( self.__class__.__name__, id(self), - self.method.decode('ascii', errors='replace'), + self.get_method(), self.get_redacted_uri(), self.clientproto.decode('ascii', errors='replace'), self.site.site_tag, ) def get_request_id(self): - return "%s-%i" % (self.method.decode('ascii'), self.request_seq) + return "%s-%i" % (self.get_method(), self.request_seq) def get_redacted_uri(self): uri = self.uri @@ -90,6 +90,21 @@ class SynapseRequest(Request): uri = self.uri.decode('ascii') return redact_uri(uri) + def get_method(self): + """Gets the method associated with the request (or placeholder if not + method has yet been received). + + Note: This is necessary as the placeholder value in twisted is str + rather than bytes, so we need to sanitise `self.method`. + + Returns: + str + """ + method = self.method + if isinstance(method, bytes): + method = self.method.decode('ascii') + return method + def get_user_agent(self): return self.requestHeaders.getRawHeaders(b"User-Agent", [None])[-1] @@ -119,7 +134,7 @@ class SynapseRequest(Request): # dispatching to the handler, so that the handler # can update the servlet name in the request # metrics - requests_counter.labels(self.method.decode('ascii'), + requests_counter.labels(self.get_method(), self.request_metrics.name).inc() @contextlib.contextmanager @@ -207,14 +222,14 @@ class SynapseRequest(Request): self.start_time = time.time() self.request_metrics = RequestMetrics() self.request_metrics.start( - self.start_time, name=servlet_name, method=self.method.decode('ascii'), + self.start_time, name=servlet_name, method=self.get_method(), ) self.site.access_logger.info( "%s - %s - Received request: %s %s", self.getClientIP(), self.site.site_tag, - self.method.decode('ascii'), + self.get_method(), self.get_redacted_uri() ) @@ -280,7 +295,7 @@ class SynapseRequest(Request): int(usage.db_txn_count), self.sentLength, code, - self.method.decode('ascii'), + self.get_method(), self.get_redacted_uri(), self.clientproto.decode('ascii', errors='replace'), user_agent, -- cgit 1.5.1 From b8a5b0097c4fc88a43c5c5f9785720d38299d3d8 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 16 Oct 2018 10:44:49 +0100 Subject: Various cleanups in the federation client code (#4031) - Improve logging: log things in the right order, include destination and txids in all log lines, don't log successful responses twice - Fix the docstring on TransportLayerClient.send_transaction - Don't use treq.request, which is overcomplicated for our purposes: just use a twisted.web.client.Agent. - simplify the logic for setting up the bodyProducer - fix bytes/str confusions --- changelog.d/4031.misc | 1 + synapse/federation/transaction_queue.py | 27 +++++------- synapse/federation/transport/client.py | 19 ++++---- synapse/http/matrixfederationclient.py | 78 +++++++++++++++++---------------- 4 files changed, 64 insertions(+), 61 deletions(-) create mode 100644 changelog.d/4031.misc (limited to 'synapse/http') diff --git a/changelog.d/4031.misc b/changelog.d/4031.misc new file mode 100644 index 0000000000..60be8b59fd --- /dev/null +++ b/changelog.d/4031.misc @@ -0,0 +1 @@ +Various cleanups in the federation client code diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 98b5950800..3fdd63be95 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -633,14 +633,6 @@ class TransactionQueue(object): transaction, json_data_cb ) code = 200 - - if response: - for e_id, r in response.get("pdus", {}).items(): - if "error" in r: - logger.warn( - "Transaction returned error for %s: %s", - e_id, r, - ) except HttpResponseException as e: code = e.code response = e.response @@ -657,19 +649,24 @@ class TransactionQueue(object): destination, txn_id, code ) - logger.debug("TX [%s] Sent transaction", destination) - logger.debug("TX [%s] Marking as delivered...", destination) - yield self.transaction_actions.delivered( transaction, code, response ) - logger.debug("TX [%s] Marked as delivered", destination) + logger.debug("TX [%s] {%s} Marked as delivered", destination, txn_id) - if code != 200: + if code == 200: + for e_id, r in response.get("pdus", {}).items(): + if "error" in r: + logger.warn( + "TX [%s] {%s} Remote returned error for %s: %s", + destination, txn_id, e_id, r, + ) + else: for p in pdus: - logger.info( - "Failed to send event %s to %s", p.event_id, destination + logger.warn( + "TX [%s] {%s} Failed to send event %s", + destination, txn_id, p.event_id, ) success = False diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 2ab973d6c8..edba5a9808 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -143,9 +143,17 @@ class TransportLayerClient(object): transaction (Transaction) Returns: - Deferred: Results of the deferred is a tuple in the form of - (response_code, response_body) where the response_body is a - python dict decoded from json + Deferred: Succeeds when we get a 2xx HTTP response. The result + will be the decoded JSON body. + + Fails with ``HTTPRequestException`` if we get an HTTP response + code >= 300. + + Fails with ``NotRetryingDestination`` if we are not yet ready + to retry this server. + + Fails with ``FederationDeniedError`` if this destination + is not on our federation whitelist """ logger.debug( "send_data dest=%s, txid=%s", @@ -170,11 +178,6 @@ class TransportLayerClient(object): backoff_on_404=True, # If we get a 404 the other side has gone ) - logger.debug( - "send_data dest=%s, txid=%s, got response: 200", - transaction.destination, transaction.transaction_id, - ) - defer.returnValue(response) @defer.inlineCallbacks diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 14b12cd1c4..fcc02fc77d 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -195,7 +195,7 @@ class MatrixFederationHttpClient(object): ) self.clock = hs.get_clock() self._store = hs.get_datastore() - self.version_string = hs.version_string.encode('ascii') + self.version_string_bytes = hs.version_string.encode('ascii') self.default_timeout = 60 def schedule(x): @@ -261,8 +261,8 @@ class MatrixFederationHttpClient(object): ignore_backoff=ignore_backoff, ) - method = request.method - destination = request.destination + method_bytes = request.method.encode("ascii") + destination_bytes = request.destination.encode("ascii") path_bytes = request.path.encode("ascii") if request.query: query_bytes = encode_query_args(request.query) @@ -270,8 +270,8 @@ class MatrixFederationHttpClient(object): query_bytes = b"" headers_dict = { - "User-Agent": [self.version_string], - "Host": [request.destination], + b"User-Agent": [self.version_string_bytes], + b"Host": [destination_bytes], } with limiter: @@ -282,50 +282,51 @@ class MatrixFederationHttpClient(object): else: retries_left = MAX_SHORT_RETRIES - url = urllib.parse.urlunparse(( - b"matrix", destination.encode("ascii"), + url_bytes = urllib.parse.urlunparse(( + b"matrix", destination_bytes, path_bytes, None, query_bytes, b"", - )).decode('ascii') + )) + url_str = url_bytes.decode('ascii') - http_url = urllib.parse.urlunparse(( + url_to_sign_bytes = urllib.parse.urlunparse(( b"", b"", path_bytes, None, query_bytes, b"", - )).decode('ascii') + )) while True: try: json = request.get_json() if json: - data = encode_canonical_json(json) - headers_dict["Content-Type"] = ["application/json"] + headers_dict[b"Content-Type"] = [b"application/json"] self.sign_request( - destination, method, http_url, headers_dict, json + destination_bytes, method_bytes, url_to_sign_bytes, + headers_dict, json, ) - else: - data = None - self.sign_request(destination, method, http_url, headers_dict) - - logger.info( - "{%s} [%s] Sending request: %s %s", - request.txn_id, destination, method, url - ) - - if data: + data = encode_canonical_json(json) producer = FileBodyProducer( BytesIO(data), - cooperator=self._cooperator + cooperator=self._cooperator, ) else: producer = None + self.sign_request( + destination_bytes, method_bytes, url_to_sign_bytes, + headers_dict, + ) - request_deferred = treq.request( - method, - url, + logger.info( + "{%s} [%s] Sending request: %s %s", + request.txn_id, request.destination, request.method, + url_str, + ) + + # we don't want all the fancy cookie and redirect handling that + # treq.request gives: just use the raw Agent. + request_deferred = self.agent.request( + method_bytes, + url_bytes, headers=Headers(headers_dict), - data=producer, - agent=self.agent, - reactor=self.hs.get_reactor(), - unbuffered=True + bodyProducer=producer, ) request_deferred = timeout_deferred( @@ -344,9 +345,9 @@ class MatrixFederationHttpClient(object): logger.warn( "{%s} [%s] Request failed: %s %s: %s", request.txn_id, - destination, - method, - url, + request.destination, + request.method, + url_str, _flatten_response_never_received(e), ) @@ -366,7 +367,7 @@ class MatrixFederationHttpClient(object): logger.debug( "{%s} [%s] Waiting %ss before re-sending...", request.txn_id, - destination, + request.destination, delay, ) @@ -378,7 +379,7 @@ class MatrixFederationHttpClient(object): logger.info( "{%s} [%s] Got response headers: %d %s", request.txn_id, - destination, + request.destination, response.code, response.phrase.decode('ascii', errors='replace'), ) @@ -411,8 +412,9 @@ class MatrixFederationHttpClient(object): destination_is must be non-None. method (bytes): The HTTP method of the request url_bytes (bytes): The URI path of the request - headers_dict (dict): Dictionary of request headers to append to - content (bytes): The body of the request + headers_dict (dict[bytes, list[bytes]]): Dictionary of request headers to + append to + content (object): The body of the request destination_is (bytes): As 'destination', but if the destination is an identity server -- cgit 1.5.1 From b69216f7680b92ccd8f92e618e1a81c9ba3c3346 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Fri, 19 Oct 2018 21:45:45 +1100 Subject: Make the metrics less racy (#4061) --- changelog.d/4061.bugfix | 1 + synapse/http/request_metrics.py | 31 ++++++++++++++++++------------- synapse/notifier.py | 6 +++--- 3 files changed, 22 insertions(+), 16 deletions(-) create mode 100644 changelog.d/4061.bugfix (limited to 'synapse/http') diff --git a/changelog.d/4061.bugfix b/changelog.d/4061.bugfix new file mode 100644 index 0000000000..94ffcf7a51 --- /dev/null +++ b/changelog.d/4061.bugfix @@ -0,0 +1 @@ +Fix some metrics being racy and causing exceptions when polled by Prometheus. diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py index fedb4e6b18..62045a918b 100644 --- a/synapse/http/request_metrics.py +++ b/synapse/http/request_metrics.py @@ -39,7 +39,8 @@ outgoing_responses_counter = Counter( ) response_timer = Histogram( - "synapse_http_server_response_time_seconds", "sec", + "synapse_http_server_response_time_seconds", + "sec", ["method", "servlet", "tag", "code"], ) @@ -79,15 +80,11 @@ response_size = Counter( # than when the response was written. in_flight_requests_ru_utime = Counter( - "synapse_http_server_in_flight_requests_ru_utime_seconds", - "", - ["method", "servlet"], + "synapse_http_server_in_flight_requests_ru_utime_seconds", "", ["method", "servlet"] ) in_flight_requests_ru_stime = Counter( - "synapse_http_server_in_flight_requests_ru_stime_seconds", - "", - ["method", "servlet"], + "synapse_http_server_in_flight_requests_ru_stime_seconds", "", ["method", "servlet"] ) in_flight_requests_db_txn_count = Counter( @@ -134,7 +131,7 @@ def _get_in_flight_counts(): # type counts = {} for rm in reqs: - key = (rm.method, rm.name,) + key = (rm.method, rm.name) counts[key] = counts.get(key, 0) + 1 return counts @@ -175,7 +172,8 @@ class RequestMetrics(object): if context != self.start_context: logger.warn( "Context have unexpectedly changed %r, %r", - context, self.start_context + context, + self.start_context, ) return @@ -192,10 +190,10 @@ class RequestMetrics(object): resource_usage = context.get_resource_usage() response_ru_utime.labels(self.method, self.name, tag).inc( - resource_usage.ru_utime, + resource_usage.ru_utime ) response_ru_stime.labels(self.method, self.name, tag).inc( - resource_usage.ru_stime, + resource_usage.ru_stime ) response_db_txn_count.labels(self.method, self.name, tag).inc( resource_usage.db_txn_count @@ -222,8 +220,15 @@ class RequestMetrics(object): diff = new_stats - self._request_stats self._request_stats = new_stats - in_flight_requests_ru_utime.labels(self.method, self.name).inc(diff.ru_utime) - in_flight_requests_ru_stime.labels(self.method, self.name).inc(diff.ru_stime) + # max() is used since rapid use of ru_stime/ru_utime can end up with the + # count going backwards due to NTP, time smearing, fine-grained + # correction, or floating points. Who knows, really? + in_flight_requests_ru_utime.labels(self.method, self.name).inc( + max(diff.ru_utime, 0) + ) + in_flight_requests_ru_stime.labels(self.method, self.name).inc( + max(diff.ru_stime, 0) + ) in_flight_requests_db_txn_count.labels(self.method, self.name).inc( diff.db_txn_count diff --git a/synapse/notifier.py b/synapse/notifier.py index 340b16ce25..de02b1017e 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -186,9 +186,9 @@ class Notifier(object): def count_listeners(): all_user_streams = set() - for x in self.room_to_user_streams.values(): + for x in list(self.room_to_user_streams.values()): all_user_streams |= x - for x in self.user_to_user_stream.values(): + for x in list(self.user_to_user_stream.values()): all_user_streams.add(x) return sum(stream.count_listeners() for stream in all_user_streams) @@ -196,7 +196,7 @@ class Notifier(object): LaterGauge( "synapse_notifier_rooms", "", [], - lambda: count(bool, self.room_to_user_streams.values()), + lambda: count(bool, list(self.room_to_user_streams.values())), ) LaterGauge( "synapse_notifier_users", "", [], -- cgit 1.5.1