diff options
Diffstat (limited to 'synapse/http')
-rw-r--r-- | synapse/http/__init__.py | 13 | ||||
-rw-r--r-- | synapse/http/additional_resource.py | 3 | ||||
-rw-r--r-- | synapse/http/client.py | 61 | ||||
-rw-r--r-- | synapse/http/endpoint.py | 97 | ||||
-rw-r--r-- | synapse/http/matrixfederationclient.py | 77 | ||||
-rw-r--r-- | synapse/http/request_metrics.py | 95 | ||||
-rw-r--r-- | synapse/http/server.py | 41 | ||||
-rw-r--r-- | synapse/http/servlet.py | 9 | ||||
-rw-r--r-- | synapse/http/site.py | 53 |
9 files changed, 257 insertions, 192 deletions
diff --git a/synapse/http/__init__.py b/synapse/http/__init__.py index 054372e179..58ef8d3ce4 100644 --- a/synapse/http/__init__.py +++ b/synapse/http/__init__.py @@ -13,6 +13,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import re + from twisted.internet.defer import CancelledError from twisted.python import failure @@ -34,3 +36,14 @@ def cancelled_to_request_timed_out_error(value, timeout): value.trap(CancelledError) raise RequestTimedOutError() return value + + +ACCESS_TOKEN_RE = re.compile(br'(\?.*access(_|%5[Ff])token=)[^&]*(.*)$') + + +def redact_uri(uri): + """Strips access tokens from the uri replaces with <redacted>""" + return ACCESS_TOKEN_RE.sub( + br'\1<redacted>\3', + uri + ) diff --git a/synapse/http/additional_resource.py b/synapse/http/additional_resource.py index a797396ade..0e10e3f8f7 100644 --- a/synapse/http/additional_resource.py +++ b/synapse/http/additional_resource.py @@ -13,10 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from synapse.http.server import wrap_json_request_handler from twisted.web.resource import Resource from twisted.web.server import NOT_DONE_YET +from synapse.http.server import wrap_json_request_handler + class AdditionalResource(Resource): """Resource wrapper for additional_resources diff --git a/synapse/http/client.py b/synapse/http/client.py index 4d4eee3d64..d6a0d75b2b 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -13,39 +13,41 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from OpenSSL import SSL -from OpenSSL.SSL import VERIFY_NONE +import logging +import urllib -from synapse.api.errors import ( - CodeMessageException, MatrixCodeMessageException, SynapseError, Codes, -) -from synapse.http import cancelled_to_request_timed_out_error -from synapse.util.async import add_timeout_to_deferred -from synapse.util.caches import CACHE_SIZE_FACTOR -from synapse.util.logcontext import make_deferred_yieldable -from synapse.http.endpoint import SpiderEndpoint +from six import StringIO -from canonicaljson import encode_canonical_json +from canonicaljson import encode_canonical_json, json +from prometheus_client import Counter -from twisted.internet import defer, reactor, ssl, protocol, task +from OpenSSL import SSL +from OpenSSL.SSL import VERIFY_NONE +from twisted.internet import defer, protocol, reactor, ssl, task from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS +from twisted.web._newclient import ResponseDone +from twisted.web.client import Agent, BrowserLikeRedirectAgent, ContentDecoderAgent +from twisted.web.client import FileBodyProducer as TwistedFileBodyProducer from twisted.web.client import ( - BrowserLikeRedirectAgent, ContentDecoderAgent, GzipDecoder, Agent, - readBody, PartialDownloadError, + GzipDecoder, HTTPConnectionPool, + PartialDownloadError, + readBody, ) -from twisted.web.client import FileBodyProducer as TwistedFileBodyProducer from twisted.web.http import PotentialDataLoss from twisted.web.http_headers import Headers -from twisted.web._newclient import ResponseDone - -from six import StringIO - -from prometheus_client import Counter -import simplejson as json -import logging -import urllib +from synapse.api.errors import ( + CodeMessageException, + Codes, + MatrixCodeMessageException, + SynapseError, +) +from synapse.http import cancelled_to_request_timed_out_error, redact_uri +from synapse.http.endpoint import SpiderEndpoint +from synapse.util.async import add_timeout_to_deferred +from synapse.util.caches import CACHE_SIZE_FACTOR +from synapse.util.logcontext import make_deferred_yieldable logger = logging.getLogger(__name__) @@ -90,31 +92,32 @@ class SimpleHttpClient(object): # counters to it outgoing_requests_counter.labels(method).inc() - logger.info("Sending request %s %s", method, uri) + # log request but strip `access_token` (AS requests for example include this) + logger.info("Sending request %s %s", method, redact_uri(uri)) try: request_deferred = self.agent.request( method, uri, *args, **kwargs ) add_timeout_to_deferred( - request_deferred, - 60, cancelled_to_request_timed_out_error, + request_deferred, 60, self.hs.get_reactor(), + cancelled_to_request_timed_out_error, ) response = yield make_deferred_yieldable(request_deferred) incoming_responses_counter.labels(method, response.code).inc() logger.info( "Received response to %s %s: %s", - method, uri, response.code + method, redact_uri(uri), response.code ) defer.returnValue(response) except Exception as e: incoming_responses_counter.labels(method, "ERR").inc() logger.info( "Error sending request to %s %s: %s %s", - method, uri, type(e).__name__, e.message + method, redact_uri(uri), type(e).__name__, e.message ) - raise e + raise @defer.inlineCallbacks def post_urlencoded_get_json(self, uri, args={}, headers=None): diff --git a/synapse/http/endpoint.py b/synapse/http/endpoint.py index 87a482650d..d65daa72bb 100644 --- a/synapse/http/endpoint.py +++ b/synapse/http/endpoint.py @@ -12,17 +12,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS -from twisted.internet import defer, reactor -from twisted.internet.error import ConnectError -from twisted.names import client, dns -from twisted.names.error import DNSNameError, DomainError - import collections import logging import random +import re import time +from twisted.internet import defer +from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS +from twisted.internet.error import ConnectError +from twisted.names import client, dns +from twisted.names.error import DNSNameError, DomainError logger = logging.getLogger(__name__) @@ -38,6 +38,71 @@ _Server = collections.namedtuple( ) +def parse_server_name(server_name): + """Split a server name into host/port parts. + + Args: + server_name (str): server name to parse + + Returns: + Tuple[str, int|None]: host/port parts. + + Raises: + ValueError if the server name could not be parsed. + """ + try: + if server_name[-1] == ']': + # ipv6 literal, hopefully + return server_name, None + + domain_port = server_name.rsplit(":", 1) + domain = domain_port[0] + port = int(domain_port[1]) if domain_port[1:] else None + return domain, port + except Exception: + raise ValueError("Invalid server name '%s'" % server_name) + + +VALID_HOST_REGEX = re.compile( + "\\A[0-9a-zA-Z.-]+\\Z", +) + + +def parse_and_validate_server_name(server_name): + """Split a server name into host/port parts and do some basic validation. + + Args: + server_name (str): server name to parse + + Returns: + Tuple[str, int|None]: host/port parts. + + Raises: + ValueError if the server name could not be parsed. + """ + host, port = parse_server_name(server_name) + + # these tests don't need to be bulletproof as we'll find out soon enough + # if somebody is giving us invalid data. What we *do* need is to be sure + # that nobody is sneaking IP literals in that look like hostnames, etc. + + # look for ipv6 literals + if host[0] == '[': + if host[-1] != ']': + raise ValueError("Mismatched [...] in server name '%s'" % ( + server_name, + )) + return host, port + + # otherwise it should only be alphanumerics. + if not VALID_HOST_REGEX.match(host): + raise ValueError("Server name '%s' contains invalid characters" % ( + server_name, + )) + + return host, port + + def matrix_federation_endpoint(reactor, destination, ssl_context_factory=None, timeout=None): """Construct an endpoint for the given matrix destination. @@ -50,9 +115,7 @@ def matrix_federation_endpoint(reactor, destination, ssl_context_factory=None, timeout (int): connection timeout in seconds """ - domain_port = destination.split(":") - domain = domain_port[0] - port = int(domain_port[1]) if domain_port[1:] else None + domain, port = parse_server_name(destination) endpoint_kw_args = {} @@ -74,21 +137,22 @@ def matrix_federation_endpoint(reactor, destination, ssl_context_factory=None, reactor, "matrix", domain, protocol="tcp", default_port=default_port, endpoint=transport_endpoint, endpoint_kw_args=endpoint_kw_args - )) + ), reactor) else: return _WrappingEndpointFac(transport_endpoint( reactor, domain, port, **endpoint_kw_args - )) + ), reactor) class _WrappingEndpointFac(object): - def __init__(self, endpoint_fac): + def __init__(self, endpoint_fac, reactor): self.endpoint_fac = endpoint_fac + self.reactor = reactor @defer.inlineCallbacks def connect(self, protocolFactory): conn = yield self.endpoint_fac.connect(protocolFactory) - conn = _WrappedConnection(conn) + conn = _WrappedConnection(conn, self.reactor) defer.returnValue(conn) @@ -98,9 +162,10 @@ class _WrappedConnection(object): """ __slots__ = ["conn", "last_request"] - def __init__(self, conn): + def __init__(self, conn, reactor): object.__setattr__(self, "conn", conn) object.__setattr__(self, "last_request", time.time()) + self._reactor = reactor def __getattr__(self, name): return getattr(self.conn, name) @@ -131,14 +196,14 @@ class _WrappedConnection(object): # Time this connection out if we haven't send a request in the last # N minutes # TODO: Cancel the previous callLater? - reactor.callLater(3 * 60, self._time_things_out_maybe) + self._reactor.callLater(3 * 60, self._time_things_out_maybe) d = self.conn.request(request) def update_request_time(res): self.last_request = time.time() # TODO: Cancel the previous callLater? - reactor.callLater(3 * 60, self._time_things_out_maybe) + self._reactor.callLater(3 * 60, self._time_things_out_maybe) return res d.addCallback(update_request_time) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 821aed362b..bf1aa29502 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -13,39 +13,38 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from twisted.internet import defer, reactor, protocol -from twisted.internet.error import DNSLookupError -from twisted.web.client import readBody, HTTPConnectionPool, Agent -from twisted.web.http_headers import Headers -from twisted.web._newclient import ResponseDone - -from synapse.http import cancelled_to_request_timed_out_error -from synapse.http.endpoint import matrix_federation_endpoint -import synapse.metrics -from synapse.util.async import sleep, add_timeout_to_deferred -from synapse.util import logcontext -from synapse.util.logcontext import make_deferred_yieldable -import synapse.util.retryutils - -from canonicaljson import encode_canonical_json - -from synapse.api.errors import ( - SynapseError, Codes, HttpResponseException, FederationDeniedError, -) - -from signedjson.sign import sign_json - import cgi -import simplejson as json import logging import random import sys import urllib -from six.moves.urllib import parse as urlparse -from six import string_types +from six import string_types +from six.moves.urllib import parse as urlparse +from canonicaljson import encode_canonical_json, json from prometheus_client import Counter +from signedjson.sign import sign_json + +from twisted.internet import defer, protocol, reactor +from twisted.internet.error import DNSLookupError +from twisted.web._newclient import ResponseDone +from twisted.web.client import Agent, HTTPConnectionPool, readBody +from twisted.web.http_headers import Headers + +import synapse.metrics +import synapse.util.retryutils +from synapse.api.errors import ( + Codes, + FederationDeniedError, + HttpResponseException, + SynapseError, +) +from synapse.http import cancelled_to_request_timed_out_error +from synapse.http.endpoint import matrix_federation_endpoint +from synapse.util import logcontext +from synapse.util.async import add_timeout_to_deferred +from synapse.util.logcontext import make_deferred_yieldable logger = logging.getLogger(__name__) outbound_logger = logging.getLogger("synapse.http.outbound") @@ -193,6 +192,7 @@ class MatrixFederationHttpClient(object): add_timeout_to_deferred( request_deferred, timeout / 1000. if timeout else 60, + self.hs.get_reactor(), cancelled_to_request_timed_out_error, ) response = yield make_deferred_yieldable( @@ -234,7 +234,7 @@ class MatrixFederationHttpClient(object): delay = min(delay, 2) delay *= random.uniform(0.8, 1.4) - yield sleep(delay) + yield self.clock.sleep(delay) retries_left -= 1 else: raise @@ -260,14 +260,35 @@ class MatrixFederationHttpClient(object): defer.returnValue(response) def sign_request(self, destination, method, url_bytes, headers_dict, - content=None): + content=None, destination_is=None): + """ + Signs a request by adding an Authorization header to headers_dict + Args: + destination (bytes|None): The desination home server of the request. + May be None if the destination is an identity server, in which case + destination_is must be non-None. + method (bytes): The HTTP method of the request + url_bytes (bytes): The URI path of the request + headers_dict (dict): Dictionary of request headers to append to + content (bytes): The body of the request + destination_is (bytes): As 'destination', but if the destination is an + identity server + + Returns: + None + """ request = { "method": method, "uri": url_bytes, "origin": self.server_name, - "destination": destination, } + if destination is not None: + request["destination"] = destination + + if destination_is is not None: + request["destination_is"] = destination_is + if content is not None: request["content"] = content diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py index dc06f6c443..f24b4b949c 100644 --- a/synapse/http/request_metrics.py +++ b/synapse/http/request_metrics.py @@ -17,8 +17,8 @@ import logging from prometheus_client.core import Counter, Histogram -from synapse.metrics import LaterGauge +from synapse.metrics import LaterGauge from synapse.util.logcontext import LoggingContext logger = logging.getLogger(__name__) @@ -117,13 +117,17 @@ def _get_in_flight_counts(): Returns: dict[tuple[str, str], int] """ - for rm in _in_flight_requests: + # Cast to a list to prevent it changing while the Prometheus + # thread is collecting metrics + reqs = list(_in_flight_requests) + + for rm in reqs: rm.update_metrics() # Map from (method, name) -> int, the number of in flight requests of that # type counts = {} - for rm in _in_flight_requests: + for rm in reqs: key = (rm.method, rm.name,) counts[key] = counts.get(key, 0) + 1 @@ -131,7 +135,7 @@ def _get_in_flight_counts(): LaterGauge( - "synapse_http_request_metrics_in_flight_requests_count", + "synapse_http_server_in_flight_requests_count", "", ["method", "servlet"], _get_in_flight_counts, @@ -145,7 +149,9 @@ class RequestMetrics(object): self.name = name self.method = method - self._request_stats = _RequestStats.from_context(self.start_context) + # _request_stats records resource usage that we have already added + # to the "in flight" metrics. + self._request_stats = self.start_context.get_resource_usage() _in_flight_requests.add(self) @@ -173,18 +179,22 @@ class RequestMetrics(object): time_sec - self.start ) - ru_utime, ru_stime = context.get_resource_usage() + resource_usage = context.get_resource_usage() - response_ru_utime.labels(request.method, self.name, tag).inc(ru_utime) - response_ru_stime.labels(request.method, self.name, tag).inc(ru_stime) + response_ru_utime.labels(request.method, self.name, tag).inc( + resource_usage.ru_utime, + ) + response_ru_stime.labels(request.method, self.name, tag).inc( + resource_usage.ru_stime, + ) response_db_txn_count.labels(request.method, self.name, tag).inc( - context.db_txn_count + resource_usage.db_txn_count ) response_db_txn_duration.labels(request.method, self.name, tag).inc( - context.db_txn_duration_sec + resource_usage.db_txn_duration_sec ) response_db_sched_duration.labels(request.method, self.name, tag).inc( - context.db_sched_duration_sec + resource_usage.db_sched_duration_sec ) response_size.labels(request.method, self.name, tag).inc(request.sentLength) @@ -197,7 +207,10 @@ class RequestMetrics(object): def update_metrics(self): """Updates the in flight metrics with values from this request. """ - diff = self._request_stats.update(self.start_context) + new_stats = self.start_context.get_resource_usage() + + diff = new_stats - self._request_stats + self._request_stats = new_stats in_flight_requests_ru_utime.labels(self.method, self.name).inc(diff.ru_utime) in_flight_requests_ru_stime.labels(self.method, self.name).inc(diff.ru_stime) @@ -213,61 +226,3 @@ class RequestMetrics(object): in_flight_requests_db_sched_duration.labels(self.method, self.name).inc( diff.db_sched_duration_sec ) - - -class _RequestStats(object): - """Keeps tracks of various metrics for an in flight request. - """ - - __slots__ = [ - "ru_utime", - "ru_stime", - "db_txn_count", - "db_txn_duration_sec", - "db_sched_duration_sec", - ] - - def __init__( - self, ru_utime, ru_stime, db_txn_count, db_txn_duration_sec, db_sched_duration_sec - ): - self.ru_utime = ru_utime - self.ru_stime = ru_stime - self.db_txn_count = db_txn_count - self.db_txn_duration_sec = db_txn_duration_sec - self.db_sched_duration_sec = db_sched_duration_sec - - @staticmethod - def from_context(context): - ru_utime, ru_stime = context.get_resource_usage() - - return _RequestStats( - ru_utime, ru_stime, - context.db_txn_count, - context.db_txn_duration_sec, - context.db_sched_duration_sec, - ) - - def update(self, context): - """Updates the current values and returns the difference between the - old and new values. - - Returns: - _RequestStats: The difference between the old and new values - """ - new = _RequestStats.from_context(context) - - diff = _RequestStats( - new.ru_utime - self.ru_utime, - new.ru_stime - self.ru_stime, - new.db_txn_count - self.db_txn_count, - new.db_txn_duration_sec - self.db_txn_duration_sec, - new.db_sched_duration_sec - self.db_sched_duration_sec, - ) - - self.ru_utime = new.ru_utime - self.ru_stime = new.ru_stime - self.db_txn_count = new.db_txn_count - self.db_txn_duration_sec = new.db_txn_duration_sec - self.db_sched_duration_sec = new.db_sched_duration_sec - - return diff diff --git a/synapse/http/server.py b/synapse/http/server.py index bc09b8b2be..c70fdbdfd2 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -14,34 +14,33 @@ # See the License for the specific language governing permissions and # limitations under the License. import cgi -from six.moves import http_client +import collections +import logging +import urllib -from synapse.api.errors import ( - cs_exception, SynapseError, CodeMessageException, UnrecognizedRequestError, Codes -) -from synapse.http.request_metrics import ( - requests_counter, -) -from synapse.util.logcontext import LoggingContext, PreserveLoggingContext -from synapse.util.caches import intern_dict -from synapse.util.metrics import Measure -import synapse.metrics -import synapse.events +from six.moves import http_client -from canonicaljson import ( - encode_canonical_json, encode_pretty_printed_json -) +from canonicaljson import encode_canonical_json, encode_pretty_printed_json, json from twisted.internet import defer from twisted.python import failure -from twisted.web import server, resource +from twisted.web import resource, server from twisted.web.server import NOT_DONE_YET from twisted.web.util import redirectTo -import collections -import logging -import urllib -import simplejson +import synapse.events +import synapse.metrics +from synapse.api.errors import ( + CodeMessageException, + Codes, + SynapseError, + UnrecognizedRequestError, + cs_exception, +) +from synapse.http.request_metrics import requests_counter +from synapse.util.caches import intern_dict +from synapse.util.logcontext import LoggingContext, PreserveLoggingContext +from synapse.util.metrics import Measure logger = logging.getLogger(__name__) @@ -410,7 +409,7 @@ def respond_with_json(request, code, json_object, send_cors=False, if canonical_json or synapse.events.USE_FROZEN_DICTS: json_bytes = encode_canonical_json(json_object) else: - json_bytes = simplejson.dumps(json_object) + json_bytes = json.dumps(json_object) return respond_with_json_bytes( request, code, json_bytes, diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index ef8e62901b..cf6723563a 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -15,10 +15,11 @@ """ This module contains base REST classes for constructing REST servlets. """ -from synapse.api.errors import SynapseError, Codes - import logging -import simplejson + +from canonicaljson import json + +from synapse.api.errors import Codes, SynapseError logger = logging.getLogger(__name__) @@ -171,7 +172,7 @@ def parse_json_value_from_request(request, allow_empty_body=False): return None try: - content = simplejson.loads(content_bytes) + content = json.loads(content_bytes) except Exception as e: logger.warn("Unable to parse JSON: %s", e) raise SynapseError(400, "Content not JSON.", errcode=Codes.NOT_JSON) diff --git a/synapse/http/site.py b/synapse/http/site.py index 60299657b9..21e26f9c5e 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -14,18 +14,16 @@ import contextlib import logging -import re import time -from twisted.web.server import Site, Request +from twisted.web.server import Request, Site +from synapse.http import redact_uri from synapse.http.request_metrics import RequestMetrics -from synapse.util.logcontext import LoggingContext +from synapse.util.logcontext import LoggingContext, ContextResourceUsage logger = logging.getLogger(__name__) -ACCESS_TOKEN_RE = re.compile(br'(\?.*access(_|%5[Ff])token=)[^&]*(.*)$') - _next_request_seq = 0 @@ -69,10 +67,7 @@ class SynapseRequest(Request): return "%s-%i" % (self.method, self.request_seq) def get_redacted_uri(self): - return ACCESS_TOKEN_RE.sub( - br'\1<redacted>\3', - self.uri - ) + return redact_uri(self.uri) def get_user_agent(self): return self.requestHeaders.getRawHeaders(b"User-Agent", [None])[-1] @@ -100,35 +95,47 @@ class SynapseRequest(Request): def _finished_processing(self): try: context = LoggingContext.current_context() - ru_utime, ru_stime = context.get_resource_usage() - db_txn_count = context.db_txn_count - db_txn_duration_sec = context.db_txn_duration_sec - db_sched_duration_sec = context.db_sched_duration_sec + usage = context.get_resource_usage() except Exception: - ru_utime, ru_stime = (0, 0) - db_txn_count, db_txn_duration_sec = (0, 0) + usage = ContextResourceUsage() end_time = time.time() + # need to decode as it could be raw utf-8 bytes + # from a IDN servname in an auth header + authenticated_entity = self.authenticated_entity + if authenticated_entity is not None: + authenticated_entity = authenticated_entity.decode("utf-8", "replace") + + # ...or could be raw utf-8 bytes in the User-Agent header. + # N.B. if you don't do this, the logger explodes cryptically + # with maximum recursion trying to log errors about + # the charset problem. + # c.f. https://github.com/matrix-org/synapse/issues/3471 + user_agent = self.get_user_agent() + if user_agent is not None: + user_agent = user_agent.decode("utf-8", "replace") + self.site.access_logger.info( "%s - %s - {%s}" " Processed request: %.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)" - " %sB %s \"%s %s %s\" \"%s\"", + " %sB %s \"%s %s %s\" \"%s\" [%d dbevts]", self.getClientIP(), self.site.site_tag, - self.authenticated_entity, + authenticated_entity, end_time - self.start_time, - ru_utime, - ru_stime, - db_sched_duration_sec, - db_txn_duration_sec, - int(db_txn_count), + usage.ru_utime, + usage.ru_stime, + usage.db_sched_duration_sec, + usage.db_txn_duration_sec, + int(usage.db_txn_count), self.sentLength, self.code, self.method, self.get_redacted_uri(), self.clientproto, - self.get_user_agent(), + user_agent, + usage.evt_db_fetch_count, ) try: |