diff options
Diffstat (limited to 'synapse/http')
-rw-r--r-- | synapse/http/__init__.py | 22 | ||||
-rw-r--r-- | synapse/http/additional_resource.py | 55 | ||||
-rw-r--r-- | synapse/http/client.py | 197 | ||||
-rw-r--r-- | synapse/http/endpoint.py | 50 | ||||
-rw-r--r-- | synapse/http/matrixfederationclient.py | 452 | ||||
-rw-r--r-- | synapse/http/server.py | 238 | ||||
-rw-r--r-- | synapse/http/servlet.py | 37 | ||||
-rw-r--r-- | synapse/http/site.py | 18 |
8 files changed, 759 insertions, 310 deletions
diff --git a/synapse/http/__init__.py b/synapse/http/__init__.py index bfebb0f644..054372e179 100644 --- a/synapse/http/__init__.py +++ b/synapse/http/__init__.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,3 +13,24 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from twisted.internet.defer import CancelledError +from twisted.python import failure + +from synapse.api.errors import SynapseError + + +class RequestTimedOutError(SynapseError): + """Exception representing timeout of an outbound request""" + def __init__(self): + super(RequestTimedOutError, self).__init__(504, "Timed out") + + +def cancelled_to_request_timed_out_error(value, timeout): + """Turns CancelledErrors into RequestTimedOutErrors. + + For use with async.add_timeout_to_deferred + """ + if isinstance(value, failure.Failure): + value.trap(CancelledError) + raise RequestTimedOutError() + return value diff --git a/synapse/http/additional_resource.py b/synapse/http/additional_resource.py new file mode 100644 index 0000000000..343e932cb1 --- /dev/null +++ b/synapse/http/additional_resource.py @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- +# Copyright 2017 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from synapse.http.server import wrap_request_handler +from twisted.web.resource import Resource +from twisted.web.server import NOT_DONE_YET + + +class AdditionalResource(Resource): + """Resource wrapper for additional_resources + + If the user has configured additional_resources, we need to wrap the + handler class with a Resource so that we can map it into the resource tree. + + This class is also where we wrap the request handler with logging, metrics, + and exception handling. + """ + def __init__(self, hs, handler): + """Initialise AdditionalResource + + The ``handler`` should return a deferred which completes when it has + done handling the request. It should write a response with + ``request.write()``, and call ``request.finish()``. + + Args: + hs (synapse.server.HomeServer): homeserver + handler ((twisted.web.server.Request) -> twisted.internet.defer.Deferred): + function to be called to handle the request. + """ + Resource.__init__(self) + self._handler = handler + + # these are required by the request_handler wrapper + self.version_string = hs.version_string + self.clock = hs.get_clock() + + def render(self, request): + self._async_render(request) + return NOT_DONE_YET + + @wrap_request_handler + def _async_render(self, request): + return self._handler(request) diff --git a/synapse/http/client.py b/synapse/http/client.py index ca2f770f5d..70a19d9b74 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,9 +17,12 @@ from OpenSSL import SSL from OpenSSL.SSL import VERIFY_NONE from synapse.api.errors import ( - CodeMessageException, SynapseError, Codes, + CodeMessageException, MatrixCodeMessageException, SynapseError, Codes, ) -from synapse.util.logcontext import preserve_context_over_fn +from synapse.http import cancelled_to_request_timed_out_error +from synapse.util.async import add_timeout_to_deferred +from synapse.util.caches import CACHE_SIZE_FACTOR +from synapse.util.logcontext import make_deferred_yieldable import synapse.metrics from synapse.http.endpoint import SpiderEndpoint @@ -29,13 +33,14 @@ from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS from twisted.web.client import ( BrowserLikeRedirectAgent, ContentDecoderAgent, GzipDecoder, Agent, readBody, PartialDownloadError, + HTTPConnectionPool, ) from twisted.web.client import FileBodyProducer as TwistedFileBodyProducer from twisted.web.http import PotentialDataLoss from twisted.web.http_headers import Headers from twisted.web._newclient import ResponseDone -from StringIO import StringIO +from six import StringIO import simplejson as json import logging @@ -63,92 +68,139 @@ class SimpleHttpClient(object): """ def __init__(self, hs): self.hs = hs + + pool = HTTPConnectionPool(reactor) + + # the pusher makes lots of concurrent SSL connections to sygnal, and + # tends to do so in batches, so we need to allow the pool to keep lots + # of idle connections around. + pool.maxPersistentPerHost = max((100 * CACHE_SIZE_FACTOR, 5)) + pool.cachedConnectionTimeout = 2 * 60 + # The default context factory in Twisted 14.0.0 (which we require) is # BrowserLikePolicyForHTTPS which will do regular cert validation # 'like a browser' self.agent = Agent( reactor, connectTimeout=15, - contextFactory=hs.get_http_client_context_factory() + contextFactory=hs.get_http_client_context_factory(), + pool=pool, ) self.user_agent = hs.version_string + self.clock = hs.get_clock() if hs.config.user_agent_suffix: self.user_agent = "%s %s" % (self.user_agent, hs.config.user_agent_suffix,) + @defer.inlineCallbacks def request(self, method, uri, *args, **kwargs): # A small wrapper around self.agent.request() so we can easily attach # counters to it outgoing_requests_counter.inc(method) - d = preserve_context_over_fn( - self.agent.request, - method, uri, *args, **kwargs - ) logger.info("Sending request %s %s", method, uri) - def _cb(response): + try: + request_deferred = self.agent.request( + method, uri, *args, **kwargs + ) + add_timeout_to_deferred( + request_deferred, + 60, cancelled_to_request_timed_out_error, + ) + response = yield make_deferred_yieldable(request_deferred) + incoming_responses_counter.inc(method, response.code) logger.info( "Received response to %s %s: %s", method, uri, response.code ) - return response - - def _eb(failure): + defer.returnValue(response) + except Exception as e: incoming_responses_counter.inc(method, "ERR") logger.info( "Error sending request to %s %s: %s %s", - method, uri, failure.type, failure.getErrorMessage() + method, uri, type(e).__name__, e.message ) - return failure + raise e - d.addCallbacks(_cb, _eb) + @defer.inlineCallbacks + def post_urlencoded_get_json(self, uri, args={}, headers=None): + """ + Args: + uri (str): + args (dict[str, str|List[str]]): query params + headers (dict[str, List[str]]|None): If not None, a map from + header name to a list of values for that header - return d + Returns: + Deferred[object]: parsed json + """ - @defer.inlineCallbacks - def post_urlencoded_get_json(self, uri, args={}): # TODO: Do we ever want to log message contents? logger.debug("post_urlencoded_get_json args: %s", args) query_bytes = urllib.urlencode(encode_urlencode_args(args), True) + actual_headers = { + b"Content-Type": [b"application/x-www-form-urlencoded"], + b"User-Agent": [self.user_agent], + } + if headers: + actual_headers.update(headers) + response = yield self.request( "POST", uri.encode("ascii"), - headers=Headers({ - b"Content-Type": [b"application/x-www-form-urlencoded"], - b"User-Agent": [self.user_agent], - }), + headers=Headers(actual_headers), bodyProducer=FileBodyProducer(StringIO(query_bytes)) ) - body = yield preserve_context_over_fn(readBody, response) + body = yield make_deferred_yieldable(readBody(response)) defer.returnValue(json.loads(body)) @defer.inlineCallbacks - def post_json_get_json(self, uri, post_json): + def post_json_get_json(self, uri, post_json, headers=None): + """ + + Args: + uri (str): + post_json (object): + headers (dict[str, List[str]]|None): If not None, a map from + header name to a list of values for that header + + Returns: + Deferred[object]: parsed json + """ json_str = encode_canonical_json(post_json) logger.debug("HTTP POST %s -> %s", json_str, uri) + actual_headers = { + b"Content-Type": [b"application/json"], + b"User-Agent": [self.user_agent], + } + if headers: + actual_headers.update(headers) + response = yield self.request( "POST", uri.encode("ascii"), - headers=Headers({ - b"Content-Type": [b"application/json"], - b"User-Agent": [self.user_agent], - }), + headers=Headers(actual_headers), bodyProducer=FileBodyProducer(StringIO(json_str)) ) - body = yield preserve_context_over_fn(readBody, response) + body = yield make_deferred_yieldable(readBody(response)) + + if 200 <= response.code < 300: + defer.returnValue(json.loads(body)) + else: + raise self._exceptionFromFailedRequest(response, body) defer.returnValue(json.loads(body)) @defer.inlineCallbacks - def get_json(self, uri, args={}): + def get_json(self, uri, args={}, headers=None): """ Gets some json from the given URI. Args: @@ -157,6 +209,8 @@ class SimpleHttpClient(object): None. **Note**: The value of each key is assumed to be an iterable and *not* a string. + headers (dict[str, List[str]]|None): If not None, a map from + header name to a list of values for that header Returns: Deferred: Succeeds when we get *any* 2xx HTTP response, with the HTTP body as JSON. @@ -164,11 +218,14 @@ class SimpleHttpClient(object): On a non-2xx HTTP response. The response body will be used as the error message. """ - body = yield self.get_raw(uri, args) - defer.returnValue(json.loads(body)) + try: + body = yield self.get_raw(uri, args, headers=headers) + defer.returnValue(json.loads(body)) + except CodeMessageException as e: + raise self._exceptionFromFailedRequest(e.code, e.msg) @defer.inlineCallbacks - def put_json(self, uri, json_body, args={}): + def put_json(self, uri, json_body, args={}, headers=None): """ Puts some json to the given URI. Args: @@ -178,6 +235,8 @@ class SimpleHttpClient(object): None. **Note**: The value of each key is assumed to be an iterable and *not* a string. + headers (dict[str, List[str]]|None): If not None, a map from + header name to a list of values for that header Returns: Deferred: Succeeds when we get *any* 2xx HTTP response, with the HTTP body as JSON. @@ -190,17 +249,21 @@ class SimpleHttpClient(object): json_str = encode_canonical_json(json_body) + actual_headers = { + b"Content-Type": [b"application/json"], + b"User-Agent": [self.user_agent], + } + if headers: + actual_headers.update(headers) + response = yield self.request( "PUT", uri.encode("ascii"), - headers=Headers({ - b"User-Agent": [self.user_agent], - "Content-Type": ["application/json"] - }), + headers=Headers(actual_headers), bodyProducer=FileBodyProducer(StringIO(json_str)) ) - body = yield preserve_context_over_fn(readBody, response) + body = yield make_deferred_yieldable(readBody(response)) if 200 <= response.code < 300: defer.returnValue(json.loads(body)) @@ -211,7 +274,7 @@ class SimpleHttpClient(object): raise CodeMessageException(response.code, body) @defer.inlineCallbacks - def get_raw(self, uri, args={}): + def get_raw(self, uri, args={}, headers=None): """ Gets raw text from the given URI. Args: @@ -220,6 +283,8 @@ class SimpleHttpClient(object): None. **Note**: The value of each key is assumed to be an iterable and *not* a string. + headers (dict[str, List[str]]|None): If not None, a map from + header name to a list of values for that header Returns: Deferred: Succeeds when we get *any* 2xx HTTP response, with the HTTP body at text. @@ -231,46 +296,65 @@ class SimpleHttpClient(object): query_bytes = urllib.urlencode(args, True) uri = "%s?%s" % (uri, query_bytes) + actual_headers = { + b"User-Agent": [self.user_agent], + } + if headers: + actual_headers.update(headers) + response = yield self.request( "GET", uri.encode("ascii"), - headers=Headers({ - b"User-Agent": [self.user_agent], - }) + headers=Headers(actual_headers), ) - body = yield preserve_context_over_fn(readBody, response) + body = yield make_deferred_yieldable(readBody(response)) if 200 <= response.code < 300: defer.returnValue(body) else: raise CodeMessageException(response.code, body) + def _exceptionFromFailedRequest(self, response, body): + try: + jsonBody = json.loads(body) + errcode = jsonBody['errcode'] + error = jsonBody['error'] + return MatrixCodeMessageException(response.code, error, errcode) + except (ValueError, KeyError): + return CodeMessageException(response.code, body) + # XXX: FIXME: This is horribly copy-pasted from matrixfederationclient. # The two should be factored out. @defer.inlineCallbacks - def get_file(self, url, output_stream, max_size=None): + def get_file(self, url, output_stream, max_size=None, headers=None): """GETs a file from a given URL Args: url (str): The URL to GET output_stream (file): File to write the response body to. + headers (dict[str, List[str]]|None): If not None, a map from + header name to a list of values for that header Returns: A (int,dict,string,int) tuple of the file length, dict of the response headers, absolute URI of the response and HTTP response code. """ + actual_headers = { + b"User-Agent": [self.user_agent], + } + if headers: + actual_headers.update(headers) + response = yield self.request( "GET", url.encode("ascii"), - headers=Headers({ - b"User-Agent": [self.user_agent], - }) + headers=Headers(actual_headers), ) - headers = dict(response.headers.getAllRawHeaders()) + resp_headers = dict(response.headers.getAllRawHeaders()) - if 'Content-Length' in headers and headers['Content-Length'] > max_size: + if 'Content-Length' in resp_headers and resp_headers['Content-Length'] > max_size: logger.warn("Requested URL is too large > %r bytes" % (self.max_size,)) raise SynapseError( 502, @@ -291,10 +375,9 @@ class SimpleHttpClient(object): # straight back in again try: - length = yield preserve_context_over_fn( - _readBodyToFile, - response, output_stream, max_size - ) + length = yield make_deferred_yieldable(_readBodyToFile( + response, output_stream, max_size, + )) except Exception as e: logger.exception("Failed to download body") raise SynapseError( @@ -303,7 +386,9 @@ class SimpleHttpClient(object): Codes.UNKNOWN, ) - defer.returnValue((length, headers, response.request.absoluteURI, response.code)) + defer.returnValue( + (length, resp_headers, response.request.absoluteURI, response.code), + ) # XXX: FIXME: This is horribly copy-pasted from matrixfederationclient. @@ -371,7 +456,7 @@ class CaptchaServerHttpClient(SimpleHttpClient): ) try: - body = yield preserve_context_over_fn(readBody, response) + body = yield make_deferred_yieldable(readBody(response)) defer.returnValue(body) except PartialDownloadError as e: # twisted dislikes google's response, no content length. @@ -422,7 +507,7 @@ class SpiderHttpClient(SimpleHttpClient): reactor, SpiderEndpointFactory(hs) ) - ), [('gzip', GzipDecoder)] + ), [(b'gzip', GzipDecoder)] ) # We could look like Chrome: # self.user_agent = ("Mozilla/5.0 (%s) (KHTML, like Gecko) diff --git a/synapse/http/endpoint.py b/synapse/http/endpoint.py index 564ae4c10d..87a482650d 100644 --- a/synapse/http/endpoint.py +++ b/synapse/http/endpoint.py @@ -12,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS from twisted.internet import defer, reactor from twisted.internet.error import ConnectError @@ -30,7 +29,10 @@ logger = logging.getLogger(__name__) SERVER_CACHE = {} - +# our record of an individual server which can be tried to reach a destination. +# +# "host" is the hostname acquired from the SRV record. Except when there's +# no SRV record, in which case it is the original hostname. _Server = collections.namedtuple( "_Server", "priority weight host port expires" ) @@ -224,9 +226,10 @@ class SRVClientEndpoint(object): return self.default_server else: raise ConnectError( - "Not server available for %s" % self.service_name + "No server available for %s" % self.service_name ) + # look for all servers with the same priority min_priority = self.servers[0].priority weight_indexes = list( (index, server.weight + 1) @@ -236,11 +239,22 @@ class SRVClientEndpoint(object): total_weight = sum(weight for index, weight in weight_indexes) target_weight = random.randint(0, total_weight) - for index, weight in weight_indexes: target_weight -= weight if target_weight <= 0: server = self.servers[index] + # XXX: this looks totally dubious: + # + # (a) we never reuse a server until we have been through + # all of the servers at the same priority, so if the + # weights are A: 100, B:1, we always do ABABAB instead of + # AAAA...AAAB (approximately). + # + # (b) After using all the servers at the lowest priority, + # we move onto the next priority. We should only use the + # second priority if servers at the top priority are + # unreachable. + # del self.servers[index] self.used_servers.append(server) return server @@ -277,7 +291,7 @@ def resolve_service(service_name, dns_client=client, cache=SERVER_CACHE, clock=t if (len(answers) == 1 and answers[0].type == dns.SRV and answers[0].payload - and answers[0].payload.target == dns.Name('.')): + and answers[0].payload.target == dns.Name(b'.')): raise ConnectError("Service %s unavailable" % service_name) for answer in answers: @@ -285,26 +299,14 @@ def resolve_service(service_name, dns_client=client, cache=SERVER_CACHE, clock=t continue payload = answer.payload - host = str(payload.target) - srv_ttl = answer.ttl - - try: - answers, _, _ = yield dns_client.lookupAddress(host) - except DNSNameError: - continue - for answer in answers: - if answer.type == dns.A and answer.payload: - ip = answer.payload.dottedQuad() - host_ttl = min(srv_ttl, answer.ttl) - - servers.append(_Server( - host=ip, - port=int(payload.port), - priority=int(payload.priority), - weight=int(payload.weight), - expires=int(clock.time()) + host_ttl, - )) + servers.append(_Server( + host=str(payload.target), + port=int(payload.port), + priority=int(payload.priority), + weight=int(payload.weight), + expires=int(clock.time()) + answer.ttl, + )) servers.sort() cache[service_name] = list(servers) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 78b92cef36..4b2b85464d 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,23 +13,24 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - from twisted.internet import defer, reactor, protocol from twisted.internet.error import DNSLookupError from twisted.web.client import readBody, HTTPConnectionPool, Agent from twisted.web.http_headers import Headers from twisted.web._newclient import ResponseDone +from synapse.http import cancelled_to_request_timed_out_error from synapse.http.endpoint import matrix_federation_endpoint -from synapse.util.async import sleep -from synapse.util.logcontext import preserve_context_over_fn import synapse.metrics +from synapse.util.async import sleep, add_timeout_to_deferred +from synapse.util import logcontext +from synapse.util.logcontext import make_deferred_yieldable +import synapse.util.retryutils from canonicaljson import encode_canonical_json from synapse.api.errors import ( - SynapseError, Codes, HttpResponseException, + SynapseError, Codes, HttpResponseException, FederationDeniedError, ) from signedjson.sign import sign_json @@ -39,8 +41,7 @@ import logging import random import sys import urllib -import urlparse - +from six.moves.urllib import parse as urlparse logger = logging.getLogger(__name__) outbound_logger = logging.getLogger("synapse.http.outbound") @@ -94,6 +95,7 @@ class MatrixFederationHttpClient(object): reactor, MatrixFederationEndpointFactory(hs), pool=pool ) self.clock = hs.get_clock() + self._store = hs.get_datastore() self.version_string = hs.version_string self._next_id = 1 @@ -103,123 +105,161 @@ class MatrixFederationHttpClient(object): ) @defer.inlineCallbacks - def _create_request(self, destination, method, path_bytes, - body_callback, headers_dict={}, param_bytes=b"", - query_bytes=b"", retry_on_dns_fail=True, - timeout=None, long_retries=False): - """ Creates and sends a request to the given url - """ - headers_dict[b"User-Agent"] = [self.version_string] - headers_dict[b"Host"] = [destination] + def _request(self, destination, method, path, + body_callback, headers_dict={}, param_bytes=b"", + query_bytes=b"", retry_on_dns_fail=True, + timeout=None, long_retries=False, + ignore_backoff=False, + backoff_on_404=False): + """ Creates and sends a request to the given server + Args: + destination (str): The remote server to send the HTTP request to. + method (str): HTTP method + path (str): The HTTP path + ignore_backoff (bool): true to ignore the historical backoff data + and try the request anyway. + backoff_on_404 (bool): Back off if we get a 404 - url_bytes = self._create_url( - destination, path_bytes, param_bytes, query_bytes - ) + Returns: + Deferred: resolves with the http response object on success. - txn_id = "%s-O-%s" % (method, self._next_id) - self._next_id = (self._next_id + 1) % (sys.maxint - 1) + Fails with ``HTTPRequestException``: if we get an HTTP response + code >= 300. - outbound_logger.info( - "{%s} [%s] Sending request: %s %s", - txn_id, destination, method, url_bytes - ) + Fails with ``NotRetryingDestination`` if we are not yet ready + to retry this server. - # XXX: Would be much nicer to retry only at the transaction-layer - # (once we have reliable transactions in place) - if long_retries: - retries_left = MAX_LONG_RETRIES - else: - retries_left = MAX_SHORT_RETRIES + Fails with ``FederationDeniedError`` if this destination + is not on our federation whitelist - http_url_bytes = urlparse.urlunparse( - ("", "", path_bytes, param_bytes, query_bytes, "") + (May also fail with plenty of other Exceptions for things like DNS + failures, connection failures, SSL failures.) + """ + if ( + self.hs.config.federation_domain_whitelist and + destination not in self.hs.config.federation_domain_whitelist + ): + raise FederationDeniedError(destination) + + limiter = yield synapse.util.retryutils.get_retry_limiter( + destination, + self.clock, + self._store, + backoff_on_404=backoff_on_404, + ignore_backoff=ignore_backoff, ) - log_result = None - try: - while True: - producer = None - if body_callback: - producer = body_callback(method, http_url_bytes, headers_dict) - - try: - def send_request(): - request_deferred = preserve_context_over_fn( - self.agent.request, + destination = destination.encode("ascii") + path_bytes = path.encode("ascii") + with limiter: + headers_dict[b"User-Agent"] = [self.version_string] + headers_dict[b"Host"] = [destination] + + url_bytes = self._create_url( + destination, path_bytes, param_bytes, query_bytes + ) + + txn_id = "%s-O-%s" % (method, self._next_id) + self._next_id = (self._next_id + 1) % (sys.maxint - 1) + + outbound_logger.info( + "{%s} [%s] Sending request: %s %s", + txn_id, destination, method, url_bytes + ) + + # XXX: Would be much nicer to retry only at the transaction-layer + # (once we have reliable transactions in place) + if long_retries: + retries_left = MAX_LONG_RETRIES + else: + retries_left = MAX_SHORT_RETRIES + + http_url_bytes = urlparse.urlunparse( + ("", "", path_bytes, param_bytes, query_bytes, "") + ) + + log_result = None + try: + while True: + producer = None + if body_callback: + producer = body_callback(method, http_url_bytes, headers_dict) + + try: + request_deferred = self.agent.request( method, url_bytes, Headers(headers_dict), producer ) - - return self.clock.time_bound_deferred( + add_timeout_to_deferred( + request_deferred, + timeout / 1000. if timeout else 60, + cancelled_to_request_timed_out_error, + ) + response = yield make_deferred_yieldable( request_deferred, - time_out=timeout / 1000. if timeout else 60, ) - response = yield preserve_context_over_fn(send_request) + log_result = "%d %s" % (response.code, response.phrase,) + break + except Exception as e: + if not retry_on_dns_fail and isinstance(e, DNSLookupError): + logger.warn( + "DNS Lookup failed to %s with %s", + destination, + e + ) + log_result = "DNS Lookup failed to %s with %s" % ( + destination, e + ) + raise - log_result = "%d %s" % (response.code, response.phrase,) - break - except Exception as e: - if not retry_on_dns_fail and isinstance(e, DNSLookupError): logger.warn( - "DNS Lookup failed to %s with %s", + "{%s} Sending request failed to %s: %s %s: %s", + txn_id, destination, - e - ) - log_result = "DNS Lookup failed to %s with %s" % ( - destination, e + method, + url_bytes, + _flatten_response_never_received(e), ) - raise - - logger.warn( - "{%s} Sending request failed to %s: %s %s: %s - %s", - txn_id, - destination, - method, - url_bytes, - type(e).__name__, - _flatten_response_never_received(e), - ) - - log_result = "%s - %s" % ( - type(e).__name__, _flatten_response_never_received(e), - ) - - if retries_left and not timeout: - if long_retries: - delay = 4 ** (MAX_LONG_RETRIES + 1 - retries_left) - delay = min(delay, 60) - delay *= random.uniform(0.8, 1.4) + + log_result = _flatten_response_never_received(e) + + if retries_left and not timeout: + if long_retries: + delay = 4 ** (MAX_LONG_RETRIES + 1 - retries_left) + delay = min(delay, 60) + delay *= random.uniform(0.8, 1.4) + else: + delay = 0.5 * 2 ** (MAX_SHORT_RETRIES - retries_left) + delay = min(delay, 2) + delay *= random.uniform(0.8, 1.4) + + yield sleep(delay) + retries_left -= 1 else: - delay = 0.5 * 2 ** (MAX_SHORT_RETRIES - retries_left) - delay = min(delay, 2) - delay *= random.uniform(0.8, 1.4) - - yield sleep(delay) - retries_left -= 1 - else: - raise - finally: - outbound_logger.info( - "{%s} [%s] Result: %s", - txn_id, - destination, - log_result, - ) + raise + finally: + outbound_logger.info( + "{%s} [%s] Result: %s", + txn_id, + destination, + log_result, + ) - if 200 <= response.code < 300: - pass - else: - # :'( - # Update transactions table? - body = yield preserve_context_over_fn(readBody, response) - raise HttpResponseException( - response.code, response.phrase, body - ) + if 200 <= response.code < 300: + pass + else: + # :'( + # Update transactions table? + with logcontext.PreserveLoggingContext(): + body = yield readBody(response) + raise HttpResponseException( + response.code, response.phrase, body + ) - defer.returnValue(response) + defer.returnValue(response) def sign_request(self, destination, method, url_bytes, headers_dict, content=None): @@ -247,14 +287,18 @@ class MatrixFederationHttpClient(object): headers_dict[b"Authorization"] = auth_headers @defer.inlineCallbacks - def put_json(self, destination, path, data={}, json_data_callback=None, - long_retries=False, timeout=None): + def put_json(self, destination, path, args={}, data={}, + json_data_callback=None, + long_retries=False, timeout=None, + ignore_backoff=False, + backoff_on_404=False): """ Sends the specifed json data using PUT Args: destination (str): The remote server to send the HTTP request to. path (str): The HTTP path. + args (dict): query params data (dict): A dict containing the data that will be used as the request body. This will be encoded as JSON. json_data_callback (callable): A callable returning the dict to @@ -263,11 +307,24 @@ class MatrixFederationHttpClient(object): retry for a short or long time. timeout(int): How long to try (in ms) the destination for before giving up. None indicates no timeout. + ignore_backoff (bool): true to ignore the historical backoff data + and try the request anyway. + backoff_on_404 (bool): True if we should count a 404 response as + a failure of the server (and should therefore back off future + requests) Returns: Deferred: Succeeds when we get a 2xx HTTP response. The result - will be the decoded JSON body. On a 4xx or 5xx error response a - CodeMessageException is raised. + will be the decoded JSON body. + + Fails with ``HTTPRequestException`` if we get an HTTP response + code >= 300. + + Fails with ``NotRetryingDestination`` if we are not yet ready + to retry this server. + + Fails with ``FederationDeniedError`` if this destination + is not on our federation whitelist """ if not json_data_callback: @@ -282,26 +339,30 @@ class MatrixFederationHttpClient(object): producer = _JsonProducer(json_data) return producer - response = yield self._create_request( - destination.encode("ascii"), + response = yield self._request( + destination, "PUT", - path.encode("ascii"), + path, body_callback=body_callback, headers_dict={"Content-Type": ["application/json"]}, + query_bytes=encode_query_args(args), long_retries=long_retries, timeout=timeout, + ignore_backoff=ignore_backoff, + backoff_on_404=backoff_on_404, ) if 200 <= response.code < 300: # We need to update the transactions table to say it was sent? check_content_type_is_json(response.headers) - body = yield preserve_context_over_fn(readBody, response) + with logcontext.PreserveLoggingContext(): + body = yield readBody(response) defer.returnValue(json.loads(body)) @defer.inlineCallbacks def post_json(self, destination, path, data={}, long_retries=False, - timeout=None): + timeout=None, ignore_backoff=False, args={}): """ Sends the specifed json data using POST Args: @@ -314,11 +375,21 @@ class MatrixFederationHttpClient(object): retry for a short or long time. timeout(int): How long to try (in ms) the destination for before giving up. None indicates no timeout. - + ignore_backoff (bool): true to ignore the historical backoff data and + try the request anyway. + args (dict): query params Returns: Deferred: Succeeds when we get a 2xx HTTP response. The result - will be the decoded JSON body. On a 4xx or 5xx error response a - CodeMessageException is raised. + will be the decoded JSON body. + + Fails with ``HTTPRequestException`` if we get an HTTP response + code >= 300. + + Fails with ``NotRetryingDestination`` if we are not yet ready + to retry this server. + + Fails with ``FederationDeniedError`` if this destination + is not on our federation whitelist """ def body_callback(method, url_bytes, headers_dict): @@ -327,27 +398,30 @@ class MatrixFederationHttpClient(object): ) return _JsonProducer(data) - response = yield self._create_request( - destination.encode("ascii"), + response = yield self._request( + destination, "POST", - path.encode("ascii"), + path, + query_bytes=encode_query_args(args), body_callback=body_callback, headers_dict={"Content-Type": ["application/json"]}, long_retries=long_retries, timeout=timeout, + ignore_backoff=ignore_backoff, ) if 200 <= response.code < 300: # We need to update the transactions table to say it was sent? check_content_type_is_json(response.headers) - body = yield preserve_context_over_fn(readBody, response) + with logcontext.PreserveLoggingContext(): + body = yield readBody(response) defer.returnValue(json.loads(body)) @defer.inlineCallbacks def get_json(self, destination, path, args={}, retry_on_dns_fail=True, - timeout=None): + timeout=None, ignore_backoff=False): """ GETs some json from the given host homeserver and path Args: @@ -359,57 +433,122 @@ class MatrixFederationHttpClient(object): timeout (int): How long to try (in ms) the destination for before giving up. None indicates no timeout and that the request will be retried. + ignore_backoff (bool): true to ignore the historical backoff data + and try the request anyway. Returns: - Deferred: Succeeds when we get *any* HTTP response. + Deferred: Succeeds when we get a 2xx HTTP response. The result + will be the decoded JSON body. + + Fails with ``HTTPRequestException`` if we get an HTTP response + code >= 300. + + Fails with ``NotRetryingDestination`` if we are not yet ready + to retry this server. - The result of the deferred is a tuple of `(code, response)`, - where `response` is a dict representing the decoded JSON body. + Fails with ``FederationDeniedError`` if this destination + is not on our federation whitelist """ logger.debug("get_json args: %s", args) - encoded_args = {} - for k, vs in args.items(): - if isinstance(vs, basestring): - vs = [vs] - encoded_args[k] = [v.encode("UTF-8") for v in vs] - - query_bytes = urllib.urlencode(encoded_args, True) logger.debug("Query bytes: %s Retry DNS: %s", args, retry_on_dns_fail) def body_callback(method, url_bytes, headers_dict): self.sign_request(destination, method, url_bytes, headers_dict) return None - response = yield self._create_request( - destination.encode("ascii"), + response = yield self._request( + destination, "GET", - path.encode("ascii"), - query_bytes=query_bytes, + path, + query_bytes=encode_query_args(args), body_callback=body_callback, retry_on_dns_fail=retry_on_dns_fail, timeout=timeout, + ignore_backoff=ignore_backoff, ) if 200 <= response.code < 300: # We need to update the transactions table to say it was sent? check_content_type_is_json(response.headers) - body = yield preserve_context_over_fn(readBody, response) + with logcontext.PreserveLoggingContext(): + body = yield readBody(response) + + defer.returnValue(json.loads(body)) + + @defer.inlineCallbacks + def delete_json(self, destination, path, long_retries=False, + timeout=None, ignore_backoff=False, args={}): + """Send a DELETE request to the remote expecting some json response + + Args: + destination (str): The remote server to send the HTTP request + to. + path (str): The HTTP path. + long_retries (bool): A boolean that indicates whether we should + retry for a short or long time. + timeout(int): How long to try (in ms) the destination for before + giving up. None indicates no timeout. + ignore_backoff (bool): true to ignore the historical backoff data and + try the request anyway. + Returns: + Deferred: Succeeds when we get a 2xx HTTP response. The result + will be the decoded JSON body. + + Fails with ``HTTPRequestException`` if we get an HTTP response + code >= 300. + + Fails with ``NotRetryingDestination`` if we are not yet ready + to retry this server. + + Fails with ``FederationDeniedError`` if this destination + is not on our federation whitelist + """ + + response = yield self._request( + destination, + "DELETE", + path, + query_bytes=encode_query_args(args), + headers_dict={"Content-Type": ["application/json"]}, + long_retries=long_retries, + timeout=timeout, + ignore_backoff=ignore_backoff, + ) + + if 200 <= response.code < 300: + # We need to update the transactions table to say it was sent? + check_content_type_is_json(response.headers) + + with logcontext.PreserveLoggingContext(): + body = yield readBody(response) defer.returnValue(json.loads(body)) @defer.inlineCallbacks def get_file(self, destination, path, output_stream, args={}, - retry_on_dns_fail=True, max_size=None): + retry_on_dns_fail=True, max_size=None, + ignore_backoff=False): """GETs a file from a given homeserver Args: destination (str): The remote server to send the HTTP request to. path (str): The HTTP path to GET. output_stream (file): File to write the response body to. args (dict): Optional dictionary used to create the query string. + ignore_backoff (bool): true to ignore the historical backoff data + and try the request anyway. Returns: - A (int,dict) tuple of the file length and a dict of the response - headers. + Deferred: resolves with an (int,dict) tuple of the file length and + a dict of the response headers. + + Fails with ``HTTPRequestException`` if we get an HTTP response code + >= 300 + + Fails with ``NotRetryingDestination`` if we are not yet ready + to retry this server. + + Fails with ``FederationDeniedError`` if this destination + is not on our federation whitelist """ encoded_args = {} @@ -419,29 +558,30 @@ class MatrixFederationHttpClient(object): encoded_args[k] = [v.encode("UTF-8") for v in vs] query_bytes = urllib.urlencode(encoded_args, True) - logger.debug("Query bytes: %s Retry DNS: %s", args, retry_on_dns_fail) + logger.debug("Query bytes: %s Retry DNS: %s", query_bytes, retry_on_dns_fail) def body_callback(method, url_bytes, headers_dict): self.sign_request(destination, method, url_bytes, headers_dict) return None - response = yield self._create_request( - destination.encode("ascii"), + response = yield self._request( + destination, "GET", - path.encode("ascii"), + path, query_bytes=query_bytes, body_callback=body_callback, - retry_on_dns_fail=retry_on_dns_fail + retry_on_dns_fail=retry_on_dns_fail, + ignore_backoff=ignore_backoff, ) headers = dict(response.headers.getAllRawHeaders()) try: - length = yield preserve_context_over_fn( - _readBodyToFile, - response, output_stream, max_size - ) - except: + with logcontext.PreserveLoggingContext(): + length = yield _readBodyToFile( + response, output_stream, max_size + ) + except Exception: logger.exception("Failed to download body") raise @@ -506,12 +646,14 @@ class _JsonProducer(object): def _flatten_response_never_received(e): if hasattr(e, "reasons"): - return ", ".join( + reasons = ", ".join( _flatten_response_never_received(f.value) for f in e.reasons ) + + return "%s:[%s]" % (type(e).__name__, reasons) else: - return "%s: %s" % (type(e).__name__, e.message,) + return repr(e) def check_content_type_is_json(headers): @@ -538,3 +680,15 @@ def check_content_type_is_json(headers): raise RuntimeError( "Content-Type not application/json: was '%s'" % c_type ) + + +def encode_query_args(args): + encoded_args = {} + for k, vs in args.items(): + if isinstance(vs, basestring): + vs = [vs] + encoded_args[k] = [v.encode("UTF-8") for v in vs] + + query_bytes = urllib.urlencode(encoded_args, True) + + return query_bytes diff --git a/synapse/http/server.py b/synapse/http/server.py index 14715878c5..55b9ad5251 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -28,6 +29,7 @@ from canonicaljson import ( ) from twisted.internet import defer +from twisted.python import failure from twisted.web import server, resource from twisted.web.server import NOT_DONE_YET from twisted.web.util import redirectTo @@ -35,42 +37,86 @@ from twisted.web.util import redirectTo import collections import logging import urllib -import ujson +import simplejson logger = logging.getLogger(__name__) metrics = synapse.metrics.get_metrics_for(__name__) -incoming_requests_counter = metrics.register_counter( - "requests", +# total number of responses served, split by method/servlet/tag +response_count = metrics.register_counter( + "response_count", labels=["method", "servlet", "tag"], + alternative_names=( + # the following are all deprecated aliases for the same metric + metrics.name_prefix + x for x in ( + "_requests", + "_response_time:count", + "_response_ru_utime:count", + "_response_ru_stime:count", + "_response_db_txn_count:count", + "_response_db_txn_duration:count", + ) + ) +) + +requests_counter = metrics.register_counter( + "requests_received", + labels=["method", "servlet", ], ) + outgoing_responses_counter = metrics.register_counter( "responses", labels=["method", "code"], ) -response_timer = metrics.register_distribution( - "response_time", - labels=["method", "servlet", "tag"] +response_timer = metrics.register_counter( + "response_time_seconds", + labels=["method", "servlet", "tag"], + alternative_names=( + metrics.name_prefix + "_response_time:total", + ), ) -response_ru_utime = metrics.register_distribution( - "response_ru_utime", labels=["method", "servlet", "tag"] +response_ru_utime = metrics.register_counter( + "response_ru_utime_seconds", labels=["method", "servlet", "tag"], + alternative_names=( + metrics.name_prefix + "_response_ru_utime:total", + ), ) -response_ru_stime = metrics.register_distribution( - "response_ru_stime", labels=["method", "servlet", "tag"] +response_ru_stime = metrics.register_counter( + "response_ru_stime_seconds", labels=["method", "servlet", "tag"], + alternative_names=( + metrics.name_prefix + "_response_ru_stime:total", + ), ) -response_db_txn_count = metrics.register_distribution( - "response_db_txn_count", labels=["method", "servlet", "tag"] +response_db_txn_count = metrics.register_counter( + "response_db_txn_count", labels=["method", "servlet", "tag"], + alternative_names=( + metrics.name_prefix + "_response_db_txn_count:total", + ), ) -response_db_txn_duration = metrics.register_distribution( - "response_db_txn_duration", labels=["method", "servlet", "tag"] +# seconds spent waiting for db txns, excluding scheduling time, when processing +# this request +response_db_txn_duration = metrics.register_counter( + "response_db_txn_duration_seconds", labels=["method", "servlet", "tag"], + alternative_names=( + metrics.name_prefix + "_response_db_txn_duration:total", + ), ) +# seconds spent waiting for a db connection, when processing this request +response_db_sched_duration = metrics.register_counter( + "response_db_sched_duration_seconds", labels=["method", "servlet", "tag"] +) + +# size in bytes of the response written +response_size = metrics.register_counter( + "response_size", labels=["method", "servlet", "tag"] +) _next_request_id = 0 @@ -106,7 +152,12 @@ def wrap_request_handler(request_handler, include_metrics=False): with LoggingContext(request_id) as request_context: with Measure(self.clock, "wrapped_request_handler"): request_metrics = RequestMetrics() - request_metrics.start(self.clock, name=self.__class__.__name__) + # we start the request metrics timer here with an initial stab + # at the servlet name. For most requests that name will be + # JsonResource (or a subclass), and JsonResource._async_render + # will update it once it picks a servlet. + servlet_name = self.__class__.__name__ + request_metrics.start(self.clock, name=servlet_name) request_context.request = request_id with request.processing(): @@ -115,6 +166,7 @@ def wrap_request_handler(request_handler, include_metrics=False): if include_metrics: yield request_handler(self, request, request_metrics) else: + requests_counter.inc(request.method, servlet_name) yield request_handler(self, request) except CodeMessageException as e: code = e.code @@ -130,13 +182,18 @@ def wrap_request_handler(request_handler, include_metrics=False): pretty_print=_request_user_agent_is_curl(request), version_string=self.version_string, ) - except: - logger.exception( - "Failed handle request %s.%s on %r: %r", + except Exception: + # failure.Failure() fishes the original Failure out + # of our stack, and thus gives us a sensible stack + # trace. + f = failure.Failure() + logger.error( + "Failed handle request %s.%s on %r: %r: %s", request_handler.__module__, request_handler.__name__, self, - request + request, + f.getTraceback().rstrip(), ) respond_with_json( request, @@ -145,7 +202,9 @@ def wrap_request_handler(request_handler, include_metrics=False): "error": "Internal server error", "errcode": Codes.UNKNOWN, }, - send_cors=True + send_cors=True, + pretty_print=_request_user_agent_is_curl(request), + version_string=self.version_string, ) finally: try: @@ -183,7 +242,7 @@ class JsonResource(HttpServer, resource.Resource): """ This implements the HttpServer interface and provides JSON support for Resources. - Register callbacks via register_path() + Register callbacks via register_paths() Callbacks can return a tuple of status code and a dict in which case the the dict will automatically be sent to the client as a JSON object. @@ -230,57 +289,62 @@ class JsonResource(HttpServer, resource.Resource): This checks if anyone has registered a callback for that method and path. """ - if request.method == "OPTIONS": - self._send_response(request, 200, {}) - return + callback, group_dict = self._get_handler_for_request(request) - # Loop through all the registered callbacks to check if the method - # and path regex match - for path_entry in self.path_regexs.get(request.method, []): - m = path_entry.pattern.match(request.path) - if not m: - continue + servlet_instance = getattr(callback, "__self__", None) + if servlet_instance is not None: + servlet_classname = servlet_instance.__class__.__name__ + else: + servlet_classname = "%r" % callback - # We found a match! Trigger callback and then return the - # returned response. We pass both the request and any - # matched groups from the regex to the callback. + request_metrics.name = servlet_classname + requests_counter.inc(request.method, servlet_classname) - callback = path_entry.callback + # Now trigger the callback. If it returns a response, we send it + # here. If it throws an exception, that is handled by the wrapper + # installed by @request_handler. - kwargs = intern_dict({ - name: urllib.unquote(value).decode("UTF-8") if value else value - for name, value in m.groupdict().items() - }) + kwargs = intern_dict({ + name: urllib.unquote(value).decode("UTF-8") if value else value + for name, value in group_dict.items() + }) - callback_return = yield callback(request, **kwargs) - if callback_return is not None: - code, response = callback_return - self._send_response(request, code, response) + callback_return = yield callback(request, **kwargs) + if callback_return is not None: + code, response = callback_return + self._send_response(request, code, response) - servlet_instance = getattr(callback, "__self__", None) - if servlet_instance is not None: - servlet_classname = servlet_instance.__class__.__name__ - else: - servlet_classname = "%r" % callback + def _get_handler_for_request(self, request): + """Finds a callback method to handle the given request - request_metrics.name = servlet_classname + Args: + request (twisted.web.http.Request): + + Returns: + Tuple[Callable, dict[str, str]]: callback method, and the dict + mapping keys to path components as specified in the handler's + path match regexp. - return + The callback will normally be a method registered via + register_paths, so will return (possibly via Deferred) either + None, or a tuple of (http code, response body). + """ + if request.method == b"OPTIONS": + return _options_handler, {} + + # Loop through all the registered callbacks to check if the method + # and path regex match + for path_entry in self.path_regexs.get(request.method, []): + m = path_entry.pattern.match(request.path) + if m: + # We found a match! + return path_entry.callback, m.groupdict() # Huh. No one wanted to handle that? Fiiiiiine. Send 400. - raise UnrecognizedRequestError() + return _unrecognised_request_handler, {} def _send_response(self, request, code, response_json_object, response_code_message=None): - # could alternatively use request.notifyFinish() and flip a flag when - # the Deferred fires, but since the flag is RIGHT THERE it seems like - # a waste. - if request._disconnected: - logger.warn( - "Not sending response to request %s, already disconnected.", - request) - return - outgoing_responses_counter.inc(request.method, str(code)) # TODO: Only enable CORS for the requests that need it. @@ -294,6 +358,34 @@ class JsonResource(HttpServer, resource.Resource): ) +def _options_handler(request): + """Request handler for OPTIONS requests + + This is a request handler suitable for return from + _get_handler_for_request. It returns a 200 and an empty body. + + Args: + request (twisted.web.http.Request): + + Returns: + Tuple[int, dict]: http code, response body. + """ + return 200, {} + + +def _unrecognised_request_handler(request): + """Request handler for unrecognised requests + + This is a request handler suitable for return from + _get_handler_for_request. It actually just raises an + UnrecognizedRequestError. + + Args: + request (twisted.web.http.Request): + """ + raise UnrecognizedRequestError() + + class RequestMetrics(object): def start(self, clock, name): self.start = clock.time_msec() @@ -314,7 +406,7 @@ class RequestMetrics(object): ) return - incoming_requests_counter.inc(request.method, self.name, tag) + response_count.inc(request.method, self.name, tag) response_timer.inc_by( clock.time_msec() - self.start, request.method, @@ -333,9 +425,14 @@ class RequestMetrics(object): context.db_txn_count, request.method, self.name, tag ) response_db_txn_duration.inc_by( - context.db_txn_duration, request.method, self.name, tag + context.db_txn_duration_ms / 1000., request.method, self.name, tag + ) + response_db_sched_duration.inc_by( + context.db_sched_duration_ms / 1000., request.method, self.name, tag ) + response_size.inc_by(request.sentLength, request.method, self.name, tag) + class RootRedirect(resource.Resource): """Redirects the root '/' path to another path.""" @@ -356,14 +453,22 @@ class RootRedirect(resource.Resource): def respond_with_json(request, code, json_object, send_cors=False, response_code_message=None, pretty_print=False, version_string="", canonical_json=True): + # could alternatively use request.notifyFinish() and flip a flag when + # the Deferred fires, but since the flag is RIGHT THERE it seems like + # a waste. + if request._disconnected: + logger.warn( + "Not sending response to request %s, already disconnected.", + request) + return + if pretty_print: json_bytes = encode_pretty_printed_json(json_object) + "\n" else: if canonical_json or synapse.events.USE_FROZEN_DICTS: json_bytes = encode_canonical_json(json_object) else: - # ujson doesn't like frozen_dicts. - json_bytes = ujson.dumps(json_object, ensure_ascii=False) + json_bytes = simplejson.dumps(json_object) return respond_with_json_bytes( request, code, json_bytes, @@ -390,6 +495,7 @@ def respond_with_json_bytes(request, code, json_bytes, send_cors=False, request.setHeader(b"Content-Type", b"application/json") request.setHeader(b"Server", version_string) request.setHeader(b"Content-Length", b"%d" % (len(json_bytes),)) + request.setHeader(b"Cache-Control", b"no-cache, no-store, must-revalidate") if send_cors: set_cors_headers(request) @@ -412,7 +518,7 @@ def set_cors_headers(request): ) request.setHeader( "Access-Control-Allow-Headers", - "Origin, X-Requested-With, Content-Type, Accept" + "Origin, X-Requested-With, Content-Type, Accept, Authorization" ) @@ -437,9 +543,9 @@ def finish_request(request): def _request_user_agent_is_curl(request): user_agents = request.requestHeaders.getRawHeaders( - "User-Agent", default=[] + b"User-Agent", default=[] ) for user_agent in user_agents: - if "curl" in user_agent: + if b"curl" in user_agent: return True return False diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index 8c22d6f00f..ef8e62901b 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -48,7 +48,7 @@ def parse_integer_from_args(args, name, default=None, required=False): if name in args: try: return int(args[name][0]) - except: + except Exception: message = "Query parameter %r must be an integer" % (name,) raise SynapseError(400, message) else: @@ -88,7 +88,7 @@ def parse_boolean_from_args(args, name, default=None, required=False): "true": True, "false": False, }[args[name][0]] - except: + except Exception: message = ( "Boolean query parameter %r must be one of" " ['true', 'false']" @@ -148,11 +148,13 @@ def parse_string_from_args(args, name, default=None, required=False, return default -def parse_json_value_from_request(request): +def parse_json_value_from_request(request, allow_empty_body=False): """Parse a JSON value from the body of a twisted HTTP request. Args: request: the twisted HTTP request. + allow_empty_body (bool): if True, an empty body will be accepted and + turned into None Returns: The JSON value. @@ -162,28 +164,39 @@ def parse_json_value_from_request(request): """ try: content_bytes = request.content.read() - except: + except Exception: raise SynapseError(400, "Error reading JSON content.") + if not content_bytes and allow_empty_body: + return None + try: content = simplejson.loads(content_bytes) - except simplejson.JSONDecodeError: + except Exception as e: + logger.warn("Unable to parse JSON: %s", e) raise SynapseError(400, "Content not JSON.", errcode=Codes.NOT_JSON) return content -def parse_json_object_from_request(request): +def parse_json_object_from_request(request, allow_empty_body=False): """Parse a JSON object from the body of a twisted HTTP request. Args: request: the twisted HTTP request. + allow_empty_body (bool): if True, an empty body will be accepted and + turned into an empty dict. Raises: SynapseError if the request body couldn't be decoded as JSON or if it wasn't a JSON object. """ - content = parse_json_value_from_request(request) + content = parse_json_value_from_request( + request, allow_empty_body=allow_empty_body, + ) + + if allow_empty_body and content is None: + return {} if type(content) != dict: message = "Content must be a JSON object." @@ -192,6 +205,16 @@ def parse_json_object_from_request(request): return content +def assert_params_in_request(body, required): + absent = [] + for k in required: + if k not in body: + absent.append(k) + + if len(absent) > 0: + raise SynapseError(400, "Missing params: %r" % absent, Codes.MISSING_PARAM) + + class RestServlet(object): """ A Synapse REST Servlet. diff --git a/synapse/http/site.py b/synapse/http/site.py index 4b09d7ee66..c8b46e1af2 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -20,7 +20,7 @@ import logging import re import time -ACCESS_TOKEN_RE = re.compile(r'(\?.*access(_|%5[Ff])token=)[^&]*(.*)$') +ACCESS_TOKEN_RE = re.compile(br'(\?.*access(_|%5[Ff])token=)[^&]*(.*)$') class SynapseRequest(Request): @@ -43,12 +43,12 @@ class SynapseRequest(Request): def get_redacted_uri(self): return ACCESS_TOKEN_RE.sub( - r'\1<redacted>\3', + br'\1<redacted>\3', self.uri ) def get_user_agent(self): - return self.requestHeaders.getRawHeaders("User-Agent", [None])[-1] + return self.requestHeaders.getRawHeaders(b"User-Agent", [None])[-1] def started_processing(self): self.site.access_logger.info( @@ -66,14 +66,15 @@ class SynapseRequest(Request): context = LoggingContext.current_context() ru_utime, ru_stime = context.get_resource_usage() db_txn_count = context.db_txn_count - db_txn_duration = context.db_txn_duration - except: + db_txn_duration_ms = context.db_txn_duration_ms + db_sched_duration_ms = context.db_sched_duration_ms + except Exception: ru_utime, ru_stime = (0, 0) - db_txn_count, db_txn_duration = (0, 0) + db_txn_count, db_txn_duration_ms = (0, 0) self.site.access_logger.info( "%s - %s - {%s}" - " Processed request: %dms (%dms, %dms) (%dms/%d)" + " Processed request: %dms (%dms, %dms) (%dms/%dms/%d)" " %sB %s \"%s %s %s\" \"%s\"", self.getClientIP(), self.site.site_tag, @@ -81,7 +82,8 @@ class SynapseRequest(Request): int(time.time() * 1000) - self.start_time, int(ru_utime * 1000), int(ru_stime * 1000), - int(db_txn_duration * 1000), + db_sched_duration_ms, + db_txn_duration_ms, int(db_txn_count), self.sentLength, self.code, |