Merge branch 'develop' into develop

author: Amber Brown <hawkowl@atleastfornow.net> 2018-09-03 21:08:35 +1000
committer: GitHub <noreply@github.com> 2018-09-03 21:08:35 +1000
commit: 4fc4b881c58fd638db5f4dac0863721111b67af0 (patch)
tree: cc1604f5e3b4e0a263e0e11a55b62ef4006a64a1 /synapse/http
parent: The project `matrix-synapse-auto-deploy` does not seem to be maintained anymore. (diff)
parent: Merge pull request #3777 from matrix-org/neilj/fix_register_user_registration (diff)
download: synapse-4fc4b881c58fd638db5f4dac0863721111b67af0.tar.xz
8 files changed, 559 insertions, 345 deletions
diff --git a/synapse/http/additional_resource.py b/synapse/http/additional_resource.py
index a797396ade..0e10e3f8f7 100644
--- a/synapse/http/additional_resource.py
+++ b/synapse/http/additional_resource.py
@@ -13,10 +13,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from synapse.http.server import wrap_json_request_handler
 from twisted.web.resource import Resource
 from twisted.web.server import NOT_DONE_YET
 
+from synapse.http.server import wrap_json_request_handler
+
 
 class AdditionalResource(Resource):
     """Resource wrapper for additional_resources
diff --git a/synapse/http/client.py b/synapse/http/client.py
index 8064a84c5c..ab4fbf59b2 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -13,39 +13,38 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from OpenSSL import SSL
-from OpenSSL.SSL import VERIFY_NONE
+import logging
+import urllib
 
-from synapse.api.errors import (
-    CodeMessageException, MatrixCodeMessageException, SynapseError, Codes,
-)
-from synapse.http import cancelled_to_request_timed_out_error, redact_uri
-from synapse.util.async import add_timeout_to_deferred
-from synapse.util.caches import CACHE_SIZE_FACTOR
-from synapse.util.logcontext import make_deferred_yieldable
-from synapse.http.endpoint import SpiderEndpoint
+from six import StringIO
 
-from canonicaljson import encode_canonical_json
+from canonicaljson import encode_canonical_json, json
+from prometheus_client import Counter
 
-from twisted.internet import defer, reactor, ssl, protocol, task
+from OpenSSL import SSL
+from OpenSSL.SSL import VERIFY_NONE
+from twisted.internet import defer, protocol, reactor, ssl, task
 from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS
+from twisted.web._newclient import ResponseDone
 from twisted.web.client import (
-    BrowserLikeRedirectAgent, ContentDecoderAgent, GzipDecoder, Agent,
-    readBody, PartialDownloadError,
+    Agent,
+    BrowserLikeRedirectAgent,
+    ContentDecoderAgent,
+    FileBodyProducer as TwistedFileBodyProducer,
+    GzipDecoder,
     HTTPConnectionPool,
+    PartialDownloadError,
+    readBody,
 )
-from twisted.web.client import FileBodyProducer as TwistedFileBodyProducer
 from twisted.web.http import PotentialDataLoss
 from twisted.web.http_headers import Headers
-from twisted.web._newclient import ResponseDone
-
-from six import StringIO
-
-from prometheus_client import Counter
-import simplejson as json
-import logging
-import urllib
 
+from synapse.api.errors import Codes, HttpResponseException, SynapseError
+from synapse.http import cancelled_to_request_timed_out_error, redact_uri
+from synapse.http.endpoint import SpiderEndpoint
+from synapse.util.async_helpers import add_timeout_to_deferred
+from synapse.util.caches import CACHE_SIZE_FACTOR
+from synapse.util.logcontext import make_deferred_yieldable
 
 logger = logging.getLogger(__name__)
 
@@ -98,8 +97,8 @@ class SimpleHttpClient(object):
                 method, uri, *args, **kwargs
             )
             add_timeout_to_deferred(
-                request_deferred,
-                60, cancelled_to_request_timed_out_error,
+                request_deferred, 60, self.hs.get_reactor(),
+                cancelled_to_request_timed_out_error,
             )
             response = yield make_deferred_yieldable(request_deferred)
 
@@ -115,7 +114,7 @@ class SimpleHttpClient(object):
                 "Error sending request to  %s %s: %s %s",
                 method, redact_uri(uri), type(e).__name__, e.message
             )
-            raise e
+            raise
 
     @defer.inlineCallbacks
     def post_urlencoded_get_json(self, uri, args={}, headers=None):
@@ -128,6 +127,11 @@ class SimpleHttpClient(object):
 
         Returns:
             Deferred[object]: parsed json
+
+        Raises:
+            HttpResponseException: On a non-2xx HTTP response.
+
+            ValueError: if the response was not JSON
         """
 
         # TODO: Do we ever want to log message contents?
@@ -151,7 +155,10 @@ class SimpleHttpClient(object):
 
         body = yield make_deferred_yieldable(readBody(response))
 
-        defer.returnValue(json.loads(body))
+        if 200 <= response.code < 300:
+            defer.returnValue(json.loads(body))
+        else:
+            raise HttpResponseException(response.code, response.phrase, body)
 
     @defer.inlineCallbacks
     def post_json_get_json(self, uri, post_json, headers=None):
@@ -165,6 +172,11 @@ class SimpleHttpClient(object):
 
         Returns:
             Deferred[object]: parsed json
+
+        Raises:
+            HttpResponseException: On a non-2xx HTTP response.
+
+            ValueError: if the response was not JSON
         """
         json_str = encode_canonical_json(post_json)
 
@@ -189,9 +201,7 @@ class SimpleHttpClient(object):
         if 200 <= response.code < 300:
             defer.returnValue(json.loads(body))
         else:
-            raise self._exceptionFromFailedRequest(response, body)
-
-        defer.returnValue(json.loads(body))
+            raise HttpResponseException(response.code, response.phrase, body)
 
     @defer.inlineCallbacks
     def get_json(self, uri, args={}, headers=None):
@@ -209,14 +219,12 @@ class SimpleHttpClient(object):
             Deferred: Succeeds when we get *any* 2xx HTTP response, with the
             HTTP body as JSON.
         Raises:
-            On a non-2xx HTTP response. The response body will be used as the
-            error message.
+            HttpResponseException On a non-2xx HTTP response.
+
+            ValueError: if the response was not JSON
         """
-        try:
-            body = yield self.get_raw(uri, args, headers=headers)
-            defer.returnValue(json.loads(body))
-        except CodeMessageException as e:
-            raise self._exceptionFromFailedRequest(e.code, e.msg)
+        body = yield self.get_raw(uri, args, headers=headers)
+        defer.returnValue(json.loads(body))
 
     @defer.inlineCallbacks
     def put_json(self, uri, json_body, args={}, headers=None):
@@ -235,7 +243,9 @@ class SimpleHttpClient(object):
             Deferred: Succeeds when we get *any* 2xx HTTP response, with the
             HTTP body as JSON.
         Raises:
-            On a non-2xx HTTP response.
+            HttpResponseException On a non-2xx HTTP response.
+
+            ValueError: if the response was not JSON
         """
         if len(args):
             query_bytes = urllib.urlencode(args, True)
@@ -262,10 +272,7 @@ class SimpleHttpClient(object):
         if 200 <= response.code < 300:
             defer.returnValue(json.loads(body))
         else:
-            # NB: This is explicitly not json.loads(body)'d because the contract
-            # of CodeMessageException is a *string* message. Callers can always
-            # load it into JSON if they want.
-            raise CodeMessageException(response.code, body)
+            raise HttpResponseException(response.code, response.phrase, body)
 
     @defer.inlineCallbacks
     def get_raw(self, uri, args={}, headers=None):
@@ -283,8 +290,7 @@ class SimpleHttpClient(object):
             Deferred: Succeeds when we get *any* 2xx HTTP response, with the
             HTTP body at text.
         Raises:
-            On a non-2xx HTTP response. The response body will be used as the
-            error message.
+            HttpResponseException on a non-2xx HTTP response.
         """
         if len(args):
             query_bytes = urllib.urlencode(args, True)
@@ -307,16 +313,7 @@ class SimpleHttpClient(object):
         if 200 <= response.code < 300:
             defer.returnValue(body)
         else:
-            raise CodeMessageException(response.code, body)
-
-    def _exceptionFromFailedRequest(self, response, body):
-        try:
-            jsonBody = json.loads(body)
-            errcode = jsonBody['errcode']
-            error = jsonBody['error']
-            return MatrixCodeMessageException(response.code, error, errcode)
-        except (ValueError, KeyError):
-            return CodeMessageException(response.code, body)
+            raise HttpResponseException(response.code, response.phrase, body)
 
     # XXX: FIXME: This is horribly copy-pasted from matrixfederationclient.
     # The two should be factored out.
diff --git a/synapse/http/endpoint.py b/synapse/http/endpoint.py
index 87a482650d..b0c9369519 100644
--- a/synapse/http/endpoint.py
+++ b/synapse/http/endpoint.py
@@ -12,21 +12,20 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS
-from twisted.internet import defer, reactor
-from twisted.internet.error import ConnectError
-from twisted.names import client, dns
-from twisted.names.error import DNSNameError, DomainError
-
 import collections
 import logging
 import random
+import re
 import time
 
+from twisted.internet import defer
+from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS
+from twisted.internet.error import ConnectError
+from twisted.names import client, dns
+from twisted.names.error import DNSNameError, DomainError
 
 logger = logging.getLogger(__name__)
 
-
 SERVER_CACHE = {}
 
 # our record of an individual server which can be tried to reach a destination.
@@ -38,34 +37,98 @@ _Server = collections.namedtuple(
 )
 
 
-def matrix_federation_endpoint(reactor, destination, ssl_context_factory=None,
+def parse_server_name(server_name):
+    """Split a server name into host/port parts.
+
+    Args:
+        server_name (str): server name to parse
+
+    Returns:
+        Tuple[str, int|None]: host/port parts.
+
+    Raises:
+        ValueError if the server name could not be parsed.
+    """
+    try:
+        if server_name[-1] == ']':
+            # ipv6 literal, hopefully
+            return server_name, None
+
+        domain_port = server_name.rsplit(":", 1)
+        domain = domain_port[0]
+        port = int(domain_port[1]) if domain_port[1:] else None
+        return domain, port
+    except Exception:
+        raise ValueError("Invalid server name '%s'" % server_name)
+
+
+VALID_HOST_REGEX = re.compile(
+    "\\A[0-9a-zA-Z.-]+\\Z",
+)
+
+
+def parse_and_validate_server_name(server_name):
+    """Split a server name into host/port parts and do some basic validation.
+
+    Args:
+        server_name (str): server name to parse
+
+    Returns:
+        Tuple[str, int|None]: host/port parts.
+
+    Raises:
+        ValueError if the server name could not be parsed.
+    """
+    host, port = parse_server_name(server_name)
+
+    # these tests don't need to be bulletproof as we'll find out soon enough
+    # if somebody is giving us invalid data. What we *do* need is to be sure
+    # that nobody is sneaking IP literals in that look like hostnames, etc.
+
+    # look for ipv6 literals
+    if host[0] == '[':
+        if host[-1] != ']':
+            raise ValueError("Mismatched [...] in server name '%s'" % (
+                server_name,
+            ))
+        return host, port
+
+    # otherwise it should only be alphanumerics.
+    if not VALID_HOST_REGEX.match(host):
+        raise ValueError("Server name '%s' contains invalid characters" % (
+            server_name,
+        ))
+
+    return host, port
+
+
+def matrix_federation_endpoint(reactor, destination, tls_client_options_factory=None,
                                timeout=None):
     """Construct an endpoint for the given matrix destination.
 
     Args:
         reactor: Twisted reactor.
         destination (bytes): The name of the server to connect to.
-        ssl_context_factory (twisted.internet.ssl.ContextFactory): Factory
-            which generates SSL contexts to use for TLS.
+        tls_client_options_factory
+            (synapse.crypto.context_factory.ClientTLSOptionsFactory):
+            Factory which generates TLS options for client connections.
         timeout (int): connection timeout in seconds
     """
 
-    domain_port = destination.split(":")
-    domain = domain_port[0]
-    port = int(domain_port[1]) if domain_port[1:] else None
+    domain, port = parse_server_name(destination)
 
     endpoint_kw_args = {}
 
     if timeout is not None:
         endpoint_kw_args.update(timeout=timeout)
 
-    if ssl_context_factory is None:
+    if tls_client_options_factory is None:
         transport_endpoint = HostnameEndpoint
         default_port = 8008
     else:
         def transport_endpoint(reactor, host, port, timeout):
             return wrapClientTLS(
-                ssl_context_factory,
+                tls_client_options_factory.get_options(host),
                 HostnameEndpoint(reactor, host, port, timeout=timeout))
         default_port = 8448
 
@@ -74,21 +137,22 @@ def matrix_federation_endpoint(reactor, destination, ssl_context_factory=None,
             reactor, "matrix", domain, protocol="tcp",
             default_port=default_port, endpoint=transport_endpoint,
             endpoint_kw_args=endpoint_kw_args
-        ))
+        ), reactor)
     else:
         return _WrappingEndpointFac(transport_endpoint(
             reactor, domain, port, **endpoint_kw_args
-        ))
+        ), reactor)
 
 
 class _WrappingEndpointFac(object):
-    def __init__(self, endpoint_fac):
+    def __init__(self, endpoint_fac, reactor):
         self.endpoint_fac = endpoint_fac
+        self.reactor = reactor
 
     @defer.inlineCallbacks
     def connect(self, protocolFactory):
         conn = yield self.endpoint_fac.connect(protocolFactory)
-        conn = _WrappedConnection(conn)
+        conn = _WrappedConnection(conn, self.reactor)
         defer.returnValue(conn)
 
 
@@ -98,9 +162,10 @@ class _WrappedConnection(object):
     """
     __slots__ = ["conn", "last_request"]
 
-    def __init__(self, conn):
+    def __init__(self, conn, reactor):
         object.__setattr__(self, "conn", conn)
         object.__setattr__(self, "last_request", time.time())
+        self._reactor = reactor
 
     def __getattr__(self, name):
         return getattr(self.conn, name)
@@ -131,14 +196,14 @@ class _WrappedConnection(object):
         # Time this connection out if we haven't send a request in the last
         # N minutes
         # TODO: Cancel the previous callLater?
-        reactor.callLater(3 * 60, self._time_things_out_maybe)
+        self._reactor.callLater(3 * 60, self._time_things_out_maybe)
 
         d = self.conn.request(request)
 
         def update_request_time(res):
             self.last_request = time.time()
             # TODO: Cancel the previous callLater?
-            reactor.callLater(3 * 60, self._time_things_out_maybe)
+            self._reactor.callLater(3 * 60, self._time_things_out_maybe)
             return res
 
         d.addCallback(update_request_time)
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index 821aed362b..b34bb8e31a 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -13,39 +13,38 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from twisted.internet import defer, reactor, protocol
-from twisted.internet.error import DNSLookupError
-from twisted.web.client import readBody, HTTPConnectionPool, Agent
-from twisted.web.http_headers import Headers
-from twisted.web._newclient import ResponseDone
-
-from synapse.http import cancelled_to_request_timed_out_error
-from synapse.http.endpoint import matrix_federation_endpoint
-import synapse.metrics
-from synapse.util.async import sleep, add_timeout_to_deferred
-from synapse.util import logcontext
-from synapse.util.logcontext import make_deferred_yieldable
-import synapse.util.retryutils
-
-from canonicaljson import encode_canonical_json
-
-from synapse.api.errors import (
-    SynapseError, Codes, HttpResponseException, FederationDeniedError,
-)
-
-from signedjson.sign import sign_json
-
 import cgi
-import simplejson as json
 import logging
 import random
 import sys
 import urllib
-from six.moves.urllib import parse as urlparse
-from six import string_types
 
+from six import string_types
+from six.moves.urllib import parse as urlparse
 
+from canonicaljson import encode_canonical_json, json
 from prometheus_client import Counter
+from signedjson.sign import sign_json
+
+from twisted.internet import defer, protocol, reactor
+from twisted.internet.error import DNSLookupError
+from twisted.web._newclient import ResponseDone
+from twisted.web.client import Agent, HTTPConnectionPool, readBody
+from twisted.web.http_headers import Headers
+
+import synapse.metrics
+import synapse.util.retryutils
+from synapse.api.errors import (
+    Codes,
+    FederationDeniedError,
+    HttpResponseException,
+    SynapseError,
+)
+from synapse.http import cancelled_to_request_timed_out_error
+from synapse.http.endpoint import matrix_federation_endpoint
+from synapse.util import logcontext
+from synapse.util.async_helpers import add_timeout_to_deferred
+from synapse.util.logcontext import make_deferred_yieldable
 
 logger = logging.getLogger(__name__)
 outbound_logger = logging.getLogger("synapse.http.outbound")
@@ -62,14 +61,14 @@ MAX_SHORT_RETRIES = 3
 
 class MatrixFederationEndpointFactory(object):
     def __init__(self, hs):
-        self.tls_server_context_factory = hs.tls_server_context_factory
+        self.tls_client_options_factory = hs.tls_client_options_factory
 
     def endpointForURI(self, uri):
         destination = uri.netloc
 
         return matrix_federation_endpoint(
             reactor, destination, timeout=10,
-            ssl_context_factory=self.tls_server_context_factory
+            tls_client_options_factory=self.tls_client_options_factory
         )
 
 
@@ -134,7 +133,7 @@ class MatrixFederationHttpClient(object):
                 failures, connection failures, SSL failures.)
         """
         if (
-            self.hs.config.federation_domain_whitelist and
+            self.hs.config.federation_domain_whitelist is not None and
             destination not in self.hs.config.federation_domain_whitelist
         ):
             raise FederationDeniedError(destination)
@@ -193,6 +192,7 @@ class MatrixFederationHttpClient(object):
                         add_timeout_to_deferred(
                             request_deferred,
                             timeout / 1000. if timeout else 60,
+                            self.hs.get_reactor(),
                             cancelled_to_request_timed_out_error,
                         )
                         response = yield make_deferred_yieldable(
@@ -234,7 +234,7 @@ class MatrixFederationHttpClient(object):
                                 delay = min(delay, 2)
                                 delay *= random.uniform(0.8, 1.4)
 
-                            yield sleep(delay)
+                            yield self.clock.sleep(delay)
                             retries_left -= 1
                         else:
                             raise
@@ -260,14 +260,35 @@ class MatrixFederationHttpClient(object):
             defer.returnValue(response)
 
     def sign_request(self, destination, method, url_bytes, headers_dict,
-                     content=None):
+                     content=None, destination_is=None):
+        """
+        Signs a request by adding an Authorization header to headers_dict
+        Args:
+            destination (bytes|None): The desination home server of the request.
+                May be None if the destination is an identity server, in which case
+                destination_is must be non-None.
+            method (bytes): The HTTP method of the request
+            url_bytes (bytes): The URI path of the request
+            headers_dict (dict): Dictionary of request headers to append to
+            content (bytes): The body of the request
+            destination_is (bytes): As 'destination', but if the destination is an
+                identity server
+
+        Returns:
+            None
+        """
         request = {
             "method": method,
             "uri": url_bytes,
             "origin": self.server_name,
-            "destination": destination,
         }
 
+        if destination is not None:
+            request["destination"] = destination
+
+        if destination_is is not None:
+            request["destination_is"] = destination_is
+
         if content is not None:
             request["content"] = content
 
@@ -418,7 +439,7 @@ class MatrixFederationHttpClient(object):
         defer.returnValue(json.loads(body))
 
     @defer.inlineCallbacks
-    def get_json(self, destination, path, args={}, retry_on_dns_fail=True,
+    def get_json(self, destination, path, args=None, retry_on_dns_fail=True,
                  timeout=None, ignore_backoff=False):
         """ GETs some json from the given host homeserver and path
 
@@ -426,7 +447,7 @@ class MatrixFederationHttpClient(object):
             destination (str): The remote server to send the HTTP request
                 to.
             path (str): The HTTP path.
-            args (dict): A dictionary used to create query strings, defaults to
+            args (dict|None): A dictionary used to create query strings, defaults to
                 None.
             timeout (int): How long to try (in ms) the destination for before
                 giving up. None indicates no timeout and that the request will
@@ -681,6 +702,9 @@ def check_content_type_is_json(headers):
 
 
 def encode_query_args(args):
+    if args is None:
+        return b""
+
     encoded_args = {}
     for k, vs in args.items():
         if isinstance(vs, string_types):
diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py
index dc06f6c443..72c2654678 100644
--- a/synapse/http/request_metrics.py
+++ b/synapse/http/request_metrics.py
@@ -15,10 +15,11 @@
 # limitations under the License.
 
 import logging
+import threading
 
 from prometheus_client.core import Counter, Histogram
-from synapse.metrics import LaterGauge
 
+from synapse.metrics import LaterGauge
 from synapse.util.logcontext import LoggingContext
 
 logger = logging.getLogger(__name__)
@@ -38,7 +39,8 @@ outgoing_responses_counter = Counter(
 )
 
 response_timer = Histogram(
-    "synapse_http_server_response_time_seconds", "sec", ["method", "servlet", "tag"]
+    "synapse_http_server_response_time_seconds", "sec",
+    ["method", "servlet", "tag", "code"],
 )
 
 response_ru_utime = Counter(
@@ -110,6 +112,9 @@ in_flight_requests_db_sched_duration = Counter(
 # The set of all in flight requests, set[RequestMetrics]
 _in_flight_requests = set()
 
+# Protects the _in_flight_requests set from concurrent accesss
+_in_flight_requests_lock = threading.Lock()
+
 
 def _get_in_flight_counts():
     """Returns a count of all in flight requests by (method, server_name)
@@ -117,13 +122,18 @@ def _get_in_flight_counts():
     Returns:
         dict[tuple[str, str], int]
     """
-    for rm in _in_flight_requests:
+    # Cast to a list to prevent it changing while the Prometheus
+    # thread is collecting metrics
+    with _in_flight_requests_lock:
+        reqs = list(_in_flight_requests)
+
+    for rm in reqs:
         rm.update_metrics()
 
     # Map from (method, name) -> int, the number of in flight requests of that
     # type
     counts = {}
-    for rm in _in_flight_requests:
+    for rm in reqs:
         key = (rm.method, rm.name,)
         counts[key] = counts.get(key, 0) + 1
 
@@ -131,7 +141,7 @@ def _get_in_flight_counts():
 
 
 LaterGauge(
-    "synapse_http_request_metrics_in_flight_requests_count",
+    "synapse_http_server_in_flight_requests_count",
     "",
     ["method", "servlet"],
     _get_in_flight_counts,
@@ -145,12 +155,16 @@ class RequestMetrics(object):
         self.name = name
         self.method = method
 
-        self._request_stats = _RequestStats.from_context(self.start_context)
+        # _request_stats records resource usage that we have already added
+        # to the "in flight" metrics.
+        self._request_stats = self.start_context.get_resource_usage()
 
-        _in_flight_requests.add(self)
+        with _in_flight_requests_lock:
+            _in_flight_requests.add(self)
 
     def stop(self, time_sec, request):
-        _in_flight_requests.discard(self)
+        with _in_flight_requests_lock:
+            _in_flight_requests.discard(self)
 
         context = LoggingContext.current_context()
 
@@ -165,26 +179,32 @@ class RequestMetrics(object):
                 )
                 return
 
-        outgoing_responses_counter.labels(request.method, str(request.code)).inc()
+        response_code = str(request.code)
+
+        outgoing_responses_counter.labels(request.method, response_code).inc()
 
         response_count.labels(request.method, self.name, tag).inc()
 
-        response_timer.labels(request.method, self.name, tag).observe(
+        response_timer.labels(request.method, self.name, tag, response_code).observe(
             time_sec - self.start
         )
 
-        ru_utime, ru_stime = context.get_resource_usage()
+        resource_usage = context.get_resource_usage()
 
-        response_ru_utime.labels(request.method, self.name, tag).inc(ru_utime)
-        response_ru_stime.labels(request.method, self.name, tag).inc(ru_stime)
+        response_ru_utime.labels(request.method, self.name, tag).inc(
+            resource_usage.ru_utime,
+        )
+        response_ru_stime.labels(request.method, self.name, tag).inc(
+            resource_usage.ru_stime,
+        )
         response_db_txn_count.labels(request.method, self.name, tag).inc(
-            context.db_txn_count
+            resource_usage.db_txn_count
         )
         response_db_txn_duration.labels(request.method, self.name, tag).inc(
-            context.db_txn_duration_sec
+            resource_usage.db_txn_duration_sec
         )
         response_db_sched_duration.labels(request.method, self.name, tag).inc(
-            context.db_sched_duration_sec
+            resource_usage.db_sched_duration_sec
         )
 
         response_size.labels(request.method, self.name, tag).inc(request.sentLength)
@@ -197,7 +217,10 @@ class RequestMetrics(object):
     def update_metrics(self):
         """Updates the in flight metrics with values from this request.
         """
-        diff = self._request_stats.update(self.start_context)
+        new_stats = self.start_context.get_resource_usage()
+
+        diff = new_stats - self._request_stats
+        self._request_stats = new_stats
 
         in_flight_requests_ru_utime.labels(self.method, self.name).inc(diff.ru_utime)
         in_flight_requests_ru_stime.labels(self.method, self.name).inc(diff.ru_stime)
@@ -213,61 +236,3 @@ class RequestMetrics(object):
         in_flight_requests_db_sched_duration.labels(self.method, self.name).inc(
             diff.db_sched_duration_sec
         )
-
-
-class _RequestStats(object):
-    """Keeps tracks of various metrics for an in flight request.
-    """
-
-    __slots__ = [
-        "ru_utime",
-        "ru_stime",
-        "db_txn_count",
-        "db_txn_duration_sec",
-        "db_sched_duration_sec",
-    ]
-
-    def __init__(
-        self, ru_utime, ru_stime, db_txn_count, db_txn_duration_sec, db_sched_duration_sec
-    ):
-        self.ru_utime = ru_utime
-        self.ru_stime = ru_stime
-        self.db_txn_count = db_txn_count
-        self.db_txn_duration_sec = db_txn_duration_sec
-        self.db_sched_duration_sec = db_sched_duration_sec
-
-    @staticmethod
-    def from_context(context):
-        ru_utime, ru_stime = context.get_resource_usage()
-
-        return _RequestStats(
-            ru_utime, ru_stime,
-            context.db_txn_count,
-            context.db_txn_duration_sec,
-            context.db_sched_duration_sec,
-        )
-
-    def update(self, context):
-        """Updates the current values and returns the difference between the
-        old and new values.
-
-        Returns:
-            _RequestStats: The difference between the old and new values
-        """
-        new = _RequestStats.from_context(context)
-
-        diff = _RequestStats(
-            new.ru_utime - self.ru_utime,
-            new.ru_stime - self.ru_stime,
-            new.db_txn_count - self.db_txn_count,
-            new.db_txn_duration_sec - self.db_txn_duration_sec,
-            new.db_sched_duration_sec - self.db_sched_duration_sec,
-        )
-
-        self.ru_utime = new.ru_utime
-        self.ru_stime = new.ru_stime
-        self.db_txn_count = new.db_txn_count
-        self.db_txn_duration_sec = new.db_txn_duration_sec
-        self.db_sched_duration_sec = new.db_sched_duration_sec
-
-        return diff
diff --git a/synapse/http/server.py b/synapse/http/server.py
index bc09b8b2be..2d5c23e673 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -13,35 +13,38 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import cgi
-from six.moves import http_client
+import collections
+import logging
 
-from synapse.api.errors import (
-    cs_exception, SynapseError, CodeMessageException, UnrecognizedRequestError, Codes
-)
-from synapse.http.request_metrics import (
-    requests_counter,
-)
-from synapse.util.logcontext import LoggingContext, PreserveLoggingContext
-from synapse.util.caches import intern_dict
-from synapse.util.metrics import Measure
-import synapse.metrics
-import synapse.events
+from six import PY3
+from six.moves import http_client, urllib
 
-from canonicaljson import (
-    encode_canonical_json, encode_pretty_printed_json
-)
+from canonicaljson import encode_canonical_json, encode_pretty_printed_json, json
 
 from twisted.internet import defer
 from twisted.python import failure
-from twisted.web import server, resource
+from twisted.web import resource
 from twisted.web.server import NOT_DONE_YET
+from twisted.web.static import NoRangeStaticProducer
 from twisted.web.util import redirectTo
 
-import collections
-import logging
-import urllib
-import simplejson
+import synapse.events
+import synapse.metrics
+from synapse.api.errors import (
+    CodeMessageException,
+    Codes,
+    SynapseError,
+    UnrecognizedRequestError,
+)
+from synapse.util.caches import intern_dict
+from synapse.util.logcontext import preserve_fn
+
+if PY3:
+    from io import BytesIO
+else:
+    from cStringIO import StringIO as BytesIO
 
 logger = logging.getLogger(__name__)
 
@@ -61,11 +64,10 @@ HTML_ERROR_TEMPLATE = """<!DOCTYPE html>
 def wrap_json_request_handler(h):
     """Wraps a request handler method with exception handling.
 
-    Also adds logging as per wrap_request_handler_with_logging.
+    Also does the wrapping with request.processing as per wrap_async_request_handler.
 
     The handler method must have a signature of "handle_foo(self, request)",
-    where "self" must have a "clock" attribute (and "request" must be a
-    SynapseRequest).
+    where "request" must be a SynapseRequest.
 
     The handler must return a deferred. If the deferred succeeds we assume that
     a response has been sent. If the deferred fails with a SynapseError we use
@@ -77,16 +79,13 @@ def wrap_json_request_handler(h):
     def wrapped_request_handler(self, request):
         try:
             yield h(self, request)
-        except CodeMessageException as e:
+        except SynapseError as e:
             code = e.code
-            if isinstance(e, SynapseError):
-                logger.info(
-                    "%s SynapseError: %s - %s", request, code, e.msg
-                )
-            else:
-                logger.exception(e)
+            logger.info(
+                "%s SynapseError: %s - %s", request, code, e.msg
+            )
             respond_with_json(
-                request, code, cs_exception(e), send_cors=True,
+                request, code, e.error_dict(), send_cors=True,
                 pretty_print=_request_user_agent_is_curl(request),
             )
 
@@ -112,24 +111,23 @@ def wrap_json_request_handler(h):
                 pretty_print=_request_user_agent_is_curl(request),
             )
 
-    return wrap_request_handler_with_logging(wrapped_request_handler)
+    return wrap_async_request_handler(wrapped_request_handler)
 
 
 def wrap_html_request_handler(h):
     """Wraps a request handler method with exception handling.
 
-    Also adds logging as per wrap_request_handler_with_logging.
+    Also does the wrapping with request.processing as per wrap_async_request_handler.
 
     The handler method must have a signature of "handle_foo(self, request)",
-    where "self" must have a "clock" attribute (and "request" must be a
-    SynapseRequest).
+    where "request" must be a SynapseRequest.
     """
     def wrapped_request_handler(self, request):
         d = defer.maybeDeferred(h, self, request)
         d.addErrback(_return_html_error, request)
         return d
 
-    return wrap_request_handler_with_logging(wrapped_request_handler)
+    return wrap_async_request_handler(wrapped_request_handler)
 
 
 def _return_html_error(f, request):
@@ -174,46 +172,26 @@ def _return_html_error(f, request):
     finish_request(request)
 
 
-def wrap_request_handler_with_logging(h):
-    """Wraps a request handler to provide logging and metrics
+def wrap_async_request_handler(h):
+    """Wraps an async request handler so that it calls request.processing.
+
+    This helps ensure that work done by the request handler after the request is completed
+    is correctly recorded against the request metrics/logs.
 
     The handler method must have a signature of "handle_foo(self, request)",
-    where "self" must have a "clock" attribute (and "request" must be a
-    SynapseRequest).
+    where "request" must be a SynapseRequest.
 
-    As well as calling `request.processing` (which will log the response and
-    duration for this request), the wrapped request handler will insert the
-    request id into the logging context.
+    The handler may return a deferred, in which case the completion of the request isn't
+    logged until the deferred completes.
     """
     @defer.inlineCallbacks
-    def wrapped_request_handler(self, request):
-        """
-        Args:
-            self:
-            request (synapse.http.site.SynapseRequest):
-        """
+    def wrapped_async_request_handler(self, request):
+        with request.processing():
+            yield h(self, request)
 
-        request_id = request.get_request_id()
-        with LoggingContext(request_id) as request_context:
-            request_context.request = request_id
-            with Measure(self.clock, "wrapped_request_handler"):
-                # we start the request metrics timer here with an initial stab
-                # at the servlet name. For most requests that name will be
-                # JsonResource (or a subclass), and JsonResource._async_render
-                # will update it once it picks a servlet.
-                servlet_name = self.__class__.__name__
-                with request.processing(servlet_name):
-                    with PreserveLoggingContext(request_context):
-                        d = defer.maybeDeferred(h, self, request)
-
-                        # record the arrival of the request *after*
-                        # dispatching to the handler, so that the handler
-                        # can update the servlet name in the request
-                        # metrics
-                        requests_counter.labels(request.method,
-                                                request.request_metrics.name).inc()
-                        yield d
-    return wrapped_request_handler
+    # we need to preserve_fn here, because the synchronous render method won't yield for
+    # us (obviously)
+    return preserve_fn(wrapped_async_request_handler)
 
 
 class HttpServer(object):
@@ -265,6 +243,7 @@ class JsonResource(HttpServer, resource.Resource):
         self.hs = hs
 
     def register_paths(self, method, path_patterns, callback):
+        method = method.encode("utf-8")  # method is bytes on py3
         for path_pattern in path_patterns:
             logger.debug("Registering for %s %s", method, path_pattern.pattern)
             self.path_regexs.setdefault(method, []).append(
@@ -275,7 +254,7 @@ class JsonResource(HttpServer, resource.Resource):
         """ This gets called by twisted every time someone sends us a request.
         """
         self._async_render(request)
-        return server.NOT_DONE_YET
+        return NOT_DONE_YET
 
     @wrap_json_request_handler
     @defer.inlineCallbacks
@@ -297,8 +276,19 @@ class JsonResource(HttpServer, resource.Resource):
         # here. If it throws an exception, that is handled by the wrapper
         # installed by @request_handler.
 
+        def _unquote(s):
+            if PY3:
+                # On Python 3, unquote is unicode -> unicode
+                return urllib.parse.unquote(s)
+            else:
+                # On Python 2, unquote is bytes -> bytes We need to encode the
+                # URL again (as it was decoded by _get_handler_for request), as
+                # ASCII because it's a URL, and then decode it to get the UTF-8
+                # characters that were quoted.
+                return urllib.parse.unquote(s.encode('ascii')).decode('utf8')
+
         kwargs = intern_dict({
-            name: urllib.unquote(value).decode("UTF-8") if value else value
+            name: _unquote(value) if value else value
             for name, value in group_dict.items()
         })
 
@@ -314,9 +304,9 @@ class JsonResource(HttpServer, resource.Resource):
             request (twisted.web.http.Request):
 
         Returns:
-            Tuple[Callable, dict[str, str]]: callback method, and the dict
-                mapping keys to path components as specified in the handler's
-                path match regexp.
+            Tuple[Callable, dict[unicode, unicode]]: callback method, and the
+                dict mapping keys to path components as specified in the
+                handler's path match regexp.
 
                 The callback will normally be a method registered via
                 register_paths, so will return (possibly via Deferred) either
@@ -328,7 +318,7 @@ class JsonResource(HttpServer, resource.Resource):
         # Loop through all the registered callbacks to check if the method
         # and path regex match
         for path_entry in self.path_regexs.get(request.method, []):
-            m = path_entry.pattern.match(request.path)
+            m = path_entry.pattern.match(request.path.decode('ascii'))
             if m:
                 # We found a match!
                 return path_entry.callback, m.groupdict()
@@ -384,7 +374,7 @@ class RootRedirect(resource.Resource):
         self.url = path
 
     def render_GET(self, request):
-        return redirectTo(self.url, request)
+        return redirectTo(self.url.encode('ascii'), request)
 
     def getChild(self, name, request):
         if len(name) == 0:
@@ -405,12 +395,13 @@ def respond_with_json(request, code, json_object, send_cors=False,
         return
 
     if pretty_print:
-        json_bytes = encode_pretty_printed_json(json_object) + "\n"
+        json_bytes = encode_pretty_printed_json(json_object) + b"\n"
     else:
         if canonical_json or synapse.events.USE_FROZEN_DICTS:
+            # canonicaljson already encodes to bytes
             json_bytes = encode_canonical_json(json_object)
         else:
-            json_bytes = simplejson.dumps(json_object)
+            json_bytes = json.dumps(json_object).encode("utf-8")
 
     return respond_with_json_bytes(
         request, code, json_bytes,
@@ -440,8 +431,12 @@ def respond_with_json_bytes(request, code, json_bytes, send_cors=False,
     if send_cors:
         set_cors_headers(request)
 
-    request.write(json_bytes)
-    finish_request(request)
+    # todo: we can almost certainly avoid this copy and encode the json straight into
+    # the bytesIO, but it would involve faffing around with string->bytes wrappers.
+    bytes_io = BytesIO(json_bytes)
+
+    producer = NoRangeStaticProducer(request, bytes_io)
+    producer.start()
     return NOT_DONE_YET
 
 
diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py
index ef8e62901b..a1e4b88e6d 100644
--- a/synapse/http/servlet.py
+++ b/synapse/http/servlet.py
@@ -15,10 +15,11 @@
 
 """ This module contains base REST classes for constructing REST servlets. """
 
-from synapse.api.errors import SynapseError, Codes
-
 import logging
-import simplejson
+
+from canonicaljson import json
+
+from synapse.api.errors import Codes, SynapseError
 
 logger = logging.getLogger(__name__)
 
@@ -28,7 +29,7 @@ def parse_integer(request, name, default=None, required=False):
 
     Args:
         request: the twisted HTTP request.
-        name (str): the name of the query parameter.
+        name (bytes/unicode): the name of the query parameter.
         default (int|None): value to use if the parameter is absent, defaults
             to None.
         required (bool): whether to raise a 400 SynapseError if the
@@ -45,6 +46,10 @@ def parse_integer(request, name, default=None, required=False):
 
 
 def parse_integer_from_args(args, name, default=None, required=False):
+
+    if not isinstance(name, bytes):
+        name = name.encode('ascii')
+
     if name in args:
         try:
             return int(args[name][0])
@@ -64,7 +69,7 @@ def parse_boolean(request, name, default=None, required=False):
 
     Args:
         request: the twisted HTTP request.
-        name (str): the name of the query parameter.
+        name (bytes/unicode): the name of the query parameter.
         default (bool|None): value to use if the parameter is absent, defaults
             to None.
         required (bool): whether to raise a 400 SynapseError if the
@@ -82,11 +87,15 @@ def parse_boolean(request, name, default=None, required=False):
 
 
 def parse_boolean_from_args(args, name, default=None, required=False):
+
+    if not isinstance(name, bytes):
+        name = name.encode('ascii')
+
     if name in args:
         try:
             return {
-                "true": True,
-                "false": False,
+                b"true": True,
+                b"false": False,
             }[args[name][0]]
         except Exception:
             message = (
@@ -103,21 +112,29 @@ def parse_boolean_from_args(args, name, default=None, required=False):
 
 
 def parse_string(request, name, default=None, required=False,
-                 allowed_values=None, param_type="string"):
-    """Parse a string parameter from the request query string.
+                 allowed_values=None, param_type="string", encoding='ascii'):
+    """
+    Parse a string parameter from the request query string.
+
+    If encoding is not None, the content of the query param will be
+    decoded to Unicode using the encoding, otherwise it will be encoded
 
     Args:
         request: the twisted HTTP request.
-        name (str): the name of the query parameter.
-        default (str|None): value to use if the parameter is absent, defaults
-            to None.
+        name (bytes/unicode): the name of the query parameter.
+        default (bytes/unicode|None): value to use if the parameter is absent,
+            defaults to None. Must be bytes if encoding is None.
         required (bool): whether to raise a 400 SynapseError if the
             parameter is absent, defaults to False.
-        allowed_values (list[str]): List of allowed values for the string,
-            or None if any value is allowed, defaults to None
+        allowed_values (list[bytes/unicode]): List of allowed values for the
+            string, or None if any value is allowed, defaults to None. Must be
+            the same type as name, if given.
+        encoding: The encoding to decode the name to, and decode the string
+            content with.
 
     Returns:
-        str|None: A string value or the default.
+        bytes/unicode|None: A string value or the default. Unicode if encoding
+        was given, bytes otherwise.
 
     Raises:
         SynapseError if the parameter is absent and required, or if the
@@ -125,14 +142,22 @@ def parse_string(request, name, default=None, required=False,
             is not one of those allowed values.
     """
     return parse_string_from_args(
-        request.args, name, default, required, allowed_values, param_type,
+        request.args, name, default, required, allowed_values, param_type, encoding
     )
 
 
 def parse_string_from_args(args, name, default=None, required=False,
-                           allowed_values=None, param_type="string"):
+                           allowed_values=None, param_type="string", encoding='ascii'):
+
+    if not isinstance(name, bytes):
+        name = name.encode('ascii')
+
     if name in args:
         value = args[name][0]
+
+        if encoding:
+            value = value.decode(encoding)
+
         if allowed_values is not None and value not in allowed_values:
             message = "Query parameter %r must be one of [%s]" % (
                 name, ", ".join(repr(v) for v in allowed_values)
@@ -145,6 +170,10 @@ def parse_string_from_args(args, name, default=None, required=False,
             message = "Missing %s query parameter %r" % (param_type, name)
             raise SynapseError(400, message, errcode=Codes.MISSING_PARAM)
         else:
+
+            if encoding and isinstance(default, bytes):
+                return default.decode(encoding)
+
             return default
 
 
@@ -170,8 +199,16 @@ def parse_json_value_from_request(request, allow_empty_body=False):
     if not content_bytes and allow_empty_body:
         return None
 
+    # Decode to Unicode so that simplejson will return Unicode strings on
+    # Python 2
+    try:
+        content_unicode = content_bytes.decode('utf8')
+    except UnicodeDecodeError:
+        logger.warn("Unable to decode UTF-8")
+        raise SynapseError(400, "Content not JSON.", errcode=Codes.NOT_JSON)
+
     try:
-        content = simplejson.loads(content_bytes)
+        content = json.loads(content_unicode)
     except Exception as e:
         logger.warn("Unable to parse JSON: %s", e)
         raise SynapseError(400, "Content not JSON.", errcode=Codes.NOT_JSON)
@@ -205,7 +242,7 @@ def parse_json_object_from_request(request, allow_empty_body=False):
     return content
 
 
-def assert_params_in_request(body, required):
+def assert_params_in_dict(body, required):
     absent = []
     for k in required:
         if k not in body:
diff --git a/synapse/http/site.py b/synapse/http/site.py
index 2664006f8c..88ed3714f9 100644
--- a/synapse/http/site.py
+++ b/synapse/http/site.py
@@ -11,16 +11,15 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import contextlib
 import logging
 import time
 
-from twisted.web.server import Site, Request
+from twisted.web.server import Request, Site
 
 from synapse.http import redact_uri
-from synapse.http.request_metrics import RequestMetrics
-from synapse.util.logcontext import LoggingContext
+from synapse.http.request_metrics import RequestMetrics, requests_counter
+from synapse.util.logcontext import LoggingContext, PreserveLoggingContext
 
 logger = logging.getLogger(__name__)
 
@@ -34,24 +33,43 @@ class SynapseRequest(Request):
 
     It extends twisted's twisted.web.server.Request, and adds:
      * Unique request ID
+     * A log context associated with the request
      * Redaction of access_token query-params in __repr__
      * Logging at start and end
      * Metrics to record CPU, wallclock and DB time by endpoint.
 
-    It provides a method `processing` which should be called by the Resource
-    which is handling the request, and returns a context manager.
+    It also provides a method `processing`, which returns a context manager. If this
+    method is called, the request won't be logged until the context manager is closed;
+    this is useful for asynchronous request handlers which may go on processing the
+    request even after the client has disconnected.
 
+    Attributes:
+        logcontext(LoggingContext) : the log context for this request
     """
-    def __init__(self, site, *args, **kw):
-        Request.__init__(self, *args, **kw)
+    def __init__(self, site, channel, *args, **kw):
+        Request.__init__(self, channel, *args, **kw)
         self.site = site
+        self._channel = channel     # this is used by the tests
         self.authenticated_entity = None
         self.start_time = 0
 
+        # we can't yet create the logcontext, as we don't know the method.
+        self.logcontext = None
+
         global _next_request_seq
         self.request_seq = _next_request_seq
         _next_request_seq += 1
 
+        # whether an asynchronous request handler has called processing()
+        self._is_processing = False
+
+        # the time when the asynchronous request handler completed its processing
+        self._processing_finished_time = None
+
+        # what time we finished sending the response to the client (or the connection
+        # dropped)
+        self.finish_time = None
+
     def __repr__(self):
         # We overwrite this so that we don't log ``access_token``
         return '<%s at 0x%x method=%r uri=%r clientproto=%r site=%r>' % (
@@ -73,11 +91,116 @@ class SynapseRequest(Request):
         return self.requestHeaders.getRawHeaders(b"User-Agent", [None])[-1]
 
     def render(self, resrc):
+        # this is called once a Resource has been found to serve the request; in our
+        # case the Resource in question will normally be a JsonResource.
+
+        # create a LogContext for this request
+        request_id = self.get_request_id()
+        logcontext = self.logcontext = LoggingContext(request_id)
+        logcontext.request = request_id
+
         # override the Server header which is set by twisted
         self.setHeader("Server", self.site.server_version_string)
-        return Request.render(self, resrc)
+
+        with PreserveLoggingContext(self.logcontext):
+            # we start the request metrics timer here with an initial stab
+            # at the servlet name. For most requests that name will be
+            # JsonResource (or a subclass), and JsonResource._async_render
+            # will update it once it picks a servlet.
+            servlet_name = resrc.__class__.__name__
+            self._started_processing(servlet_name)
+
+            Request.render(self, resrc)
+
+            # record the arrival of the request *after*
+            # dispatching to the handler, so that the handler
+            # can update the servlet name in the request
+            # metrics
+            requests_counter.labels(self.method,
+                                    self.request_metrics.name).inc()
+
+    @contextlib.contextmanager
+    def processing(self):
+        """Record the fact that we are processing this request.
+
+        Returns a context manager; the correct way to use this is:
+
+        @defer.inlineCallbacks
+        def handle_request(request):
+            with request.processing("FooServlet"):
+                yield really_handle_the_request()
+
+        Once the context manager is closed, the completion of the request will be logged,
+        and the various metrics will be updated.
+        """
+        if self._is_processing:
+            raise RuntimeError("Request is already processing")
+        self._is_processing = True
+
+        try:
+            yield
+        except Exception:
+            # this should already have been caught, and sent back to the client as a 500.
+            logger.exception("Asynchronous messge handler raised an uncaught exception")
+        finally:
+            # the request handler has finished its work and either sent the whole response
+            # back, or handed over responsibility to a Producer.
+
+            self._processing_finished_time = time.time()
+            self._is_processing = False
+
+            # if we've already sent the response, log it now; otherwise, we wait for the
+            # response to be sent.
+            if self.finish_time is not None:
+                self._finished_processing()
+
+    def finish(self):
+        """Called when all response data has been written to this Request.
+
+        Overrides twisted.web.server.Request.finish to record the finish time and do
+        logging.
+        """
+        self.finish_time = time.time()
+        Request.finish(self)
+        if not self._is_processing:
+            with PreserveLoggingContext(self.logcontext):
+                self._finished_processing()
+
+    def connectionLost(self, reason):
+        """Called when the client connection is closed before the response is written.
+
+        Overrides twisted.web.server.Request.connectionLost to record the finish time and
+        do logging.
+        """
+        self.finish_time = time.time()
+        Request.connectionLost(self, reason)
+
+        # we only get here if the connection to the client drops before we send
+        # the response.
+        #
+        # It's useful to log it here so that we can get an idea of when
+        # the client disconnects.
+        with PreserveLoggingContext(self.logcontext):
+            logger.warn(
+                "Error processing request %r: %s %s", self, reason.type, reason.value,
+            )
+
+            if not self._is_processing:
+                self._finished_processing()
 
     def _started_processing(self, servlet_name):
+        """Record the fact that we are processing this request.
+
+        This will log the request's arrival. Once the request completes,
+        be sure to call finished_processing.
+
+        Args:
+            servlet_name (str): the name of the servlet which will be
+                processing this request. This is used in the metrics.
+
+                It is possible to update this afterwards by updating
+                self.request_metrics.name.
+        """
         self.start_time = time.time()
         self.request_metrics = RequestMetrics()
         self.request_metrics.start(
@@ -93,72 +216,79 @@ class SynapseRequest(Request):
         )
 
     def _finished_processing(self):
-        try:
-            context = LoggingContext.current_context()
-            ru_utime, ru_stime = context.get_resource_usage()
-            db_txn_count = context.db_txn_count
-            db_txn_duration_sec = context.db_txn_duration_sec
-            db_sched_duration_sec = context.db_sched_duration_sec
-        except Exception:
-            ru_utime, ru_stime = (0, 0)
-            db_txn_count, db_txn_duration_sec = (0, 0)
+        """Log the completion of this request and update the metrics
+        """
 
-        end_time = time.time()
+        if self.logcontext is None:
+            # this can happen if the connection closed before we read the
+            # headers (so render was never called). In that case we'll already
+            # have logged a warning, so just bail out.
+            return
+
+        usage = self.logcontext.get_resource_usage()
+
+        if self._processing_finished_time is None:
+            # we completed the request without anything calling processing()
+            self._processing_finished_time = time.time()
+
+        # the time between receiving the request and the request handler finishing
+        processing_time = self._processing_finished_time - self.start_time
+
+        # the time between the request handler finishing and the response being sent
+        # to the client (nb may be negative)
+        response_send_time = self.finish_time - self._processing_finished_time
+
+        # need to decode as it could be raw utf-8 bytes
+        # from a IDN servname in an auth header
+        authenticated_entity = self.authenticated_entity
+        if authenticated_entity is not None and isinstance(authenticated_entity, bytes):
+            authenticated_entity = authenticated_entity.decode("utf-8", "replace")
+
+        # ...or could be raw utf-8 bytes in the User-Agent header.
+        # N.B. if you don't do this, the logger explodes cryptically
+        # with maximum recursion trying to log errors about
+        # the charset problem.
+        # c.f. https://github.com/matrix-org/synapse/issues/3471
+        user_agent = self.get_user_agent()
+        if user_agent is not None:
+            user_agent = user_agent.decode("utf-8", "replace")
+        else:
+            user_agent = "-"
+
+        code = str(self.code)
+        if not self.finished:
+            # we didn't send the full response before we gave up (presumably because
+            # the connection dropped)
+            code += "!"
 
         self.site.access_logger.info(
             "%s - %s - {%s}"
-            " Processed request: %.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)"
-            " %sB %s \"%s %s %s\" \"%s\"",
+            " Processed request: %.3fsec/%.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)"
+            " %sB %s \"%s %s %s\" \"%s\" [%d dbevts]",
             self.getClientIP(),
             self.site.site_tag,
-            self.authenticated_entity,
-            end_time - self.start_time,
-            ru_utime,
-            ru_stime,
-            db_sched_duration_sec,
-            db_txn_duration_sec,
-            int(db_txn_count),
+            authenticated_entity,
+            processing_time,
+            response_send_time,
+            usage.ru_utime,
+            usage.ru_stime,
+            usage.db_sched_duration_sec,
+            usage.db_txn_duration_sec,
+            int(usage.db_txn_count),
             self.sentLength,
-            self.code,
+            code,
             self.method,
             self.get_redacted_uri(),
             self.clientproto,
-            self.get_user_agent(),
+            user_agent,
+            usage.evt_db_fetch_count,
         )
 
         try:
-            self.request_metrics.stop(end_time, self)
+            self.request_metrics.stop(self.finish_time, self)
         except Exception as e:
             logger.warn("Failed to stop metrics: %r", e)
 
-    @contextlib.contextmanager
-    def processing(self, servlet_name):
-        """Record the fact that we are processing this request.
-
-        Returns a context manager; the correct way to use this is:
-
-        @defer.inlineCallbacks
-        def handle_request(request):
-            with request.processing("FooServlet"):
-                yield really_handle_the_request()
-
-        This will log the request's arrival. Once the context manager is
-        closed, the completion of the request will be logged, and the various
-        metrics will be updated.
-
-        Args:
-            servlet_name (str): the name of the servlet which will be
-                processing this request. This is used in the metrics.
-
-                It is possible to update this afterwards by updating
-                self.request_metrics.servlet_name.
-        """
-        # TODO: we should probably just move this into render() and finish(),
-        # to save having to call a separate method.
-        self._started_processing(servlet_name)
-        yield
-        self._finished_processing()
-
 
 class XForwardedForRequest(SynapseRequest):
     def __init__(self, *args, **kw):
@@ -204,7 +334,7 @@ class SynapseSite(Site):
         proxied = config.get("x_forwarded", False)
         self.requestFactory = SynapseRequestFactory(self, proxied)
         self.access_logger = logging.getLogger(logger_name)
-        self.server_version_string = server_version_string
+        self.server_version_string = server_version_string.encode('ascii')
 
     def log(self, request):
         pass
author	Amber Brown <hawkowl@atleastfornow.net>	2018-09-03 21:08:35 +1000
committer	GitHub <noreply@github.com>	2018-09-03 21:08:35 +1000
commit	4fc4b881c58fd638db5f4dac0863721111b67af0 (patch)
tree	cc1604f5e3b4e0a263e0e11a55b62ef4006a64a1 /synapse/http
parent	The project `matrix-synapse-auto-deploy` does not seem to be maintained anymore. (diff)
parent	Merge pull request #3777 from matrix-org/neilj/fix_register_user_registration (diff)
download	synapse-4fc4b881c58fd638db5f4dac0863721111b67af0.tar.xz