4 files changed, 252 insertions, 115 deletions
diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py
index 588e280571..72c2654678 100644
--- a/synapse/http/request_metrics.py
+++ b/synapse/http/request_metrics.py
@@ -15,6 +15,7 @@
 # limitations under the License.
 
 import logging
+import threading
 
 from prometheus_client.core import Counter, Histogram
 
@@ -111,6 +112,9 @@ in_flight_requests_db_sched_duration = Counter(
 # The set of all in flight requests, set[RequestMetrics]
 _in_flight_requests = set()
 
+# Protects the _in_flight_requests set from concurrent accesss
+_in_flight_requests_lock = threading.Lock()
+
 
 def _get_in_flight_counts():
     """Returns a count of all in flight requests by (method, server_name)
@@ -120,7 +124,8 @@ def _get_in_flight_counts():
     """
     # Cast to a list to prevent it changing while the Prometheus
     # thread is collecting metrics
-    reqs = list(_in_flight_requests)
+    with _in_flight_requests_lock:
+        reqs = list(_in_flight_requests)
 
     for rm in reqs:
         rm.update_metrics()
@@ -154,10 +159,12 @@ class RequestMetrics(object):
         # to the "in flight" metrics.
         self._request_stats = self.start_context.get_resource_usage()
 
-        _in_flight_requests.add(self)
+        with _in_flight_requests_lock:
+            _in_flight_requests.add(self)
 
     def stop(self, time_sec, request):
-        _in_flight_requests.discard(self)
+        with _in_flight_requests_lock:
+            _in_flight_requests.discard(self)
 
         context = LoggingContext.current_context()
 
diff --git a/synapse/http/server.py b/synapse/http/server.py
index 6dacb31037..2d5c23e673 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -25,8 +25,9 @@ from canonicaljson import encode_canonical_json, encode_pretty_printed_json, jso
 
 from twisted.internet import defer
 from twisted.python import failure
-from twisted.web import resource, server
+from twisted.web import resource
 from twisted.web.server import NOT_DONE_YET
+from twisted.web.static import NoRangeStaticProducer
 from twisted.web.util import redirectTo
 
 import synapse.events
@@ -37,10 +38,13 @@ from synapse.api.errors import (
     SynapseError,
     UnrecognizedRequestError,
 )
-from synapse.http.request_metrics import requests_counter
 from synapse.util.caches import intern_dict
-from synapse.util.logcontext import LoggingContext, PreserveLoggingContext
-from synapse.util.metrics import Measure
+from synapse.util.logcontext import preserve_fn
+
+if PY3:
+    from io import BytesIO
+else:
+    from cStringIO import StringIO as BytesIO
 
 logger = logging.getLogger(__name__)
 
@@ -60,11 +64,10 @@ HTML_ERROR_TEMPLATE = """<!DOCTYPE html>
 def wrap_json_request_handler(h):
     """Wraps a request handler method with exception handling.
 
-    Also adds logging as per wrap_request_handler_with_logging.
+    Also does the wrapping with request.processing as per wrap_async_request_handler.
 
     The handler method must have a signature of "handle_foo(self, request)",
-    where "self" must have a "clock" attribute (and "request" must be a
-    SynapseRequest).
+    where "request" must be a SynapseRequest.
 
     The handler must return a deferred. If the deferred succeeds we assume that
     a response has been sent. If the deferred fails with a SynapseError we use
@@ -108,24 +111,23 @@ def wrap_json_request_handler(h):
                 pretty_print=_request_user_agent_is_curl(request),
             )
 
-    return wrap_request_handler_with_logging(wrapped_request_handler)
+    return wrap_async_request_handler(wrapped_request_handler)
 
 
 def wrap_html_request_handler(h):
     """Wraps a request handler method with exception handling.
 
-    Also adds logging as per wrap_request_handler_with_logging.
+    Also does the wrapping with request.processing as per wrap_async_request_handler.
 
     The handler method must have a signature of "handle_foo(self, request)",
-    where "self" must have a "clock" attribute (and "request" must be a
-    SynapseRequest).
+    where "request" must be a SynapseRequest.
     """
     def wrapped_request_handler(self, request):
         d = defer.maybeDeferred(h, self, request)
         d.addErrback(_return_html_error, request)
         return d
 
-    return wrap_request_handler_with_logging(wrapped_request_handler)
+    return wrap_async_request_handler(wrapped_request_handler)
 
 
 def _return_html_error(f, request):
@@ -170,46 +172,26 @@ def _return_html_error(f, request):
     finish_request(request)
 
 
-def wrap_request_handler_with_logging(h):
-    """Wraps a request handler to provide logging and metrics
+def wrap_async_request_handler(h):
+    """Wraps an async request handler so that it calls request.processing.
+
+    This helps ensure that work done by the request handler after the request is completed
+    is correctly recorded against the request metrics/logs.
 
     The handler method must have a signature of "handle_foo(self, request)",
-    where "self" must have a "clock" attribute (and "request" must be a
-    SynapseRequest).
+    where "request" must be a SynapseRequest.
 
-    As well as calling `request.processing` (which will log the response and
-    duration for this request), the wrapped request handler will insert the
-    request id into the logging context.
+    The handler may return a deferred, in which case the completion of the request isn't
+    logged until the deferred completes.
     """
     @defer.inlineCallbacks
-    def wrapped_request_handler(self, request):
-        """
-        Args:
-            self:
-            request (synapse.http.site.SynapseRequest):
-        """
+    def wrapped_async_request_handler(self, request):
+        with request.processing():
+            yield h(self, request)
 
-        request_id = request.get_request_id()
-        with LoggingContext(request_id) as request_context:
-            request_context.request = request_id
-            with Measure(self.clock, "wrapped_request_handler"):
-                # we start the request metrics timer here with an initial stab
-                # at the servlet name. For most requests that name will be
-                # JsonResource (or a subclass), and JsonResource._async_render
-                # will update it once it picks a servlet.
-                servlet_name = self.__class__.__name__
-                with request.processing(servlet_name):
-                    with PreserveLoggingContext(request_context):
-                        d = defer.maybeDeferred(h, self, request)
-
-                        # record the arrival of the request *after*
-                        # dispatching to the handler, so that the handler
-                        # can update the servlet name in the request
-                        # metrics
-                        requests_counter.labels(request.method,
-                                                request.request_metrics.name).inc()
-                        yield d
-    return wrapped_request_handler
+    # we need to preserve_fn here, because the synchronous render method won't yield for
+    # us (obviously)
+    return preserve_fn(wrapped_async_request_handler)
 
 
 class HttpServer(object):
@@ -272,7 +254,7 @@ class JsonResource(HttpServer, resource.Resource):
         """ This gets called by twisted every time someone sends us a request.
         """
         self._async_render(request)
-        return server.NOT_DONE_YET
+        return NOT_DONE_YET
 
     @wrap_json_request_handler
     @defer.inlineCallbacks
@@ -413,8 +395,7 @@ def respond_with_json(request, code, json_object, send_cors=False,
         return
 
     if pretty_print:
-        json_bytes = (encode_pretty_printed_json(json_object) + "\n"
-                      ).encode("utf-8")
+        json_bytes = encode_pretty_printed_json(json_object) + b"\n"
     else:
         if canonical_json or synapse.events.USE_FROZEN_DICTS:
             # canonicaljson already encodes to bytes
@@ -450,8 +431,12 @@ def respond_with_json_bytes(request, code, json_bytes, send_cors=False,
     if send_cors:
         set_cors_headers(request)
 
-    request.write(json_bytes)
-    finish_request(request)
+    # todo: we can almost certainly avoid this copy and encode the json straight into
+    # the bytesIO, but it would involve faffing around with string->bytes wrappers.
+    bytes_io = BytesIO(json_bytes)
+
+    producer = NoRangeStaticProducer(request, bytes_io)
+    producer.start()
     return NOT_DONE_YET
 
 
diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py
index 69f7085291..a1e4b88e6d 100644
--- a/synapse/http/servlet.py
+++ b/synapse/http/servlet.py
@@ -29,7 +29,7 @@ def parse_integer(request, name, default=None, required=False):
 
     Args:
         request: the twisted HTTP request.
-        name (str): the name of the query parameter.
+        name (bytes/unicode): the name of the query parameter.
         default (int|None): value to use if the parameter is absent, defaults
             to None.
         required (bool): whether to raise a 400 SynapseError if the
@@ -46,6 +46,10 @@ def parse_integer(request, name, default=None, required=False):
 
 
 def parse_integer_from_args(args, name, default=None, required=False):
+
+    if not isinstance(name, bytes):
+        name = name.encode('ascii')
+
     if name in args:
         try:
             return int(args[name][0])
@@ -65,7 +69,7 @@ def parse_boolean(request, name, default=None, required=False):
 
     Args:
         request: the twisted HTTP request.
-        name (str): the name of the query parameter.
+        name (bytes/unicode): the name of the query parameter.
         default (bool|None): value to use if the parameter is absent, defaults
             to None.
         required (bool): whether to raise a 400 SynapseError if the
@@ -83,11 +87,15 @@ def parse_boolean(request, name, default=None, required=False):
 
 
 def parse_boolean_from_args(args, name, default=None, required=False):
+
+    if not isinstance(name, bytes):
+        name = name.encode('ascii')
+
     if name in args:
         try:
             return {
-                "true": True,
-                "false": False,
+                b"true": True,
+                b"false": False,
             }[args[name][0]]
         except Exception:
             message = (
@@ -104,21 +112,29 @@ def parse_boolean_from_args(args, name, default=None, required=False):
 
 
 def parse_string(request, name, default=None, required=False,
-                 allowed_values=None, param_type="string"):
-    """Parse a string parameter from the request query string.
+                 allowed_values=None, param_type="string", encoding='ascii'):
+    """
+    Parse a string parameter from the request query string.
+
+    If encoding is not None, the content of the query param will be
+    decoded to Unicode using the encoding, otherwise it will be encoded
 
     Args:
         request: the twisted HTTP request.
-        name (str): the name of the query parameter.
-        default (str|None): value to use if the parameter is absent, defaults
-            to None.
+        name (bytes/unicode): the name of the query parameter.
+        default (bytes/unicode|None): value to use if the parameter is absent,
+            defaults to None. Must be bytes if encoding is None.
         required (bool): whether to raise a 400 SynapseError if the
             parameter is absent, defaults to False.
-        allowed_values (list[str]): List of allowed values for the string,
-            or None if any value is allowed, defaults to None
+        allowed_values (list[bytes/unicode]): List of allowed values for the
+            string, or None if any value is allowed, defaults to None. Must be
+            the same type as name, if given.
+        encoding: The encoding to decode the name to, and decode the string
+            content with.
 
     Returns:
-        str|None: A string value or the default.
+        bytes/unicode|None: A string value or the default. Unicode if encoding
+        was given, bytes otherwise.
 
     Raises:
         SynapseError if the parameter is absent and required, or if the
@@ -126,14 +142,22 @@ def parse_string(request, name, default=None, required=False,
             is not one of those allowed values.
     """
     return parse_string_from_args(
-        request.args, name, default, required, allowed_values, param_type,
+        request.args, name, default, required, allowed_values, param_type, encoding
     )
 
 
 def parse_string_from_args(args, name, default=None, required=False,
-                           allowed_values=None, param_type="string"):
+                           allowed_values=None, param_type="string", encoding='ascii'):
+
+    if not isinstance(name, bytes):
+        name = name.encode('ascii')
+
     if name in args:
         value = args[name][0]
+
+        if encoding:
+            value = value.decode(encoding)
+
         if allowed_values is not None and value not in allowed_values:
             message = "Query parameter %r must be one of [%s]" % (
                 name, ", ".join(repr(v) for v in allowed_values)
@@ -146,6 +170,10 @@ def parse_string_from_args(args, name, default=None, required=False,
             message = "Missing %s query parameter %r" % (param_type, name)
             raise SynapseError(400, message, errcode=Codes.MISSING_PARAM)
         else:
+
+            if encoding and isinstance(default, bytes):
+                return default.decode(encoding)
+
             return default
 
 
diff --git a/synapse/http/site.py b/synapse/http/site.py
index 5fd30a4c2c..88ed3714f9 100644
--- a/synapse/http/site.py
+++ b/synapse/http/site.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import contextlib
 import logging
 import time
@@ -19,8 +18,8 @@ import time
 from twisted.web.server import Request, Site
 
 from synapse.http import redact_uri
-from synapse.http.request_metrics import RequestMetrics
-from synapse.util.logcontext import ContextResourceUsage, LoggingContext
+from synapse.http.request_metrics import RequestMetrics, requests_counter
+from synapse.util.logcontext import LoggingContext, PreserveLoggingContext
 
 logger = logging.getLogger(__name__)
 
@@ -34,25 +33,43 @@ class SynapseRequest(Request):
 
     It extends twisted's twisted.web.server.Request, and adds:
      * Unique request ID
+     * A log context associated with the request
      * Redaction of access_token query-params in __repr__
      * Logging at start and end
      * Metrics to record CPU, wallclock and DB time by endpoint.
 
-    It provides a method `processing` which should be called by the Resource
-    which is handling the request, and returns a context manager.
+    It also provides a method `processing`, which returns a context manager. If this
+    method is called, the request won't be logged until the context manager is closed;
+    this is useful for asynchronous request handlers which may go on processing the
+    request even after the client has disconnected.
 
+    Attributes:
+        logcontext(LoggingContext) : the log context for this request
     """
     def __init__(self, site, channel, *args, **kw):
         Request.__init__(self, channel, *args, **kw)
         self.site = site
-        self._channel = channel
+        self._channel = channel     # this is used by the tests
         self.authenticated_entity = None
         self.start_time = 0
 
+        # we can't yet create the logcontext, as we don't know the method.
+        self.logcontext = None
+
         global _next_request_seq
         self.request_seq = _next_request_seq
         _next_request_seq += 1
 
+        # whether an asynchronous request handler has called processing()
+        self._is_processing = False
+
+        # the time when the asynchronous request handler completed its processing
+        self._processing_finished_time = None
+
+        # what time we finished sending the response to the client (or the connection
+        # dropped)
+        self.finish_time = None
+
     def __repr__(self):
         # We overwrite this so that we don't log ``access_token``
         return '<%s at 0x%x method=%r uri=%r clientproto=%r site=%r>' % (
@@ -74,11 +91,116 @@ class SynapseRequest(Request):
         return self.requestHeaders.getRawHeaders(b"User-Agent", [None])[-1]
 
     def render(self, resrc):
+        # this is called once a Resource has been found to serve the request; in our
+        # case the Resource in question will normally be a JsonResource.
+
+        # create a LogContext for this request
+        request_id = self.get_request_id()
+        logcontext = self.logcontext = LoggingContext(request_id)
+        logcontext.request = request_id
+
         # override the Server header which is set by twisted
         self.setHeader("Server", self.site.server_version_string)
-        return Request.render(self, resrc)
+
+        with PreserveLoggingContext(self.logcontext):
+            # we start the request metrics timer here with an initial stab
+            # at the servlet name. For most requests that name will be
+            # JsonResource (or a subclass), and JsonResource._async_render
+            # will update it once it picks a servlet.
+            servlet_name = resrc.__class__.__name__
+            self._started_processing(servlet_name)
+
+            Request.render(self, resrc)
+
+            # record the arrival of the request *after*
+            # dispatching to the handler, so that the handler
+            # can update the servlet name in the request
+            # metrics
+            requests_counter.labels(self.method,
+                                    self.request_metrics.name).inc()
+
+    @contextlib.contextmanager
+    def processing(self):
+        """Record the fact that we are processing this request.
+
+        Returns a context manager; the correct way to use this is:
+
+        @defer.inlineCallbacks
+        def handle_request(request):
+            with request.processing("FooServlet"):
+                yield really_handle_the_request()
+
+        Once the context manager is closed, the completion of the request will be logged,
+        and the various metrics will be updated.
+        """
+        if self._is_processing:
+            raise RuntimeError("Request is already processing")
+        self._is_processing = True
+
+        try:
+            yield
+        except Exception:
+            # this should already have been caught, and sent back to the client as a 500.
+            logger.exception("Asynchronous messge handler raised an uncaught exception")
+        finally:
+            # the request handler has finished its work and either sent the whole response
+            # back, or handed over responsibility to a Producer.
+
+            self._processing_finished_time = time.time()
+            self._is_processing = False
+
+            # if we've already sent the response, log it now; otherwise, we wait for the
+            # response to be sent.
+            if self.finish_time is not None:
+                self._finished_processing()
+
+    def finish(self):
+        """Called when all response data has been written to this Request.
+
+        Overrides twisted.web.server.Request.finish to record the finish time and do
+        logging.
+        """
+        self.finish_time = time.time()
+        Request.finish(self)
+        if not self._is_processing:
+            with PreserveLoggingContext(self.logcontext):
+                self._finished_processing()
+
+    def connectionLost(self, reason):
+        """Called when the client connection is closed before the response is written.
+
+        Overrides twisted.web.server.Request.connectionLost to record the finish time and
+        do logging.
+        """
+        self.finish_time = time.time()
+        Request.connectionLost(self, reason)
+
+        # we only get here if the connection to the client drops before we send
+        # the response.
+        #
+        # It's useful to log it here so that we can get an idea of when
+        # the client disconnects.
+        with PreserveLoggingContext(self.logcontext):
+            logger.warn(
+                "Error processing request %r: %s %s", self, reason.type, reason.value,
+            )
+
+            if not self._is_processing:
+                self._finished_processing()
 
     def _started_processing(self, servlet_name):
+        """Record the fact that we are processing this request.
+
+        This will log the request's arrival. Once the request completes,
+        be sure to call finished_processing.
+
+        Args:
+            servlet_name (str): the name of the servlet which will be
+                processing this request. This is used in the metrics.
+
+                It is possible to update this afterwards by updating
+                self.request_metrics.name.
+        """
         self.start_time = time.time()
         self.request_metrics = RequestMetrics()
         self.request_metrics.start(
@@ -94,18 +216,32 @@ class SynapseRequest(Request):
         )
 
     def _finished_processing(self):
-        try:
-            context = LoggingContext.current_context()
-            usage = context.get_resource_usage()
-        except Exception:
-            usage = ContextResourceUsage()
+        """Log the completion of this request and update the metrics
+        """
+
+        if self.logcontext is None:
+            # this can happen if the connection closed before we read the
+            # headers (so render was never called). In that case we'll already
+            # have logged a warning, so just bail out.
+            return
+
+        usage = self.logcontext.get_resource_usage()
+
+        if self._processing_finished_time is None:
+            # we completed the request without anything calling processing()
+            self._processing_finished_time = time.time()
 
-        end_time = time.time()
+        # the time between receiving the request and the request handler finishing
+        processing_time = self._processing_finished_time - self.start_time
+
+        # the time between the request handler finishing and the response being sent
+        # to the client (nb may be negative)
+        response_send_time = self.finish_time - self._processing_finished_time
 
         # need to decode as it could be raw utf-8 bytes
         # from a IDN servname in an auth header
         authenticated_entity = self.authenticated_entity
-        if authenticated_entity is not None:
+        if authenticated_entity is not None and isinstance(authenticated_entity, bytes):
             authenticated_entity = authenticated_entity.decode("utf-8", "replace")
 
         # ...or could be raw utf-8 bytes in the User-Agent header.
@@ -116,22 +252,31 @@ class SynapseRequest(Request):
         user_agent = self.get_user_agent()
         if user_agent is not None:
             user_agent = user_agent.decode("utf-8", "replace")
+        else:
+            user_agent = "-"
+
+        code = str(self.code)
+        if not self.finished:
+            # we didn't send the full response before we gave up (presumably because
+            # the connection dropped)
+            code += "!"
 
         self.site.access_logger.info(
             "%s - %s - {%s}"
-            " Processed request: %.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)"
+            " Processed request: %.3fsec/%.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)"
             " %sB %s \"%s %s %s\" \"%s\" [%d dbevts]",
             self.getClientIP(),
             self.site.site_tag,
             authenticated_entity,
-            end_time - self.start_time,
+            processing_time,
+            response_send_time,
             usage.ru_utime,
             usage.ru_stime,
             usage.db_sched_duration_sec,
             usage.db_txn_duration_sec,
             int(usage.db_txn_count),
             self.sentLength,
-            self.code,
+            code,
             self.method,
             self.get_redacted_uri(),
             self.clientproto,
@@ -140,38 +285,10 @@ class SynapseRequest(Request):
         )
 
         try:
-            self.request_metrics.stop(end_time, self)
+            self.request_metrics.stop(self.finish_time, self)
         except Exception as e:
             logger.warn("Failed to stop metrics: %r", e)
 
-    @contextlib.contextmanager
-    def processing(self, servlet_name):
-        """Record the fact that we are processing this request.
-
-        Returns a context manager; the correct way to use this is:
-
-        @defer.inlineCallbacks
-        def handle_request(request):
-            with request.processing("FooServlet"):
-                yield really_handle_the_request()
-
-        This will log the request's arrival. Once the context manager is
-        closed, the completion of the request will be logged, and the various
-        metrics will be updated.
-
-        Args:
-            servlet_name (str): the name of the servlet which will be
-                processing this request. This is used in the metrics.
-
-                It is possible to update this afterwards by updating
-                self.request_metrics.servlet_name.
-        """
-        # TODO: we should probably just move this into render() and finish(),
-        # to save having to call a separate method.
-        self._started_processing(servlet_name)
-        yield
-        self._finished_processing()
-
 
 class XForwardedForRequest(SynapseRequest):
     def __init__(self, *args, **kw):
@@ -217,7 +334,7 @@ class SynapseSite(Site):
         proxied = config.get("x_forwarded", False)
         self.requestFactory = SynapseRequestFactory(self, proxied)
         self.access_logger = logging.getLogger(logger_name)
-        self.server_version_string = server_version_string
+        self.server_version_string = server_version_string.encode('ascii')
 
     def log(self, request):
         pass