summary refs log tree commit diff
path: root/synapse/http
diff options
context:
space:
mode:
authorRichard van der Hoff <richard@matrix.org>2018-07-12 09:56:28 +0100
committerRichard van der Hoff <richard@matrix.org>2018-07-12 09:56:28 +0100
commit482d17b58b55e4a62c1b4df9484d1c3af80d94ff (patch)
treed936edf00491834d76c7c7aa651d2f884e0c307b /synapse/http
parentEnforce the specified API for report_event (diff)
parentMerge pull request #3505 from matrix-org/erikj/receipts_cahce (diff)
downloadsynapse-482d17b58b55e4a62c1b4df9484d1c3af80d94ff.tar.xz
Merge branch 'develop' into rav/enforce_report_api
Diffstat (limited to 'synapse/http')
-rw-r--r--synapse/http/__init__.py13
-rw-r--r--synapse/http/additional_resource.py3
-rw-r--r--synapse/http/client.py61
-rw-r--r--synapse/http/endpoint.py97
-rw-r--r--synapse/http/matrixfederationclient.py77
-rw-r--r--synapse/http/request_metrics.py95
-rw-r--r--synapse/http/server.py41
-rw-r--r--synapse/http/servlet.py9
-rw-r--r--synapse/http/site.py53
9 files changed, 257 insertions, 192 deletions
diff --git a/synapse/http/__init__.py b/synapse/http/__init__.py
index 054372e179..58ef8d3ce4 100644
--- a/synapse/http/__init__.py
+++ b/synapse/http/__init__.py
@@ -13,6 +13,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import re
+
 from twisted.internet.defer import CancelledError
 from twisted.python import failure
 
@@ -34,3 +36,14 @@ def cancelled_to_request_timed_out_error(value, timeout):
         value.trap(CancelledError)
         raise RequestTimedOutError()
     return value
+
+
+ACCESS_TOKEN_RE = re.compile(br'(\?.*access(_|%5[Ff])token=)[^&]*(.*)$')
+
+
+def redact_uri(uri):
+    """Strips access tokens from the uri replaces with <redacted>"""
+    return ACCESS_TOKEN_RE.sub(
+        br'\1<redacted>\3',
+        uri
+    )
diff --git a/synapse/http/additional_resource.py b/synapse/http/additional_resource.py
index a797396ade..0e10e3f8f7 100644
--- a/synapse/http/additional_resource.py
+++ b/synapse/http/additional_resource.py
@@ -13,10 +13,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from synapse.http.server import wrap_json_request_handler
 from twisted.web.resource import Resource
 from twisted.web.server import NOT_DONE_YET
 
+from synapse.http.server import wrap_json_request_handler
+
 
 class AdditionalResource(Resource):
     """Resource wrapper for additional_resources
diff --git a/synapse/http/client.py b/synapse/http/client.py
index 4d4eee3d64..d6a0d75b2b 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -13,39 +13,41 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from OpenSSL import SSL
-from OpenSSL.SSL import VERIFY_NONE
+import logging
+import urllib
 
-from synapse.api.errors import (
-    CodeMessageException, MatrixCodeMessageException, SynapseError, Codes,
-)
-from synapse.http import cancelled_to_request_timed_out_error
-from synapse.util.async import add_timeout_to_deferred
-from synapse.util.caches import CACHE_SIZE_FACTOR
-from synapse.util.logcontext import make_deferred_yieldable
-from synapse.http.endpoint import SpiderEndpoint
+from six import StringIO
 
-from canonicaljson import encode_canonical_json
+from canonicaljson import encode_canonical_json, json
+from prometheus_client import Counter
 
-from twisted.internet import defer, reactor, ssl, protocol, task
+from OpenSSL import SSL
+from OpenSSL.SSL import VERIFY_NONE
+from twisted.internet import defer, protocol, reactor, ssl, task
 from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS
+from twisted.web._newclient import ResponseDone
+from twisted.web.client import Agent, BrowserLikeRedirectAgent, ContentDecoderAgent
+from twisted.web.client import FileBodyProducer as TwistedFileBodyProducer
 from twisted.web.client import (
-    BrowserLikeRedirectAgent, ContentDecoderAgent, GzipDecoder, Agent,
-    readBody, PartialDownloadError,
+    GzipDecoder,
     HTTPConnectionPool,
+    PartialDownloadError,
+    readBody,
 )
-from twisted.web.client import FileBodyProducer as TwistedFileBodyProducer
 from twisted.web.http import PotentialDataLoss
 from twisted.web.http_headers import Headers
-from twisted.web._newclient import ResponseDone
-
-from six import StringIO
-
-from prometheus_client import Counter
-import simplejson as json
-import logging
-import urllib
 
+from synapse.api.errors import (
+    CodeMessageException,
+    Codes,
+    MatrixCodeMessageException,
+    SynapseError,
+)
+from synapse.http import cancelled_to_request_timed_out_error, redact_uri
+from synapse.http.endpoint import SpiderEndpoint
+from synapse.util.async import add_timeout_to_deferred
+from synapse.util.caches import CACHE_SIZE_FACTOR
+from synapse.util.logcontext import make_deferred_yieldable
 
 logger = logging.getLogger(__name__)
 
@@ -90,31 +92,32 @@ class SimpleHttpClient(object):
         # counters to it
         outgoing_requests_counter.labels(method).inc()
 
-        logger.info("Sending request %s %s", method, uri)
+        # log request but strip `access_token` (AS requests for example include this)
+        logger.info("Sending request %s %s", method, redact_uri(uri))
 
         try:
             request_deferred = self.agent.request(
                 method, uri, *args, **kwargs
             )
             add_timeout_to_deferred(
-                request_deferred,
-                60, cancelled_to_request_timed_out_error,
+                request_deferred, 60, self.hs.get_reactor(),
+                cancelled_to_request_timed_out_error,
             )
             response = yield make_deferred_yieldable(request_deferred)
 
             incoming_responses_counter.labels(method, response.code).inc()
             logger.info(
                 "Received response to  %s %s: %s",
-                method, uri, response.code
+                method, redact_uri(uri), response.code
             )
             defer.returnValue(response)
         except Exception as e:
             incoming_responses_counter.labels(method, "ERR").inc()
             logger.info(
                 "Error sending request to  %s %s: %s %s",
-                method, uri, type(e).__name__, e.message
+                method, redact_uri(uri), type(e).__name__, e.message
             )
-            raise e
+            raise
 
     @defer.inlineCallbacks
     def post_urlencoded_get_json(self, uri, args={}, headers=None):
diff --git a/synapse/http/endpoint.py b/synapse/http/endpoint.py
index 87a482650d..d65daa72bb 100644
--- a/synapse/http/endpoint.py
+++ b/synapse/http/endpoint.py
@@ -12,17 +12,17 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS
-from twisted.internet import defer, reactor
-from twisted.internet.error import ConnectError
-from twisted.names import client, dns
-from twisted.names.error import DNSNameError, DomainError
-
 import collections
 import logging
 import random
+import re
 import time
 
+from twisted.internet import defer
+from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS
+from twisted.internet.error import ConnectError
+from twisted.names import client, dns
+from twisted.names.error import DNSNameError, DomainError
 
 logger = logging.getLogger(__name__)
 
@@ -38,6 +38,71 @@ _Server = collections.namedtuple(
 )
 
 
+def parse_server_name(server_name):
+    """Split a server name into host/port parts.
+
+    Args:
+        server_name (str): server name to parse
+
+    Returns:
+        Tuple[str, int|None]: host/port parts.
+
+    Raises:
+        ValueError if the server name could not be parsed.
+    """
+    try:
+        if server_name[-1] == ']':
+            # ipv6 literal, hopefully
+            return server_name, None
+
+        domain_port = server_name.rsplit(":", 1)
+        domain = domain_port[0]
+        port = int(domain_port[1]) if domain_port[1:] else None
+        return domain, port
+    except Exception:
+        raise ValueError("Invalid server name '%s'" % server_name)
+
+
+VALID_HOST_REGEX = re.compile(
+    "\\A[0-9a-zA-Z.-]+\\Z",
+)
+
+
+def parse_and_validate_server_name(server_name):
+    """Split a server name into host/port parts and do some basic validation.
+
+    Args:
+        server_name (str): server name to parse
+
+    Returns:
+        Tuple[str, int|None]: host/port parts.
+
+    Raises:
+        ValueError if the server name could not be parsed.
+    """
+    host, port = parse_server_name(server_name)
+
+    # these tests don't need to be bulletproof as we'll find out soon enough
+    # if somebody is giving us invalid data. What we *do* need is to be sure
+    # that nobody is sneaking IP literals in that look like hostnames, etc.
+
+    # look for ipv6 literals
+    if host[0] == '[':
+        if host[-1] != ']':
+            raise ValueError("Mismatched [...] in server name '%s'" % (
+                server_name,
+            ))
+        return host, port
+
+    # otherwise it should only be alphanumerics.
+    if not VALID_HOST_REGEX.match(host):
+        raise ValueError("Server name '%s' contains invalid characters" % (
+            server_name,
+        ))
+
+    return host, port
+
+
 def matrix_federation_endpoint(reactor, destination, ssl_context_factory=None,
                                timeout=None):
     """Construct an endpoint for the given matrix destination.
@@ -50,9 +115,7 @@ def matrix_federation_endpoint(reactor, destination, ssl_context_factory=None,
         timeout (int): connection timeout in seconds
     """
 
-    domain_port = destination.split(":")
-    domain = domain_port[0]
-    port = int(domain_port[1]) if domain_port[1:] else None
+    domain, port = parse_server_name(destination)
 
     endpoint_kw_args = {}
 
@@ -74,21 +137,22 @@ def matrix_federation_endpoint(reactor, destination, ssl_context_factory=None,
             reactor, "matrix", domain, protocol="tcp",
             default_port=default_port, endpoint=transport_endpoint,
             endpoint_kw_args=endpoint_kw_args
-        ))
+        ), reactor)
     else:
         return _WrappingEndpointFac(transport_endpoint(
             reactor, domain, port, **endpoint_kw_args
-        ))
+        ), reactor)
 
 
 class _WrappingEndpointFac(object):
-    def __init__(self, endpoint_fac):
+    def __init__(self, endpoint_fac, reactor):
         self.endpoint_fac = endpoint_fac
+        self.reactor = reactor
 
     @defer.inlineCallbacks
     def connect(self, protocolFactory):
         conn = yield self.endpoint_fac.connect(protocolFactory)
-        conn = _WrappedConnection(conn)
+        conn = _WrappedConnection(conn, self.reactor)
         defer.returnValue(conn)
 
 
@@ -98,9 +162,10 @@ class _WrappedConnection(object):
     """
     __slots__ = ["conn", "last_request"]
 
-    def __init__(self, conn):
+    def __init__(self, conn, reactor):
         object.__setattr__(self, "conn", conn)
         object.__setattr__(self, "last_request", time.time())
+        self._reactor = reactor
 
     def __getattr__(self, name):
         return getattr(self.conn, name)
@@ -131,14 +196,14 @@ class _WrappedConnection(object):
         # Time this connection out if we haven't send a request in the last
         # N minutes
         # TODO: Cancel the previous callLater?
-        reactor.callLater(3 * 60, self._time_things_out_maybe)
+        self._reactor.callLater(3 * 60, self._time_things_out_maybe)
 
         d = self.conn.request(request)
 
         def update_request_time(res):
             self.last_request = time.time()
             # TODO: Cancel the previous callLater?
-            reactor.callLater(3 * 60, self._time_things_out_maybe)
+            self._reactor.callLater(3 * 60, self._time_things_out_maybe)
             return res
 
         d.addCallback(update_request_time)
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index 821aed362b..bf1aa29502 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -13,39 +13,38 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from twisted.internet import defer, reactor, protocol
-from twisted.internet.error import DNSLookupError
-from twisted.web.client import readBody, HTTPConnectionPool, Agent
-from twisted.web.http_headers import Headers
-from twisted.web._newclient import ResponseDone
-
-from synapse.http import cancelled_to_request_timed_out_error
-from synapse.http.endpoint import matrix_federation_endpoint
-import synapse.metrics
-from synapse.util.async import sleep, add_timeout_to_deferred
-from synapse.util import logcontext
-from synapse.util.logcontext import make_deferred_yieldable
-import synapse.util.retryutils
-
-from canonicaljson import encode_canonical_json
-
-from synapse.api.errors import (
-    SynapseError, Codes, HttpResponseException, FederationDeniedError,
-)
-
-from signedjson.sign import sign_json
-
 import cgi
-import simplejson as json
 import logging
 import random
 import sys
 import urllib
-from six.moves.urllib import parse as urlparse
-from six import string_types
 
+from six import string_types
+from six.moves.urllib import parse as urlparse
 
+from canonicaljson import encode_canonical_json, json
 from prometheus_client import Counter
+from signedjson.sign import sign_json
+
+from twisted.internet import defer, protocol, reactor
+from twisted.internet.error import DNSLookupError
+from twisted.web._newclient import ResponseDone
+from twisted.web.client import Agent, HTTPConnectionPool, readBody
+from twisted.web.http_headers import Headers
+
+import synapse.metrics
+import synapse.util.retryutils
+from synapse.api.errors import (
+    Codes,
+    FederationDeniedError,
+    HttpResponseException,
+    SynapseError,
+)
+from synapse.http import cancelled_to_request_timed_out_error
+from synapse.http.endpoint import matrix_federation_endpoint
+from synapse.util import logcontext
+from synapse.util.async import add_timeout_to_deferred
+from synapse.util.logcontext import make_deferred_yieldable
 
 logger = logging.getLogger(__name__)
 outbound_logger = logging.getLogger("synapse.http.outbound")
@@ -193,6 +192,7 @@ class MatrixFederationHttpClient(object):
                         add_timeout_to_deferred(
                             request_deferred,
                             timeout / 1000. if timeout else 60,
+                            self.hs.get_reactor(),
                             cancelled_to_request_timed_out_error,
                         )
                         response = yield make_deferred_yieldable(
@@ -234,7 +234,7 @@ class MatrixFederationHttpClient(object):
                                 delay = min(delay, 2)
                                 delay *= random.uniform(0.8, 1.4)
 
-                            yield sleep(delay)
+                            yield self.clock.sleep(delay)
                             retries_left -= 1
                         else:
                             raise
@@ -260,14 +260,35 @@ class MatrixFederationHttpClient(object):
             defer.returnValue(response)
 
     def sign_request(self, destination, method, url_bytes, headers_dict,
-                     content=None):
+                     content=None, destination_is=None):
+        """
+        Signs a request by adding an Authorization header to headers_dict
+        Args:
+            destination (bytes|None): The desination home server of the request.
+                May be None if the destination is an identity server, in which case
+                destination_is must be non-None.
+            method (bytes): The HTTP method of the request
+            url_bytes (bytes): The URI path of the request
+            headers_dict (dict): Dictionary of request headers to append to
+            content (bytes): The body of the request
+            destination_is (bytes): As 'destination', but if the destination is an
+                identity server
+
+        Returns:
+            None
+        """
         request = {
             "method": method,
             "uri": url_bytes,
             "origin": self.server_name,
-            "destination": destination,
         }
 
+        if destination is not None:
+            request["destination"] = destination
+
+        if destination_is is not None:
+            request["destination_is"] = destination_is
+
         if content is not None:
             request["content"] = content
 
diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py
index dc06f6c443..f24b4b949c 100644
--- a/synapse/http/request_metrics.py
+++ b/synapse/http/request_metrics.py
@@ -17,8 +17,8 @@
 import logging
 
 from prometheus_client.core import Counter, Histogram
-from synapse.metrics import LaterGauge
 
+from synapse.metrics import LaterGauge
 from synapse.util.logcontext import LoggingContext
 
 logger = logging.getLogger(__name__)
@@ -117,13 +117,17 @@ def _get_in_flight_counts():
     Returns:
         dict[tuple[str, str], int]
     """
-    for rm in _in_flight_requests:
+    # Cast to a list to prevent it changing while the Prometheus
+    # thread is collecting metrics
+    reqs = list(_in_flight_requests)
+
+    for rm in reqs:
         rm.update_metrics()
 
     # Map from (method, name) -> int, the number of in flight requests of that
     # type
     counts = {}
-    for rm in _in_flight_requests:
+    for rm in reqs:
         key = (rm.method, rm.name,)
         counts[key] = counts.get(key, 0) + 1
 
@@ -131,7 +135,7 @@ def _get_in_flight_counts():
 
 
 LaterGauge(
-    "synapse_http_request_metrics_in_flight_requests_count",
+    "synapse_http_server_in_flight_requests_count",
     "",
     ["method", "servlet"],
     _get_in_flight_counts,
@@ -145,7 +149,9 @@ class RequestMetrics(object):
         self.name = name
         self.method = method
 
-        self._request_stats = _RequestStats.from_context(self.start_context)
+        # _request_stats records resource usage that we have already added
+        # to the "in flight" metrics.
+        self._request_stats = self.start_context.get_resource_usage()
 
         _in_flight_requests.add(self)
 
@@ -173,18 +179,22 @@ class RequestMetrics(object):
             time_sec - self.start
         )
 
-        ru_utime, ru_stime = context.get_resource_usage()
+        resource_usage = context.get_resource_usage()
 
-        response_ru_utime.labels(request.method, self.name, tag).inc(ru_utime)
-        response_ru_stime.labels(request.method, self.name, tag).inc(ru_stime)
+        response_ru_utime.labels(request.method, self.name, tag).inc(
+            resource_usage.ru_utime,
+        )
+        response_ru_stime.labels(request.method, self.name, tag).inc(
+            resource_usage.ru_stime,
+        )
         response_db_txn_count.labels(request.method, self.name, tag).inc(
-            context.db_txn_count
+            resource_usage.db_txn_count
         )
         response_db_txn_duration.labels(request.method, self.name, tag).inc(
-            context.db_txn_duration_sec
+            resource_usage.db_txn_duration_sec
         )
         response_db_sched_duration.labels(request.method, self.name, tag).inc(
-            context.db_sched_duration_sec
+            resource_usage.db_sched_duration_sec
         )
 
         response_size.labels(request.method, self.name, tag).inc(request.sentLength)
@@ -197,7 +207,10 @@ class RequestMetrics(object):
     def update_metrics(self):
         """Updates the in flight metrics with values from this request.
         """
-        diff = self._request_stats.update(self.start_context)
+        new_stats = self.start_context.get_resource_usage()
+
+        diff = new_stats - self._request_stats
+        self._request_stats = new_stats
 
         in_flight_requests_ru_utime.labels(self.method, self.name).inc(diff.ru_utime)
         in_flight_requests_ru_stime.labels(self.method, self.name).inc(diff.ru_stime)
@@ -213,61 +226,3 @@ class RequestMetrics(object):
         in_flight_requests_db_sched_duration.labels(self.method, self.name).inc(
             diff.db_sched_duration_sec
         )
-
-
-class _RequestStats(object):
-    """Keeps tracks of various metrics for an in flight request.
-    """
-
-    __slots__ = [
-        "ru_utime",
-        "ru_stime",
-        "db_txn_count",
-        "db_txn_duration_sec",
-        "db_sched_duration_sec",
-    ]
-
-    def __init__(
-        self, ru_utime, ru_stime, db_txn_count, db_txn_duration_sec, db_sched_duration_sec
-    ):
-        self.ru_utime = ru_utime
-        self.ru_stime = ru_stime
-        self.db_txn_count = db_txn_count
-        self.db_txn_duration_sec = db_txn_duration_sec
-        self.db_sched_duration_sec = db_sched_duration_sec
-
-    @staticmethod
-    def from_context(context):
-        ru_utime, ru_stime = context.get_resource_usage()
-
-        return _RequestStats(
-            ru_utime, ru_stime,
-            context.db_txn_count,
-            context.db_txn_duration_sec,
-            context.db_sched_duration_sec,
-        )
-
-    def update(self, context):
-        """Updates the current values and returns the difference between the
-        old and new values.
-
-        Returns:
-            _RequestStats: The difference between the old and new values
-        """
-        new = _RequestStats.from_context(context)
-
-        diff = _RequestStats(
-            new.ru_utime - self.ru_utime,
-            new.ru_stime - self.ru_stime,
-            new.db_txn_count - self.db_txn_count,
-            new.db_txn_duration_sec - self.db_txn_duration_sec,
-            new.db_sched_duration_sec - self.db_sched_duration_sec,
-        )
-
-        self.ru_utime = new.ru_utime
-        self.ru_stime = new.ru_stime
-        self.db_txn_count = new.db_txn_count
-        self.db_txn_duration_sec = new.db_txn_duration_sec
-        self.db_sched_duration_sec = new.db_sched_duration_sec
-
-        return diff
diff --git a/synapse/http/server.py b/synapse/http/server.py
index bc09b8b2be..c70fdbdfd2 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -14,34 +14,33 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import cgi
-from six.moves import http_client
+import collections
+import logging
+import urllib
 
-from synapse.api.errors import (
-    cs_exception, SynapseError, CodeMessageException, UnrecognizedRequestError, Codes
-)
-from synapse.http.request_metrics import (
-    requests_counter,
-)
-from synapse.util.logcontext import LoggingContext, PreserveLoggingContext
-from synapse.util.caches import intern_dict
-from synapse.util.metrics import Measure
-import synapse.metrics
-import synapse.events
+from six.moves import http_client
 
-from canonicaljson import (
-    encode_canonical_json, encode_pretty_printed_json
-)
+from canonicaljson import encode_canonical_json, encode_pretty_printed_json, json
 
 from twisted.internet import defer
 from twisted.python import failure
-from twisted.web import server, resource
+from twisted.web import resource, server
 from twisted.web.server import NOT_DONE_YET
 from twisted.web.util import redirectTo
 
-import collections
-import logging
-import urllib
-import simplejson
+import synapse.events
+import synapse.metrics
+from synapse.api.errors import (
+    CodeMessageException,
+    Codes,
+    SynapseError,
+    UnrecognizedRequestError,
+    cs_exception,
+)
+from synapse.http.request_metrics import requests_counter
+from synapse.util.caches import intern_dict
+from synapse.util.logcontext import LoggingContext, PreserveLoggingContext
+from synapse.util.metrics import Measure
 
 logger = logging.getLogger(__name__)
 
@@ -410,7 +409,7 @@ def respond_with_json(request, code, json_object, send_cors=False,
         if canonical_json or synapse.events.USE_FROZEN_DICTS:
             json_bytes = encode_canonical_json(json_object)
         else:
-            json_bytes = simplejson.dumps(json_object)
+            json_bytes = json.dumps(json_object)
 
     return respond_with_json_bytes(
         request, code, json_bytes,
diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py
index ef8e62901b..cf6723563a 100644
--- a/synapse/http/servlet.py
+++ b/synapse/http/servlet.py
@@ -15,10 +15,11 @@
 
 """ This module contains base REST classes for constructing REST servlets. """
 
-from synapse.api.errors import SynapseError, Codes
-
 import logging
-import simplejson
+
+from canonicaljson import json
+
+from synapse.api.errors import Codes, SynapseError
 
 logger = logging.getLogger(__name__)
 
@@ -171,7 +172,7 @@ def parse_json_value_from_request(request, allow_empty_body=False):
         return None
 
     try:
-        content = simplejson.loads(content_bytes)
+        content = json.loads(content_bytes)
     except Exception as e:
         logger.warn("Unable to parse JSON: %s", e)
         raise SynapseError(400, "Content not JSON.", errcode=Codes.NOT_JSON)
diff --git a/synapse/http/site.py b/synapse/http/site.py
index 60299657b9..21e26f9c5e 100644
--- a/synapse/http/site.py
+++ b/synapse/http/site.py
@@ -14,18 +14,16 @@
 
 import contextlib
 import logging
-import re
 import time
 
-from twisted.web.server import Site, Request
+from twisted.web.server import Request, Site
 
+from synapse.http import redact_uri
 from synapse.http.request_metrics import RequestMetrics
-from synapse.util.logcontext import LoggingContext
+from synapse.util.logcontext import LoggingContext, ContextResourceUsage
 
 logger = logging.getLogger(__name__)
 
-ACCESS_TOKEN_RE = re.compile(br'(\?.*access(_|%5[Ff])token=)[^&]*(.*)$')
-
 _next_request_seq = 0
 
 
@@ -69,10 +67,7 @@ class SynapseRequest(Request):
         return "%s-%i" % (self.method, self.request_seq)
 
     def get_redacted_uri(self):
-        return ACCESS_TOKEN_RE.sub(
-            br'\1<redacted>\3',
-            self.uri
-        )
+        return redact_uri(self.uri)
 
     def get_user_agent(self):
         return self.requestHeaders.getRawHeaders(b"User-Agent", [None])[-1]
@@ -100,35 +95,47 @@ class SynapseRequest(Request):
     def _finished_processing(self):
         try:
             context = LoggingContext.current_context()
-            ru_utime, ru_stime = context.get_resource_usage()
-            db_txn_count = context.db_txn_count
-            db_txn_duration_sec = context.db_txn_duration_sec
-            db_sched_duration_sec = context.db_sched_duration_sec
+            usage = context.get_resource_usage()
         except Exception:
-            ru_utime, ru_stime = (0, 0)
-            db_txn_count, db_txn_duration_sec = (0, 0)
+            usage = ContextResourceUsage()
 
         end_time = time.time()
 
+        # need to decode as it could be raw utf-8 bytes
+        # from a IDN servname in an auth header
+        authenticated_entity = self.authenticated_entity
+        if authenticated_entity is not None:
+            authenticated_entity = authenticated_entity.decode("utf-8", "replace")
+
+        # ...or could be raw utf-8 bytes in the User-Agent header.
+        # N.B. if you don't do this, the logger explodes cryptically
+        # with maximum recursion trying to log errors about
+        # the charset problem.
+        # c.f. https://github.com/matrix-org/synapse/issues/3471
+        user_agent = self.get_user_agent()
+        if user_agent is not None:
+            user_agent = user_agent.decode("utf-8", "replace")
+
         self.site.access_logger.info(
             "%s - %s - {%s}"
             " Processed request: %.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)"
-            " %sB %s \"%s %s %s\" \"%s\"",
+            " %sB %s \"%s %s %s\" \"%s\" [%d dbevts]",
             self.getClientIP(),
             self.site.site_tag,
-            self.authenticated_entity,
+            authenticated_entity,
             end_time - self.start_time,
-            ru_utime,
-            ru_stime,
-            db_sched_duration_sec,
-            db_txn_duration_sec,
-            int(db_txn_count),
+            usage.ru_utime,
+            usage.ru_stime,
+            usage.db_sched_duration_sec,
+            usage.db_txn_duration_sec,
+            int(usage.db_txn_count),
             self.sentLength,
             self.code,
             self.method,
             self.get_redacted_uri(),
             self.clientproto,
-            self.get_user_agent(),
+            user_agent,
+            usage.evt_db_fetch_count,
         )
 
         try: