summary refs log tree commit diff
path: root/synapse/http
diff options
context:
space:
mode:
Diffstat (limited to 'synapse/http')
-rw-r--r--synapse/http/__init__.py13
-rw-r--r--synapse/http/client.py30
-rw-r--r--synapse/http/matrixfederationclient.py16
-rw-r--r--synapse/http/request_metrics.py216
-rw-r--r--synapse/http/server.py4
-rw-r--r--synapse/http/site.py31
6 files changed, 137 insertions, 173 deletions
diff --git a/synapse/http/__init__.py b/synapse/http/__init__.py
index 054372e179..58ef8d3ce4 100644
--- a/synapse/http/__init__.py
+++ b/synapse/http/__init__.py
@@ -13,6 +13,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import re
+
 from twisted.internet.defer import CancelledError
 from twisted.python import failure
 
@@ -34,3 +36,14 @@ def cancelled_to_request_timed_out_error(value, timeout):
         value.trap(CancelledError)
         raise RequestTimedOutError()
     return value
+
+
+ACCESS_TOKEN_RE = re.compile(br'(\?.*access(_|%5[Ff])token=)[^&]*(.*)$')
+
+
+def redact_uri(uri):
+    """Strips access tokens from the uri replaces with <redacted>"""
+    return ACCESS_TOKEN_RE.sub(
+        br'\1<redacted>\3',
+        uri
+    )
diff --git a/synapse/http/client.py b/synapse/http/client.py
index 70a19d9b74..8064a84c5c 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -19,11 +19,10 @@ from OpenSSL.SSL import VERIFY_NONE
 from synapse.api.errors import (
     CodeMessageException, MatrixCodeMessageException, SynapseError, Codes,
 )
-from synapse.http import cancelled_to_request_timed_out_error
+from synapse.http import cancelled_to_request_timed_out_error, redact_uri
 from synapse.util.async import add_timeout_to_deferred
 from synapse.util.caches import CACHE_SIZE_FACTOR
 from synapse.util.logcontext import make_deferred_yieldable
-import synapse.metrics
 from synapse.http.endpoint import SpiderEndpoint
 
 from canonicaljson import encode_canonical_json
@@ -42,6 +41,7 @@ from twisted.web._newclient import ResponseDone
 
 from six import StringIO
 
+from prometheus_client import Counter
 import simplejson as json
 import logging
 import urllib
@@ -49,16 +49,9 @@ import urllib
 
 logger = logging.getLogger(__name__)
 
-metrics = synapse.metrics.get_metrics_for(__name__)
-
-outgoing_requests_counter = metrics.register_counter(
-    "requests",
-    labels=["method"],
-)
-incoming_responses_counter = metrics.register_counter(
-    "responses",
-    labels=["method", "code"],
-)
+outgoing_requests_counter = Counter("synapse_http_client_requests", "", ["method"])
+incoming_responses_counter = Counter("synapse_http_client_responses", "",
+                                     ["method", "code"])
 
 
 class SimpleHttpClient(object):
@@ -95,9 +88,10 @@ class SimpleHttpClient(object):
     def request(self, method, uri, *args, **kwargs):
         # A small wrapper around self.agent.request() so we can easily attach
         # counters to it
-        outgoing_requests_counter.inc(method)
+        outgoing_requests_counter.labels(method).inc()
 
-        logger.info("Sending request %s %s", method, uri)
+        # log request but strip `access_token` (AS requests for example include this)
+        logger.info("Sending request %s %s", method, redact_uri(uri))
 
         try:
             request_deferred = self.agent.request(
@@ -109,17 +103,17 @@ class SimpleHttpClient(object):
             )
             response = yield make_deferred_yieldable(request_deferred)
 
-            incoming_responses_counter.inc(method, response.code)
+            incoming_responses_counter.labels(method, response.code).inc()
             logger.info(
                 "Received response to  %s %s: %s",
-                method, uri, response.code
+                method, redact_uri(uri), response.code
             )
             defer.returnValue(response)
         except Exception as e:
-            incoming_responses_counter.inc(method, "ERR")
+            incoming_responses_counter.labels(method, "ERR").inc()
             logger.info(
                 "Error sending request to  %s %s: %s %s",
-                method, uri, type(e).__name__, e.message
+                method, redact_uri(uri), type(e).__name__, e.message
             )
             raise e
 
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index bf56e77c7a..993dc06e02 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -45,19 +45,15 @@ from six.moves.urllib import parse as urlparse
 from six import string_types
 
 
+from prometheus_client import Counter
+
 logger = logging.getLogger(__name__)
 outbound_logger = logging.getLogger("synapse.http.outbound")
 
-metrics = synapse.metrics.get_metrics_for(__name__)
-
-outgoing_requests_counter = metrics.register_counter(
-    "requests",
-    labels=["method"],
-)
-incoming_responses_counter = metrics.register_counter(
-    "responses",
-    labels=["method", "code"],
-)
+outgoing_requests_counter = Counter("synapse_http_matrixfederationclient_requests",
+                                    "", ["method"])
+incoming_responses_counter = Counter("synapse_http_matrixfederationclient_responses",
+                                     "", ["method", "code"])
 
 
 MAX_LONG_RETRIES = 10
diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py
index 5917c83958..dc06f6c443 100644
--- a/synapse/http/request_metrics.py
+++ b/synapse/http/request_metrics.py
@@ -16,137 +16,109 @@
 
 import logging
 
-import synapse.metrics
+from prometheus_client.core import Counter, Histogram
+from synapse.metrics import LaterGauge
+
 from synapse.util.logcontext import LoggingContext
 
 logger = logging.getLogger(__name__)
 
-metrics = synapse.metrics.get_metrics_for("synapse.http.server")
 
 # total number of responses served, split by method/servlet/tag
-response_count = metrics.register_counter(
-    "response_count",
-    labels=["method", "servlet", "tag"],
-    alternative_names=(
-        # the following are all deprecated aliases for the same metric
-        metrics.name_prefix + x for x in (
-            "_requests",
-            "_response_time:count",
-            "_response_ru_utime:count",
-            "_response_ru_stime:count",
-            "_response_db_txn_count:count",
-            "_response_db_txn_duration:count",
-        )
-    )
+response_count = Counter(
+    "synapse_http_server_response_count", "", ["method", "servlet", "tag"]
 )
 
-requests_counter = metrics.register_counter(
-    "requests_received",
-    labels=["method", "servlet", ],
+requests_counter = Counter(
+    "synapse_http_server_requests_received", "", ["method", "servlet"]
 )
 
-outgoing_responses_counter = metrics.register_counter(
-    "responses",
-    labels=["method", "code"],
+outgoing_responses_counter = Counter(
+    "synapse_http_server_responses", "", ["method", "code"]
 )
 
-response_timer = metrics.register_counter(
-    "response_time_seconds",
-    labels=["method", "servlet", "tag"],
-    alternative_names=(
-        metrics.name_prefix + "_response_time:total",
-    ),
+response_timer = Histogram(
+    "synapse_http_server_response_time_seconds", "sec", ["method", "servlet", "tag"]
 )
 
-response_ru_utime = metrics.register_counter(
-    "response_ru_utime_seconds", labels=["method", "servlet", "tag"],
-    alternative_names=(
-        metrics.name_prefix + "_response_ru_utime:total",
-    ),
+response_ru_utime = Counter(
+    "synapse_http_server_response_ru_utime_seconds", "sec", ["method", "servlet", "tag"]
 )
 
-response_ru_stime = metrics.register_counter(
-    "response_ru_stime_seconds", labels=["method", "servlet", "tag"],
-    alternative_names=(
-        metrics.name_prefix + "_response_ru_stime:total",
-    ),
+response_ru_stime = Counter(
+    "synapse_http_server_response_ru_stime_seconds", "sec", ["method", "servlet", "tag"]
 )
 
-response_db_txn_count = metrics.register_counter(
-    "response_db_txn_count", labels=["method", "servlet", "tag"],
-    alternative_names=(
-        metrics.name_prefix + "_response_db_txn_count:total",
-    ),
+response_db_txn_count = Counter(
+    "synapse_http_server_response_db_txn_count", "", ["method", "servlet", "tag"]
 )
 
 # seconds spent waiting for db txns, excluding scheduling time, when processing
 # this request
-response_db_txn_duration = metrics.register_counter(
-    "response_db_txn_duration_seconds", labels=["method", "servlet", "tag"],
-    alternative_names=(
-        metrics.name_prefix + "_response_db_txn_duration:total",
-    ),
+response_db_txn_duration = Counter(
+    "synapse_http_server_response_db_txn_duration_seconds",
+    "",
+    ["method", "servlet", "tag"],
 )
 
 # seconds spent waiting for a db connection, when processing this request
-response_db_sched_duration = metrics.register_counter(
-    "response_db_sched_duration_seconds", labels=["method", "servlet", "tag"]
+response_db_sched_duration = Counter(
+    "synapse_http_server_response_db_sched_duration_seconds",
+    "",
+    ["method", "servlet", "tag"],
 )
 
 # size in bytes of the response written
-response_size = metrics.register_counter(
-    "response_size", labels=["method", "servlet", "tag"]
+response_size = Counter(
+    "synapse_http_server_response_size", "", ["method", "servlet", "tag"]
 )
 
 # In flight metrics are incremented while the requests are in flight, rather
 # than when the response was written.
 
-in_flight_requests_ru_utime = metrics.register_counter(
-    "in_flight_requests_ru_utime_seconds", labels=["method", "servlet"],
+in_flight_requests_ru_utime = Counter(
+    "synapse_http_server_in_flight_requests_ru_utime_seconds",
+    "",
+    ["method", "servlet"],
 )
 
-in_flight_requests_ru_stime = metrics.register_counter(
-    "in_flight_requests_ru_stime_seconds", labels=["method", "servlet"],
+in_flight_requests_ru_stime = Counter(
+    "synapse_http_server_in_flight_requests_ru_stime_seconds",
+    "",
+    ["method", "servlet"],
 )
 
-in_flight_requests_db_txn_count = metrics.register_counter(
-    "in_flight_requests_db_txn_count", labels=["method", "servlet"],
+in_flight_requests_db_txn_count = Counter(
+    "synapse_http_server_in_flight_requests_db_txn_count", "", ["method", "servlet"]
 )
 
 # seconds spent waiting for db txns, excluding scheduling time, when processing
 # this request
-in_flight_requests_db_txn_duration = metrics.register_counter(
-    "in_flight_requests_db_txn_duration_seconds", labels=["method", "servlet"],
+in_flight_requests_db_txn_duration = Counter(
+    "synapse_http_server_in_flight_requests_db_txn_duration_seconds",
+    "",
+    ["method", "servlet"],
 )
 
 # seconds spent waiting for a db connection, when processing this request
-in_flight_requests_db_sched_duration = metrics.register_counter(
-    "in_flight_requests_db_sched_duration_seconds", labels=["method", "servlet"]
+in_flight_requests_db_sched_duration = Counter(
+    "synapse_http_server_in_flight_requests_db_sched_duration_seconds",
+    "",
+    ["method", "servlet"],
 )
 
-
 # The set of all in flight requests, set[RequestMetrics]
 _in_flight_requests = set()
 
 
-def _collect_in_flight():
-    """Called just before metrics are collected, so we use it to update all
-    the in flight request metrics
-    """
-
-    for rm in _in_flight_requests:
-        rm.update_metrics()
-
-
-metrics.register_collector(_collect_in_flight)
-
-
 def _get_in_flight_counts():
     """Returns a count of all in flight requests by (method, server_name)
 
     Returns:
         dict[tuple[str, str], int]
     """
+    for rm in _in_flight_requests:
+        rm.update_metrics()
 
     # Map from (method, name) -> int, the number of in flight requests of that
     # type
@@ -158,16 +130,17 @@ def _get_in_flight_counts():
     return counts
 
 
-metrics.register_callback(
-    "in_flight_requests_count",
+LaterGauge(
+    "synapse_http_request_metrics_in_flight_requests_count",
+    "",
+    ["method", "servlet"],
     _get_in_flight_counts,
-    labels=["method", "servlet"]
 )
 
 
 class RequestMetrics(object):
-    def start(self, time_msec, name, method):
-        self.start = time_msec
+    def start(self, time_sec, name, method):
+        self.start = time_sec
         self.start_context = LoggingContext.current_context()
         self.name = name
         self.method = method
@@ -176,7 +149,7 @@ class RequestMetrics(object):
 
         _in_flight_requests.add(self)
 
-    def stop(self, time_msec, request):
+    def stop(self, time_sec, request):
         _in_flight_requests.discard(self)
 
         context = LoggingContext.current_context()
@@ -192,34 +165,29 @@ class RequestMetrics(object):
                 )
                 return
 
-        outgoing_responses_counter.inc(request.method, str(request.code))
+        outgoing_responses_counter.labels(request.method, str(request.code)).inc()
 
-        response_count.inc(request.method, self.name, tag)
+        response_count.labels(request.method, self.name, tag).inc()
 
-        response_timer.inc_by(
-            time_msec - self.start, request.method,
-            self.name, tag
+        response_timer.labels(request.method, self.name, tag).observe(
+            time_sec - self.start
         )
 
         ru_utime, ru_stime = context.get_resource_usage()
 
-        response_ru_utime.inc_by(
-            ru_utime, request.method, self.name, tag
-        )
-        response_ru_stime.inc_by(
-            ru_stime, request.method, self.name, tag
-        )
-        response_db_txn_count.inc_by(
-            context.db_txn_count, request.method, self.name, tag
+        response_ru_utime.labels(request.method, self.name, tag).inc(ru_utime)
+        response_ru_stime.labels(request.method, self.name, tag).inc(ru_stime)
+        response_db_txn_count.labels(request.method, self.name, tag).inc(
+            context.db_txn_count
         )
-        response_db_txn_duration.inc_by(
-            context.db_txn_duration_ms / 1000., request.method, self.name, tag
+        response_db_txn_duration.labels(request.method, self.name, tag).inc(
+            context.db_txn_duration_sec
         )
-        response_db_sched_duration.inc_by(
-            context.db_sched_duration_ms / 1000., request.method, self.name, tag
+        response_db_sched_duration.labels(request.method, self.name, tag).inc(
+            context.db_sched_duration_sec
         )
 
-        response_size.inc_by(request.sentLength, request.method, self.name, tag)
+        response_size.labels(request.method, self.name, tag).inc(request.sentLength)
 
         # We always call this at the end to ensure that we update the metrics
         # regardless of whether a call to /metrics while the request was in
@@ -229,27 +197,21 @@ class RequestMetrics(object):
     def update_metrics(self):
         """Updates the in flight metrics with values from this request.
         """
-
         diff = self._request_stats.update(self.start_context)
 
-        in_flight_requests_ru_utime.inc_by(
-            diff.ru_utime, self.method, self.name,
-        )
-
-        in_flight_requests_ru_stime.inc_by(
-            diff.ru_stime, self.method, self.name,
-        )
+        in_flight_requests_ru_utime.labels(self.method, self.name).inc(diff.ru_utime)
+        in_flight_requests_ru_stime.labels(self.method, self.name).inc(diff.ru_stime)
 
-        in_flight_requests_db_txn_count.inc_by(
-            diff.db_txn_count, self.method, self.name,
+        in_flight_requests_db_txn_count.labels(self.method, self.name).inc(
+            diff.db_txn_count
         )
 
-        in_flight_requests_db_txn_duration.inc_by(
-            diff.db_txn_duration_ms / 1000., self.method, self.name,
+        in_flight_requests_db_txn_duration.labels(self.method, self.name).inc(
+            diff.db_txn_duration_sec
         )
 
-        in_flight_requests_db_sched_duration.inc_by(
-            diff.db_sched_duration_ms / 1000., self.method, self.name,
+        in_flight_requests_db_sched_duration.labels(self.method, self.name).inc(
+            diff.db_sched_duration_sec
         )
 
 
@@ -258,17 +220,21 @@ class _RequestStats(object):
     """
 
     __slots__ = [
-        "ru_utime", "ru_stime",
-        "db_txn_count", "db_txn_duration_ms", "db_sched_duration_ms",
+        "ru_utime",
+        "ru_stime",
+        "db_txn_count",
+        "db_txn_duration_sec",
+        "db_sched_duration_sec",
     ]
 
-    def __init__(self, ru_utime, ru_stime, db_txn_count,
-                 db_txn_duration_ms, db_sched_duration_ms):
+    def __init__(
+        self, ru_utime, ru_stime, db_txn_count, db_txn_duration_sec, db_sched_duration_sec
+    ):
         self.ru_utime = ru_utime
         self.ru_stime = ru_stime
         self.db_txn_count = db_txn_count
-        self.db_txn_duration_ms = db_txn_duration_ms
-        self.db_sched_duration_ms = db_sched_duration_ms
+        self.db_txn_duration_sec = db_txn_duration_sec
+        self.db_sched_duration_sec = db_sched_duration_sec
 
     @staticmethod
     def from_context(context):
@@ -277,8 +243,8 @@ class _RequestStats(object):
         return _RequestStats(
             ru_utime, ru_stime,
             context.db_txn_count,
-            context.db_txn_duration_ms,
-            context.db_sched_duration_ms,
+            context.db_txn_duration_sec,
+            context.db_sched_duration_sec,
         )
 
     def update(self, context):
@@ -294,14 +260,14 @@ class _RequestStats(object):
             new.ru_utime - self.ru_utime,
             new.ru_stime - self.ru_stime,
             new.db_txn_count - self.db_txn_count,
-            new.db_txn_duration_ms - self.db_txn_duration_ms,
-            new.db_sched_duration_ms - self.db_sched_duration_ms,
+            new.db_txn_duration_sec - self.db_txn_duration_sec,
+            new.db_sched_duration_sec - self.db_sched_duration_sec,
         )
 
         self.ru_utime = new.ru_utime
         self.ru_stime = new.ru_stime
         self.db_txn_count = new.db_txn_count
-        self.db_txn_duration_ms = new.db_txn_duration_ms
-        self.db_sched_duration_ms = new.db_sched_duration_ms
+        self.db_txn_duration_sec = new.db_txn_duration_sec
+        self.db_sched_duration_sec = new.db_sched_duration_sec
 
         return diff
diff --git a/synapse/http/server.py b/synapse/http/server.py
index faf700851a..bc09b8b2be 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -210,8 +210,8 @@ def wrap_request_handler_with_logging(h):
                         # dispatching to the handler, so that the handler
                         # can update the servlet name in the request
                         # metrics
-                        requests_counter.inc(request.method,
-                                             request.request_metrics.name)
+                        requests_counter.labels(request.method,
+                                                request.request_metrics.name).inc()
                         yield d
     return wrapped_request_handler
 
diff --git a/synapse/http/site.py b/synapse/http/site.py
index b608504225..2664006f8c 100644
--- a/synapse/http/site.py
+++ b/synapse/http/site.py
@@ -14,18 +14,16 @@
 
 import contextlib
 import logging
-import re
 import time
 
 from twisted.web.server import Site, Request
 
+from synapse.http import redact_uri
 from synapse.http.request_metrics import RequestMetrics
 from synapse.util.logcontext import LoggingContext
 
 logger = logging.getLogger(__name__)
 
-ACCESS_TOKEN_RE = re.compile(br'(\?.*access(_|%5[Ff])token=)[^&]*(.*)$')
-
 _next_request_seq = 0
 
 
@@ -56,7 +54,7 @@ class SynapseRequest(Request):
 
     def __repr__(self):
         # We overwrite this so that we don't log ``access_token``
-        return '<%s at 0x%x method=%s uri=%s clientproto=%s site=%s>' % (
+        return '<%s at 0x%x method=%r uri=%r clientproto=%r site=%r>' % (
             self.__class__.__name__,
             id(self),
             self.method,
@@ -69,10 +67,7 @@ class SynapseRequest(Request):
         return "%s-%i" % (self.method, self.request_seq)
 
     def get_redacted_uri(self):
-        return ACCESS_TOKEN_RE.sub(
-            br'\1<redacted>\3',
-            self.uri
-        )
+        return redact_uri(self.uri)
 
     def get_user_agent(self):
         return self.requestHeaders.getRawHeaders(b"User-Agent", [None])[-1]
@@ -83,7 +78,7 @@ class SynapseRequest(Request):
         return Request.render(self, resrc)
 
     def _started_processing(self, servlet_name):
-        self.start_time = int(time.time() * 1000)
+        self.start_time = time.time()
         self.request_metrics = RequestMetrics()
         self.request_metrics.start(
             self.start_time, name=servlet_name, method=self.method,
@@ -102,26 +97,26 @@ class SynapseRequest(Request):
             context = LoggingContext.current_context()
             ru_utime, ru_stime = context.get_resource_usage()
             db_txn_count = context.db_txn_count
-            db_txn_duration_ms = context.db_txn_duration_ms
-            db_sched_duration_ms = context.db_sched_duration_ms
+            db_txn_duration_sec = context.db_txn_duration_sec
+            db_sched_duration_sec = context.db_sched_duration_sec
         except Exception:
             ru_utime, ru_stime = (0, 0)
-            db_txn_count, db_txn_duration_ms = (0, 0)
+            db_txn_count, db_txn_duration_sec = (0, 0)
 
-        end_time = int(time.time() * 1000)
+        end_time = time.time()
 
         self.site.access_logger.info(
             "%s - %s - {%s}"
-            " Processed request: %dms (%dms, %dms) (%dms/%dms/%d)"
+            " Processed request: %.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)"
             " %sB %s \"%s %s %s\" \"%s\"",
             self.getClientIP(),
             self.site.site_tag,
             self.authenticated_entity,
             end_time - self.start_time,
-            int(ru_utime * 1000),
-            int(ru_stime * 1000),
-            db_sched_duration_ms,
-            db_txn_duration_ms,
+            ru_utime,
+            ru_stime,
+            db_sched_duration_sec,
+            db_txn_duration_sec,
             int(db_txn_count),
             self.sentLength,
             self.code,