summary refs log tree commit diff
path: root/synapse/http
diff options
context:
space:
mode:
authorNeil Johnson <neil@matrix.org>2018-06-06 12:27:33 +0100
committerNeil Johnson <neil@matrix.org>2018-06-06 12:27:33 +0100
commit752b7b32ed1c33651c0c64fbbb4289c3b62ac89b (patch)
treee120f6fffa36a2d4f6eae37f555be078940b7854 /synapse/http
parentMerge pull request #3290 from rubo77/patch-7 (diff)
parent7 char sha in changelog (diff)
downloadsynapse-752b7b32ed1c33651c0c64fbbb4289c3b62ac89b.tar.xz
Merge tag 'v0.31.0'
Changes in synapse v0.31.0 (2018-06-06)
======================================

Most notable change from v0.30.0 is to switch to python prometheus library to improve system
stats reporting. WARNING this changes a number of prometheus metrics in a
backwards-incompatible manner. For more details, see
`docs/metrics-howto.rst <docs/metrics-howto.rst#removal-of-deprecated-metrics--time-based-counters-becoming-histograms-in-0310>`_.

Bug Fixes:

* Fix metric documentation tables (PR #3341)
* Fix LaterGuage error handling (694968f)
* Fix replication metrics (b7e7fd2)

Changes in synapse v0.31.0-rc1 (2018-06-04)
==========================================

Features:

* Switch to the Python Prometheus library (PR #3256, #3274)
* Let users leave the server notice room after joining (PR #3287)

Changes:

* daily user type phone home stats (PR #3264)
* Use iter* methods for _filter_events_for_server (PR #3267)
* Docs on consent bits (PR #3268)
* Remove users from user directory on deactivate (PR #3277)
* Avoid sending consent notice to guest users (PR #3288)
* disable CPUMetrics if no /proc/self/stat (PR #3299)
* Add local and loopback IPv6 addresses to url_preview_ip_range_blacklist (PR #3312) Thanks to @thegcat!
* Consistently use six's iteritems and wrap lazy keys/values in list() if they're not meant to be lazy (PR #3307)
* Add private IPv6 addresses to example config for url preview blacklist (PR #3317) Thanks to @thegcat!
* Reduce stuck read-receipts: ignore depth when updating (PR #3318)
* Put python's logs into Trial when running unit tests (PR #3319)

Changes, python 3 migration:

* Replace some more comparisons with six (PR #3243) Thanks to @NotAFile!
* replace some iteritems with six (PR #3244) Thanks to @NotAFile!
* Add batch_iter to utils (PR #3245) Thanks to @NotAFile!
* use repr, not str (PR #3246) Thanks to @NotAFile!
* Misc Python3 fixes (PR #3247) Thanks to @NotAFile!
* Py3 storage/_base.py (PR #3278) Thanks to @NotAFile!
* more six iteritems (PR #3279) Thanks to @NotAFile!
* More Misc. py3 fixes (PR #3280) Thanks to @NotAFile!
* remaining isintance fixes (PR #3281) Thanks to @NotAFile!
* py3-ize state.py (PR #3283) Thanks to @NotAFile!
* extend tox testing for py3 to avoid regressions (PR #3302) Thanks to @krombel!
* use memoryview in py3 (PR #3303) Thanks to @NotAFile!

Bugs:

* Fix federation backfill bugs (PR #3261)
* federation: fix LaterGauge usage (PR #3328) Thanks to @intelfx!
Diffstat (limited to 'synapse/http')
-rw-r--r--synapse/http/client.py21
-rw-r--r--synapse/http/matrixfederationclient.py24
-rw-r--r--synapse/http/request_metrics.py216
-rw-r--r--synapse/http/server.py4
-rw-r--r--synapse/http/site.py22
5 files changed, 122 insertions, 165 deletions
diff --git a/synapse/http/client.py b/synapse/http/client.py
index 70a19d9b74..4d4eee3d64 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -23,7 +23,6 @@ from synapse.http import cancelled_to_request_timed_out_error
 from synapse.util.async import add_timeout_to_deferred
 from synapse.util.caches import CACHE_SIZE_FACTOR
 from synapse.util.logcontext import make_deferred_yieldable
-import synapse.metrics
 from synapse.http.endpoint import SpiderEndpoint
 
 from canonicaljson import encode_canonical_json
@@ -42,6 +41,7 @@ from twisted.web._newclient import ResponseDone
 
 from six import StringIO
 
+from prometheus_client import Counter
 import simplejson as json
 import logging
 import urllib
@@ -49,16 +49,9 @@ import urllib
 
 logger = logging.getLogger(__name__)
 
-metrics = synapse.metrics.get_metrics_for(__name__)
-
-outgoing_requests_counter = metrics.register_counter(
-    "requests",
-    labels=["method"],
-)
-incoming_responses_counter = metrics.register_counter(
-    "responses",
-    labels=["method", "code"],
-)
+outgoing_requests_counter = Counter("synapse_http_client_requests", "", ["method"])
+incoming_responses_counter = Counter("synapse_http_client_responses", "",
+                                     ["method", "code"])
 
 
 class SimpleHttpClient(object):
@@ -95,7 +88,7 @@ class SimpleHttpClient(object):
     def request(self, method, uri, *args, **kwargs):
         # A small wrapper around self.agent.request() so we can easily attach
         # counters to it
-        outgoing_requests_counter.inc(method)
+        outgoing_requests_counter.labels(method).inc()
 
         logger.info("Sending request %s %s", method, uri)
 
@@ -109,14 +102,14 @@ class SimpleHttpClient(object):
             )
             response = yield make_deferred_yieldable(request_deferred)
 
-            incoming_responses_counter.inc(method, response.code)
+            incoming_responses_counter.labels(method, response.code).inc()
             logger.info(
                 "Received response to  %s %s: %s",
                 method, uri, response.code
             )
             defer.returnValue(response)
         except Exception as e:
-            incoming_responses_counter.inc(method, "ERR")
+            incoming_responses_counter.labels(method, "ERR").inc()
             logger.info(
                 "Error sending request to  %s %s: %s %s",
                 method, uri, type(e).__name__, e.message
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py
index 4b2b85464d..821aed362b 100644
--- a/synapse/http/matrixfederationclient.py
+++ b/synapse/http/matrixfederationclient.py
@@ -42,20 +42,18 @@ import random
 import sys
 import urllib
 from six.moves.urllib import parse as urlparse
+from six import string_types
+
+
+from prometheus_client import Counter
 
 logger = logging.getLogger(__name__)
 outbound_logger = logging.getLogger("synapse.http.outbound")
 
-metrics = synapse.metrics.get_metrics_for(__name__)
-
-outgoing_requests_counter = metrics.register_counter(
-    "requests",
-    labels=["method"],
-)
-incoming_responses_counter = metrics.register_counter(
-    "responses",
-    labels=["method", "code"],
-)
+outgoing_requests_counter = Counter("synapse_http_matrixfederationclient_requests",
+                                    "", ["method"])
+incoming_responses_counter = Counter("synapse_http_matrixfederationclient_responses",
+                                     "", ["method", "code"])
 
 
 MAX_LONG_RETRIES = 10
@@ -553,7 +551,7 @@ class MatrixFederationHttpClient(object):
 
         encoded_args = {}
         for k, vs in args.items():
-            if isinstance(vs, basestring):
+            if isinstance(vs, string_types):
                 vs = [vs]
             encoded_args[k] = [v.encode("UTF-8") for v in vs]
 
@@ -668,7 +666,7 @@ def check_content_type_is_json(headers):
         RuntimeError if the
 
     """
-    c_type = headers.getRawHeaders("Content-Type")
+    c_type = headers.getRawHeaders(b"Content-Type")
     if c_type is None:
         raise RuntimeError(
             "No Content-Type header"
@@ -685,7 +683,7 @@ def check_content_type_is_json(headers):
 def encode_query_args(args):
     encoded_args = {}
     for k, vs in args.items():
-        if isinstance(vs, basestring):
+        if isinstance(vs, string_types):
             vs = [vs]
         encoded_args[k] = [v.encode("UTF-8") for v in vs]
 
diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py
index 5917c83958..dc06f6c443 100644
--- a/synapse/http/request_metrics.py
+++ b/synapse/http/request_metrics.py
@@ -16,137 +16,109 @@
 
 import logging
 
-import synapse.metrics
+from prometheus_client.core import Counter, Histogram
+from synapse.metrics import LaterGauge
+
 from synapse.util.logcontext import LoggingContext
 
 logger = logging.getLogger(__name__)
 
-metrics = synapse.metrics.get_metrics_for("synapse.http.server")
 
 # total number of responses served, split by method/servlet/tag
-response_count = metrics.register_counter(
-    "response_count",
-    labels=["method", "servlet", "tag"],
-    alternative_names=(
-        # the following are all deprecated aliases for the same metric
-        metrics.name_prefix + x for x in (
-            "_requests",
-            "_response_time:count",
-            "_response_ru_utime:count",
-            "_response_ru_stime:count",
-            "_response_db_txn_count:count",
-            "_response_db_txn_duration:count",
-        )
-    )
+response_count = Counter(
+    "synapse_http_server_response_count", "", ["method", "servlet", "tag"]
 )
 
-requests_counter = metrics.register_counter(
-    "requests_received",
-    labels=["method", "servlet", ],
+requests_counter = Counter(
+    "synapse_http_server_requests_received", "", ["method", "servlet"]
 )
 
-outgoing_responses_counter = metrics.register_counter(
-    "responses",
-    labels=["method", "code"],
+outgoing_responses_counter = Counter(
+    "synapse_http_server_responses", "", ["method", "code"]
 )
 
-response_timer = metrics.register_counter(
-    "response_time_seconds",
-    labels=["method", "servlet", "tag"],
-    alternative_names=(
-        metrics.name_prefix + "_response_time:total",
-    ),
+response_timer = Histogram(
+    "synapse_http_server_response_time_seconds", "sec", ["method", "servlet", "tag"]
 )
 
-response_ru_utime = metrics.register_counter(
-    "response_ru_utime_seconds", labels=["method", "servlet", "tag"],
-    alternative_names=(
-        metrics.name_prefix + "_response_ru_utime:total",
-    ),
+response_ru_utime = Counter(
+    "synapse_http_server_response_ru_utime_seconds", "sec", ["method", "servlet", "tag"]
 )
 
-response_ru_stime = metrics.register_counter(
-    "response_ru_stime_seconds", labels=["method", "servlet", "tag"],
-    alternative_names=(
-        metrics.name_prefix + "_response_ru_stime:total",
-    ),
+response_ru_stime = Counter(
+    "synapse_http_server_response_ru_stime_seconds", "sec", ["method", "servlet", "tag"]
 )
 
-response_db_txn_count = metrics.register_counter(
-    "response_db_txn_count", labels=["method", "servlet", "tag"],
-    alternative_names=(
-        metrics.name_prefix + "_response_db_txn_count:total",
-    ),
+response_db_txn_count = Counter(
+    "synapse_http_server_response_db_txn_count", "", ["method", "servlet", "tag"]
 )
 
 # seconds spent waiting for db txns, excluding scheduling time, when processing
 # this request
-response_db_txn_duration = metrics.register_counter(
-    "response_db_txn_duration_seconds", labels=["method", "servlet", "tag"],
-    alternative_names=(
-        metrics.name_prefix + "_response_db_txn_duration:total",
-    ),
+response_db_txn_duration = Counter(
+    "synapse_http_server_response_db_txn_duration_seconds",
+    "",
+    ["method", "servlet", "tag"],
 )
 
 # seconds spent waiting for a db connection, when processing this request
-response_db_sched_duration = metrics.register_counter(
-    "response_db_sched_duration_seconds", labels=["method", "servlet", "tag"]
+response_db_sched_duration = Counter(
+    "synapse_http_server_response_db_sched_duration_seconds",
+    "",
+    ["method", "servlet", "tag"],
 )
 
 # size in bytes of the response written
-response_size = metrics.register_counter(
-    "response_size", labels=["method", "servlet", "tag"]
+response_size = Counter(
+    "synapse_http_server_response_size", "", ["method", "servlet", "tag"]
 )
 
 # In flight metrics are incremented while the requests are in flight, rather
 # than when the response was written.
 
-in_flight_requests_ru_utime = metrics.register_counter(
-    "in_flight_requests_ru_utime_seconds", labels=["method", "servlet"],
+in_flight_requests_ru_utime = Counter(
+    "synapse_http_server_in_flight_requests_ru_utime_seconds",
+    "",
+    ["method", "servlet"],
 )
 
-in_flight_requests_ru_stime = metrics.register_counter(
-    "in_flight_requests_ru_stime_seconds", labels=["method", "servlet"],
+in_flight_requests_ru_stime = Counter(
+    "synapse_http_server_in_flight_requests_ru_stime_seconds",
+    "",
+    ["method", "servlet"],
 )
 
-in_flight_requests_db_txn_count = metrics.register_counter(
-    "in_flight_requests_db_txn_count", labels=["method", "servlet"],
+in_flight_requests_db_txn_count = Counter(
+    "synapse_http_server_in_flight_requests_db_txn_count", "", ["method", "servlet"]
 )
 
 # seconds spent waiting for db txns, excluding scheduling time, when processing
 # this request
-in_flight_requests_db_txn_duration = metrics.register_counter(
-    "in_flight_requests_db_txn_duration_seconds", labels=["method", "servlet"],
+in_flight_requests_db_txn_duration = Counter(
+    "synapse_http_server_in_flight_requests_db_txn_duration_seconds",
+    "",
+    ["method", "servlet"],
 )
 
 # seconds spent waiting for a db connection, when processing this request
-in_flight_requests_db_sched_duration = metrics.register_counter(
-    "in_flight_requests_db_sched_duration_seconds", labels=["method", "servlet"]
+in_flight_requests_db_sched_duration = Counter(
+    "synapse_http_server_in_flight_requests_db_sched_duration_seconds",
+    "",
+    ["method", "servlet"],
 )
 
-
 # The set of all in flight requests, set[RequestMetrics]
 _in_flight_requests = set()
 
 
-def _collect_in_flight():
-    """Called just before metrics are collected, so we use it to update all
-    the in flight request metrics
-    """
-
-    for rm in _in_flight_requests:
-        rm.update_metrics()
-
-
-metrics.register_collector(_collect_in_flight)
-
-
 def _get_in_flight_counts():
     """Returns a count of all in flight requests by (method, server_name)
 
     Returns:
         dict[tuple[str, str], int]
     """
+    for rm in _in_flight_requests:
+        rm.update_metrics()
 
     # Map from (method, name) -> int, the number of in flight requests of that
     # type
@@ -158,16 +130,17 @@ def _get_in_flight_counts():
     return counts
 
 
-metrics.register_callback(
-    "in_flight_requests_count",
+LaterGauge(
+    "synapse_http_request_metrics_in_flight_requests_count",
+    "",
+    ["method", "servlet"],
     _get_in_flight_counts,
-    labels=["method", "servlet"]
 )
 
 
 class RequestMetrics(object):
-    def start(self, time_msec, name, method):
-        self.start = time_msec
+    def start(self, time_sec, name, method):
+        self.start = time_sec
         self.start_context = LoggingContext.current_context()
         self.name = name
         self.method = method
@@ -176,7 +149,7 @@ class RequestMetrics(object):
 
         _in_flight_requests.add(self)
 
-    def stop(self, time_msec, request):
+    def stop(self, time_sec, request):
         _in_flight_requests.discard(self)
 
         context = LoggingContext.current_context()
@@ -192,34 +165,29 @@ class RequestMetrics(object):
                 )
                 return
 
-        outgoing_responses_counter.inc(request.method, str(request.code))
+        outgoing_responses_counter.labels(request.method, str(request.code)).inc()
 
-        response_count.inc(request.method, self.name, tag)
+        response_count.labels(request.method, self.name, tag).inc()
 
-        response_timer.inc_by(
-            time_msec - self.start, request.method,
-            self.name, tag
+        response_timer.labels(request.method, self.name, tag).observe(
+            time_sec - self.start
         )
 
         ru_utime, ru_stime = context.get_resource_usage()
 
-        response_ru_utime.inc_by(
-            ru_utime, request.method, self.name, tag
-        )
-        response_ru_stime.inc_by(
-            ru_stime, request.method, self.name, tag
-        )
-        response_db_txn_count.inc_by(
-            context.db_txn_count, request.method, self.name, tag
+        response_ru_utime.labels(request.method, self.name, tag).inc(ru_utime)
+        response_ru_stime.labels(request.method, self.name, tag).inc(ru_stime)
+        response_db_txn_count.labels(request.method, self.name, tag).inc(
+            context.db_txn_count
         )
-        response_db_txn_duration.inc_by(
-            context.db_txn_duration_ms / 1000., request.method, self.name, tag
+        response_db_txn_duration.labels(request.method, self.name, tag).inc(
+            context.db_txn_duration_sec
         )
-        response_db_sched_duration.inc_by(
-            context.db_sched_duration_ms / 1000., request.method, self.name, tag
+        response_db_sched_duration.labels(request.method, self.name, tag).inc(
+            context.db_sched_duration_sec
         )
 
-        response_size.inc_by(request.sentLength, request.method, self.name, tag)
+        response_size.labels(request.method, self.name, tag).inc(request.sentLength)
 
         # We always call this at the end to ensure that we update the metrics
         # regardless of whether a call to /metrics while the request was in
@@ -229,27 +197,21 @@ class RequestMetrics(object):
     def update_metrics(self):
         """Updates the in flight metrics with values from this request.
         """
-
         diff = self._request_stats.update(self.start_context)
 
-        in_flight_requests_ru_utime.inc_by(
-            diff.ru_utime, self.method, self.name,
-        )
-
-        in_flight_requests_ru_stime.inc_by(
-            diff.ru_stime, self.method, self.name,
-        )
+        in_flight_requests_ru_utime.labels(self.method, self.name).inc(diff.ru_utime)
+        in_flight_requests_ru_stime.labels(self.method, self.name).inc(diff.ru_stime)
 
-        in_flight_requests_db_txn_count.inc_by(
-            diff.db_txn_count, self.method, self.name,
+        in_flight_requests_db_txn_count.labels(self.method, self.name).inc(
+            diff.db_txn_count
         )
 
-        in_flight_requests_db_txn_duration.inc_by(
-            diff.db_txn_duration_ms / 1000., self.method, self.name,
+        in_flight_requests_db_txn_duration.labels(self.method, self.name).inc(
+            diff.db_txn_duration_sec
         )
 
-        in_flight_requests_db_sched_duration.inc_by(
-            diff.db_sched_duration_ms / 1000., self.method, self.name,
+        in_flight_requests_db_sched_duration.labels(self.method, self.name).inc(
+            diff.db_sched_duration_sec
         )
 
 
@@ -258,17 +220,21 @@ class _RequestStats(object):
     """
 
     __slots__ = [
-        "ru_utime", "ru_stime",
-        "db_txn_count", "db_txn_duration_ms", "db_sched_duration_ms",
+        "ru_utime",
+        "ru_stime",
+        "db_txn_count",
+        "db_txn_duration_sec",
+        "db_sched_duration_sec",
     ]
 
-    def __init__(self, ru_utime, ru_stime, db_txn_count,
-                 db_txn_duration_ms, db_sched_duration_ms):
+    def __init__(
+        self, ru_utime, ru_stime, db_txn_count, db_txn_duration_sec, db_sched_duration_sec
+    ):
         self.ru_utime = ru_utime
         self.ru_stime = ru_stime
         self.db_txn_count = db_txn_count
-        self.db_txn_duration_ms = db_txn_duration_ms
-        self.db_sched_duration_ms = db_sched_duration_ms
+        self.db_txn_duration_sec = db_txn_duration_sec
+        self.db_sched_duration_sec = db_sched_duration_sec
 
     @staticmethod
     def from_context(context):
@@ -277,8 +243,8 @@ class _RequestStats(object):
         return _RequestStats(
             ru_utime, ru_stime,
             context.db_txn_count,
-            context.db_txn_duration_ms,
-            context.db_sched_duration_ms,
+            context.db_txn_duration_sec,
+            context.db_sched_duration_sec,
         )
 
     def update(self, context):
@@ -294,14 +260,14 @@ class _RequestStats(object):
             new.ru_utime - self.ru_utime,
             new.ru_stime - self.ru_stime,
             new.db_txn_count - self.db_txn_count,
-            new.db_txn_duration_ms - self.db_txn_duration_ms,
-            new.db_sched_duration_ms - self.db_sched_duration_ms,
+            new.db_txn_duration_sec - self.db_txn_duration_sec,
+            new.db_sched_duration_sec - self.db_sched_duration_sec,
         )
 
         self.ru_utime = new.ru_utime
         self.ru_stime = new.ru_stime
         self.db_txn_count = new.db_txn_count
-        self.db_txn_duration_ms = new.db_txn_duration_ms
-        self.db_sched_duration_ms = new.db_sched_duration_ms
+        self.db_txn_duration_sec = new.db_txn_duration_sec
+        self.db_sched_duration_sec = new.db_sched_duration_sec
 
         return diff
diff --git a/synapse/http/server.py b/synapse/http/server.py
index faf700851a..bc09b8b2be 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -210,8 +210,8 @@ def wrap_request_handler_with_logging(h):
                         # dispatching to the handler, so that the handler
                         # can update the servlet name in the request
                         # metrics
-                        requests_counter.inc(request.method,
-                                             request.request_metrics.name)
+                        requests_counter.labels(request.method,
+                                                request.request_metrics.name).inc()
                         yield d
     return wrapped_request_handler
 
diff --git a/synapse/http/site.py b/synapse/http/site.py
index b608504225..60299657b9 100644
--- a/synapse/http/site.py
+++ b/synapse/http/site.py
@@ -56,7 +56,7 @@ class SynapseRequest(Request):
 
     def __repr__(self):
         # We overwrite this so that we don't log ``access_token``
-        return '<%s at 0x%x method=%s uri=%s clientproto=%s site=%s>' % (
+        return '<%s at 0x%x method=%r uri=%r clientproto=%r site=%r>' % (
             self.__class__.__name__,
             id(self),
             self.method,
@@ -83,7 +83,7 @@ class SynapseRequest(Request):
         return Request.render(self, resrc)
 
     def _started_processing(self, servlet_name):
-        self.start_time = int(time.time() * 1000)
+        self.start_time = time.time()
         self.request_metrics = RequestMetrics()
         self.request_metrics.start(
             self.start_time, name=servlet_name, method=self.method,
@@ -102,26 +102,26 @@ class SynapseRequest(Request):
             context = LoggingContext.current_context()
             ru_utime, ru_stime = context.get_resource_usage()
             db_txn_count = context.db_txn_count
-            db_txn_duration_ms = context.db_txn_duration_ms
-            db_sched_duration_ms = context.db_sched_duration_ms
+            db_txn_duration_sec = context.db_txn_duration_sec
+            db_sched_duration_sec = context.db_sched_duration_sec
         except Exception:
             ru_utime, ru_stime = (0, 0)
-            db_txn_count, db_txn_duration_ms = (0, 0)
+            db_txn_count, db_txn_duration_sec = (0, 0)
 
-        end_time = int(time.time() * 1000)
+        end_time = time.time()
 
         self.site.access_logger.info(
             "%s - %s - {%s}"
-            " Processed request: %dms (%dms, %dms) (%dms/%dms/%d)"
+            " Processed request: %.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)"
             " %sB %s \"%s %s %s\" \"%s\"",
             self.getClientIP(),
             self.site.site_tag,
             self.authenticated_entity,
             end_time - self.start_time,
-            int(ru_utime * 1000),
-            int(ru_stime * 1000),
-            db_sched_duration_ms,
-            db_txn_duration_ms,
+            ru_utime,
+            ru_stime,
+            db_sched_duration_sec,
+            db_txn_duration_sec,
             int(db_txn_count),
             self.sentLength,
             self.code,