summary refs log tree commit diff
path: root/synapse/util/metrics.py
diff options
context:
space:
mode:
authorNeil Johnson <neil@matrix.org>2018-06-06 12:27:33 +0100
committerNeil Johnson <neil@matrix.org>2018-06-06 12:27:33 +0100
commit752b7b32ed1c33651c0c64fbbb4289c3b62ac89b (patch)
treee120f6fffa36a2d4f6eae37f555be078940b7854 /synapse/util/metrics.py
parentMerge pull request #3290 from rubo77/patch-7 (diff)
parent7 char sha in changelog (diff)
downloadsynapse-752b7b32ed1c33651c0c64fbbb4289c3b62ac89b.tar.xz
Merge tag 'v0.31.0'
Changes in synapse v0.31.0 (2018-06-06)
======================================

Most notable change from v0.30.0 is to switch to python prometheus library to improve system
stats reporting. WARNING this changes a number of prometheus metrics in a
backwards-incompatible manner. For more details, see
`docs/metrics-howto.rst <docs/metrics-howto.rst#removal-of-deprecated-metrics--time-based-counters-becoming-histograms-in-0310>`_.

Bug Fixes:

* Fix metric documentation tables (PR #3341)
* Fix LaterGuage error handling (694968f)
* Fix replication metrics (b7e7fd2)

Changes in synapse v0.31.0-rc1 (2018-06-04)
==========================================

Features:

* Switch to the Python Prometheus library (PR #3256, #3274)
* Let users leave the server notice room after joining (PR #3287)

Changes:

* daily user type phone home stats (PR #3264)
* Use iter* methods for _filter_events_for_server (PR #3267)
* Docs on consent bits (PR #3268)
* Remove users from user directory on deactivate (PR #3277)
* Avoid sending consent notice to guest users (PR #3288)
* disable CPUMetrics if no /proc/self/stat (PR #3299)
* Add local and loopback IPv6 addresses to url_preview_ip_range_blacklist (PR #3312) Thanks to @thegcat!
* Consistently use six's iteritems and wrap lazy keys/values in list() if they're not meant to be lazy (PR #3307)
* Add private IPv6 addresses to example config for url preview blacklist (PR #3317) Thanks to @thegcat!
* Reduce stuck read-receipts: ignore depth when updating (PR #3318)
* Put python's logs into Trial when running unit tests (PR #3319)

Changes, python 3 migration:

* Replace some more comparisons with six (PR #3243) Thanks to @NotAFile!
* replace some iteritems with six (PR #3244) Thanks to @NotAFile!
* Add batch_iter to utils (PR #3245) Thanks to @NotAFile!
* use repr, not str (PR #3246) Thanks to @NotAFile!
* Misc Python3 fixes (PR #3247) Thanks to @NotAFile!
* Py3 storage/_base.py (PR #3278) Thanks to @NotAFile!
* more six iteritems (PR #3279) Thanks to @NotAFile!
* More Misc. py3 fixes (PR #3280) Thanks to @NotAFile!
* remaining isintance fixes (PR #3281) Thanks to @NotAFile!
* py3-ize state.py (PR #3283) Thanks to @NotAFile!
* extend tox testing for py3 to avoid regressions (PR #3302) Thanks to @krombel!
* use memoryview in py3 (PR #3303) Thanks to @NotAFile!

Bugs:

* Fix federation backfill bugs (PR #3261)
* federation: fix LaterGauge usage (PR #3328) Thanks to @intelfx!
Diffstat (limited to 'synapse/util/metrics.py')
-rw-r--r--synapse/util/metrics.py106
1 files changed, 30 insertions, 76 deletions
diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py
index e4b5687a4b..1ba7d65c7c 100644
--- a/synapse/util/metrics.py
+++ b/synapse/util/metrics.py
@@ -15,8 +15,8 @@
 
 from twisted.internet import defer
 
+from prometheus_client import Counter
 from synapse.util.logcontext import LoggingContext
-import synapse.metrics
 
 from functools import wraps
 import logging
@@ -24,66 +24,26 @@ import logging
 
 logger = logging.getLogger(__name__)
 
+block_counter = Counter("synapse_util_metrics_block_count", "", ["block_name"])
 
-metrics = synapse.metrics.get_metrics_for(__name__)
-
-# total number of times we have hit this block
-block_counter = metrics.register_counter(
-    "block_count",
-    labels=["block_name"],
-    alternative_names=(
-        # the following are all deprecated aliases for the same metric
-        metrics.name_prefix + x for x in (
-            "_block_timer:count",
-            "_block_ru_utime:count",
-            "_block_ru_stime:count",
-            "_block_db_txn_count:count",
-            "_block_db_txn_duration:count",
-        )
-    )
-)
-
-block_timer = metrics.register_counter(
-    "block_time_seconds",
-    labels=["block_name"],
-    alternative_names=(
-        metrics.name_prefix + "_block_timer:total",
-    ),
-)
-
-block_ru_utime = metrics.register_counter(
-    "block_ru_utime_seconds", labels=["block_name"],
-    alternative_names=(
-        metrics.name_prefix + "_block_ru_utime:total",
-    ),
-)
-
-block_ru_stime = metrics.register_counter(
-    "block_ru_stime_seconds", labels=["block_name"],
-    alternative_names=(
-        metrics.name_prefix + "_block_ru_stime:total",
-    ),
-)
-
-block_db_txn_count = metrics.register_counter(
-    "block_db_txn_count", labels=["block_name"],
-    alternative_names=(
-        metrics.name_prefix + "_block_db_txn_count:total",
-    ),
-)
+block_timer = Counter("synapse_util_metrics_block_time_seconds", "", ["block_name"])
+
+block_ru_utime = Counter(
+    "synapse_util_metrics_block_ru_utime_seconds", "", ["block_name"])
+
+block_ru_stime = Counter(
+    "synapse_util_metrics_block_ru_stime_seconds", "", ["block_name"])
+
+block_db_txn_count = Counter(
+    "synapse_util_metrics_block_db_txn_count", "", ["block_name"])
 
 # seconds spent waiting for db txns, excluding scheduling time, in this block
-block_db_txn_duration = metrics.register_counter(
-    "block_db_txn_duration_seconds", labels=["block_name"],
-    alternative_names=(
-        metrics.name_prefix + "_block_db_txn_duration:total",
-    ),
-)
+block_db_txn_duration = Counter(
+    "synapse_util_metrics_block_db_txn_duration_seconds", "", ["block_name"])
 
 # seconds spent waiting for a db connection, in this block
-block_db_sched_duration = metrics.register_counter(
-    "block_db_sched_duration_seconds", labels=["block_name"],
-)
+block_db_sched_duration = Counter(
+    "synapse_util_metrics_block_db_sched_duration_seconds", "", ["block_name"])
 
 
 def measure_func(name):
@@ -102,7 +62,7 @@ class Measure(object):
     __slots__ = [
         "clock", "name", "start_context", "start", "new_context", "ru_utime",
         "ru_stime",
-        "db_txn_count", "db_txn_duration_ms", "db_sched_duration_ms",
+        "db_txn_count", "db_txn_duration_sec", "db_sched_duration_sec",
         "created_context",
     ]
 
@@ -114,7 +74,7 @@ class Measure(object):
         self.created_context = False
 
     def __enter__(self):
-        self.start = self.clock.time_msec()
+        self.start = self.clock.time()
         self.start_context = LoggingContext.current_context()
         if not self.start_context:
             self.start_context = LoggingContext("Measure")
@@ -123,17 +83,17 @@ class Measure(object):
 
         self.ru_utime, self.ru_stime = self.start_context.get_resource_usage()
         self.db_txn_count = self.start_context.db_txn_count
-        self.db_txn_duration_ms = self.start_context.db_txn_duration_ms
-        self.db_sched_duration_ms = self.start_context.db_sched_duration_ms
+        self.db_txn_duration_sec = self.start_context.db_txn_duration_sec
+        self.db_sched_duration_sec = self.start_context.db_sched_duration_sec
 
     def __exit__(self, exc_type, exc_val, exc_tb):
         if isinstance(exc_type, Exception) or not self.start_context:
             return
 
-        duration = self.clock.time_msec() - self.start
+        duration = self.clock.time() - self.start
 
-        block_counter.inc(self.name)
-        block_timer.inc_by(duration, self.name)
+        block_counter.labels(self.name).inc()
+        block_timer.labels(self.name).inc(duration)
 
         context = LoggingContext.current_context()
 
@@ -150,19 +110,13 @@ class Measure(object):
 
         ru_utime, ru_stime = context.get_resource_usage()
 
-        block_ru_utime.inc_by(ru_utime - self.ru_utime, self.name)
-        block_ru_stime.inc_by(ru_stime - self.ru_stime, self.name)
-        block_db_txn_count.inc_by(
-            context.db_txn_count - self.db_txn_count, self.name
-        )
-        block_db_txn_duration.inc_by(
-            (context.db_txn_duration_ms - self.db_txn_duration_ms) / 1000.,
-            self.name
-        )
-        block_db_sched_duration.inc_by(
-            (context.db_sched_duration_ms - self.db_sched_duration_ms) / 1000.,
-            self.name
-        )
+        block_ru_utime.labels(self.name).inc(ru_utime - self.ru_utime)
+        block_ru_stime.labels(self.name).inc(ru_stime - self.ru_stime)
+        block_db_txn_count.labels(self.name).inc(context.db_txn_count - self.db_txn_count)
+        block_db_txn_duration.labels(self.name).inc(
+            context.db_txn_duration_sec - self.db_txn_duration_sec)
+        block_db_sched_duration.labels(self.name).inc(
+            context.db_sched_duration_sec - self.db_sched_duration_sec)
 
         if self.created_context:
             self.start_context.__exit__(exc_type, exc_val, exc_tb)