summary refs log tree commit diff
diff options
context:
space:
mode:
author: Paul Evans <leonerd@leonerd.org.uk> 2016-11-03 17:22:15 +0000
committer: GitHub <noreply@github.com> 2016-11-03 17:22:15 +0000
commit: 7fc2b5c06329ef3bf97ec30aa60ce801596786a3 (patch)
tree: 1b94859c119edd096b540dd787d1e1f03baf386e
parent: Merge pull request #1192 from matrix-org/erikj/postgres_gist (diff)
parent: Fix copypasto error in metric rename table in docs (diff)
download: synapse-7fc2b5c06329ef3bf97ec30aa60ce801596786a3.tar.xz
Merge pull request #1193 from matrix-org/paul/metrics
More updates to Prometheus metrics exposition
-rw-r--r-- docs/metrics-howto.rst 8
-rw-r--r-- synapse/metrics/__init__.py 16
-rw-r--r-- synapse/metrics/process_collector.py 65
3 files changed, 18 insertions, 71 deletions
diff --git a/docs/metrics-howto.rst b/docs/metrics-howto.rst
index 7aa4757a35..ca10799b00 100644
--- a/docs/metrics-howto.rst
+++ b/docs/metrics-howto.rst
@@ -51,9 +51,9 @@ python_gc_counts            reactor_gc_counts
 
 The twisted-specific reactor metrics have been renamed.
 
-==================================== =================
+==================================== =====================
 New name                             Old name
------------------------------------- -----------------
-python_twisted_reactor_pending_calls reactor_tick_time
+------------------------------------ ---------------------
+python_twisted_reactor_pending_calls reactor_pending_calls
 python_twisted_reactor_tick_time     reactor_tick_time
-==================================== =================
+==================================== =====================
diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py
index 7041da25ce..2265e6e8d6 100644
--- a/synapse/metrics/__init__.py
+++ b/synapse/metrics/__init__.py
@@ -111,18 +111,20 @@ def render_all():
     return "\n".join(strs)
 
 
-reactor_metrics = get_metrics_for("reactor")
-tick_time = reactor_metrics.register_distribution("tick_time")
-pending_calls_metric = reactor_metrics.register_distribution("pending_calls")
+register_process_collector(get_metrics_for("process"))
+
 
-gc_time = reactor_metrics.register_distribution("gc_time", labels=["gen"])
-gc_unreachable = reactor_metrics.register_counter("gc_unreachable", labels=["gen"])
+python_metrics = get_metrics_for("python")
 
-reactor_metrics.register_callback(
+gc_time = python_metrics.register_distribution("gc_time", labels=["gen"])
+gc_unreachable = python_metrics.register_counter("gc_unreachable_total", labels=["gen"])
+python_metrics.register_callback(
     "gc_counts", lambda: {(i,): v for i, v in enumerate(gc.get_count())}, labels=["gen"]
 )
 
-register_process_collector(get_metrics_for("process"))
+reactor_metrics = get_metrics_for("python.twisted.reactor")
+tick_time = reactor_metrics.register_distribution("tick_time")
+pending_calls_metric = reactor_metrics.register_distribution("pending_calls")
 
 
 def runUntilCurrentTimer(func):
diff --git a/synapse/metrics/process_collector.py b/synapse/metrics/process_collector.py
index 0e95582368..6fec3de399 100644
--- a/synapse/metrics/process_collector.py
+++ b/synapse/metrics/process_collector.py
@@ -13,12 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Because otherwise 'resource' collides with synapse.metrics.resource
-from __future__ import absolute_import
-
 import os
-import stat
-from resource import getrusage, RUSAGE_SELF
 
 
 TICKS_PER_SEC = 100
@@ -29,16 +24,6 @@ HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat")
 HAVE_PROC_SELF_LIMITS = os.path.exists("/proc/self/limits")
 HAVE_PROC_SELF_FD = os.path.exists("/proc/self/fd")
 
-TYPES = {
-    stat.S_IFSOCK: "SOCK",
-    stat.S_IFLNK: "LNK",
-    stat.S_IFREG: "REG",
-    stat.S_IFBLK: "BLK",
-    stat.S_IFDIR: "DIR",
-    stat.S_IFCHR: "CHR",
-    stat.S_IFIFO: "FIFO",
-}
-
 # Field indexes from /proc/self/stat, taken from the proc(5) manpage
 STAT_FIELDS = {
     "utime": 14,
@@ -49,9 +34,7 @@ STAT_FIELDS = {
 }
 
 
-rusage = None
 stats = {}
-fd_counts = None
 
 # In order to report process_start_time_seconds we need to know the
 # machine's boot time, because the value in /proc/self/stat is relative to
@@ -65,9 +48,6 @@ if HAVE_PROC_STAT:
 
 
 def update_resource_metrics():
-    global rusage
-    rusage = getrusage(RUSAGE_SELF)
-
     if HAVE_PROC_SELF_STAT:
         global stats
         with open("/proc/self/stat") as s:
@@ -80,52 +60,17 @@ def update_resource_metrics():
                 # we've lost the first two fields in PID and COMMAND above
                 stats[name] = int(raw_stats[index - 3])
 
-    global fd_counts
-    fd_counts = _process_fds()
-
-
-def _process_fds():
-    counts = {(k,): 0 for k in TYPES.values()}
-    counts[("other",)] = 0
 
+def _count_fds():
     # Not every OS will have a /proc/self/fd directory
     if not HAVE_PROC_SELF_FD:
-        return counts
-
-    for fd in os.listdir("/proc/self/fd"):
-        try:
-            s = os.stat("/proc/self/fd/%s" % (fd))
-            fmt = stat.S_IFMT(s.st_mode)
-            if fmt in TYPES:
-                t = TYPES[fmt]
-            else:
-                t = "other"
+        return 0
 
-            counts[(t,)] += 1
-        except OSError:
-            # the dirh itself used by listdir() is usually missing by now
-            pass
-
-    return counts
+    return len(os.listdir("/proc/self/fd"))
 
 
 def register_process_collector(process_metrics):
-    # Legacy synapse-invented metric names
-
-    resource_metrics = process_metrics.make_subspace("resource")
-
-    resource_metrics.register_collector(update_resource_metrics)
-
-    # msecs
-    resource_metrics.register_callback("utime", lambda: rusage.ru_utime * 1000)
-    resource_metrics.register_callback("stime", lambda: rusage.ru_stime * 1000)
-
-    # kilobytes
-    resource_metrics.register_callback("maxrss", lambda: rusage.ru_maxrss * 1024)
-
-    process_metrics.register_callback("fds", _process_fds, labels=["type"])
-
-    # New prometheus-standard metric names
+    process_metrics.register_collector(update_resource_metrics)
 
     if HAVE_PROC_SELF_STAT:
         process_metrics.register_callback(
@@ -158,7 +103,7 @@ def register_process_collector(process_metrics):
     if HAVE_PROC_SELF_FD:
         process_metrics.register_callback(
             "open_fds",
-            lambda: sum(fd_counts.values())
+            lambda: _count_fds()
         )
 
     if HAVE_PROC_SELF_LIMITS: