# -*- coding: utf-8 -*-
# Copyright 2015, 2016 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Process-level metrics (CPU time, memory, open file descriptors) gathered
# from getrusage() and, where available, the /proc filesystem.  Nothing is
# registered at import time; call register_process_collector() with the
# metrics group the values should be published on.

# Because otherwise 'resource' collides with synapse.metrics.resource
from __future__ import absolute_import

import os
import stat
from resource import getpagesize, getrusage, RUSAGE_SELF


def _clock_ticks_per_sec(default=100):
    """Return the number of clock ticks per second used by /proc/*/stat.

    Falls back to ``default`` when sysconf() is unavailable or does not know
    about SC_CLK_TCK on this platform.
    """
    try:
        ticks = os.sysconf("SC_CLK_TCK")
    except (AttributeError, ValueError, OSError):
        return default
    return ticks if ticks > 0 else default


# Units needed to convert raw /proc/self/stat values: CPU times are counted
# in clock ticks and the resident set size is counted in pages.
TICKS_PER_SEC = _clock_ticks_per_sec()
BYTES_PER_PAGE = getpagesize()

HAVE_PROC_STAT = os.path.exists("/proc/stat")
HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat")
HAVE_PROC_SELF_LIMITS = os.path.exists("/proc/self/limits")
HAVE_PROC_SELF_FD = os.path.exists("/proc/self/fd")

# File-type bits (as returned by stat.S_IFMT) -> label used for the "fds"
# metric.  Anything not listed here is counted as "other".
TYPES = {
    stat.S_IFSOCK: "SOCK",
    stat.S_IFLNK: "LNK",
    stat.S_IFREG: "REG",
    stat.S_IFBLK: "BLK",
    stat.S_IFDIR: "DIR",
    stat.S_IFCHR: "CHR",
    stat.S_IFIFO: "FIFO",
}

# Field indexes from /proc/self/stat, taken from the proc(5) manpage
STAT_FIELDS = {
    "utime": 14,
    "stime": 15,
    "starttime": 22,
    "vsize": 23,
    "rss": 24,
}


# Most recent samples, refreshed by update_resource_metrics().  `stats` is
# updated in place so that references to the dict stay valid.
rusage = None
stats = {}
fd_counts = None


def _read_boot_time():
    """Return the machine's boot time (the "btime" value of /proc/stat).

    Returns None when /proc/stat does not exist or has no "btime" line.
    """
    if not HAVE_PROC_STAT:
        return None

    with open("/proc/stat") as procstat:
        for entry in procstat:
            if entry.startswith("btime "):
                return int(entry.split()[1])
    return None


# In order to report process_start_time_seconds we need to know the
# machine's boot time, because the value in /proc/self/stat is relative to
# this
boot_time = _read_boot_time()


def _parse_stat_line(line):
    """Extract the fields named in STAT_FIELDS from a /proc/<pid>/stat line.

    Args:
        line: the full contents of the stat file, "PID (command) state ...".

    Returns:
        dict mapping each STAT_FIELDS name to its integer value.
    """
    # The command is wrapped in parentheses but may itself contain spaces
    # and ")" characters, so split on the *last* ") " rather than the first.
    raw_stats = line.rsplit(") ", 1)[1].split()

    # subtract 3 from the index, because proc(5) is 1-based, and
    # we've lost the first two fields in PID and COMMAND above
    return {
        name: int(raw_stats[index - 3])
        for name, index in STAT_FIELDS.items()
    }


def update_resource_metrics():
    """Refresh the module-level `rusage`, `stats` and `fd_counts` samples.

    Registered as a collector by register_process_collector(); the metric
    callbacks read the values stored here.
    """
    global rusage, fd_counts

    rusage = getrusage(RUSAGE_SELF)

    if HAVE_PROC_SELF_STAT:
        with open("/proc/self/stat") as statfile:
            stats.update(_parse_stat_line(statfile.read()))

    fd_counts = _process_fds()


def _process_fds():
    """Count this process's open file descriptors, grouped by file type.

    Returns:
        dict keyed by 1-tuples ``(type_label,)`` -- one per TYPES value plus
        ``("other",)`` -- giving the number of descriptors of that type.  All
        counts are zero when /proc/self/fd does not exist.
    """
    counts = {(k,): 0 for k in TYPES.values()}
    counts[("other",)] = 0

    # Not every OS will have a /proc/self/fd directory
    if not HAVE_PROC_SELF_FD:
        return counts

    for fd in os.listdir("/proc/self/fd"):
        try:
            mode = os.stat("/proc/self/fd/%s" % (fd,)).st_mode
        except OSError:
            # the directory handle used by listdir() itself is usually
            # missing by now
            continue

        counts[(TYPES.get(stat.S_IFMT(mode), "other"),)] += 1

    return counts


def _get_max_fds():
    """Return the soft "Max open files" limit from /proc/self/limits.

    Returns None if the file has no such line.
    """
    with open("/proc/self/limits") as limits:
        for entry in limits:
            if not entry.startswith("Max open files "):
                continue
            # Line is  Max open files  $SOFT  $HARD
            return int(entry.split()[3])
    return None


def register_process_collector(process_metrics):
    """Register the process metrics on the given metrics group.

    Args:
        process_metrics: metrics group to publish on.  It must provide
            ``make_subspace(name)``, ``register_collector(func)`` and
            ``register_callback(name, func, labels=...)``; the homeserver
            passes ``get_metrics_for("process")``.
    """
    # Legacy synapse-invented metric names

    resource_metrics = process_metrics.make_subspace("resource")

    resource_metrics.register_collector(update_resource_metrics)

    # msecs
    resource_metrics.register_callback("utime", lambda: rusage.ru_utime * 1000)
    resource_metrics.register_callback("stime", lambda: rusage.ru_stime * 1000)

    # ru_maxrss scaled by 1024 (Linux reports ru_maxrss in kilobytes, so the
    # published value is in bytes there)
    resource_metrics.register_callback("maxrss", lambda: rusage.ru_maxrss * 1024)

    process_metrics.register_callback("fds", _process_fds, labels=["type"])

    # New prometheus-standard metric names

    if HAVE_PROC_SELF_STAT:
        process_metrics.register_callback(
            "cpu_user_seconds_total",
            lambda: float(stats["utime"]) / TICKS_PER_SEC
        )
        process_metrics.register_callback(
            "cpu_system_seconds_total",
            lambda: float(stats["stime"]) / TICKS_PER_SEC
        )
        process_metrics.register_callback(
            "cpu_seconds_total",
            lambda: float(stats["utime"] + stats["stime"]) / TICKS_PER_SEC
        )

        process_metrics.register_callback(
            "virtual_memory_bytes",
            lambda: int(stats["vsize"])
        )
        process_metrics.register_callback(
            "resident_memory_bytes",
            lambda: int(stats["rss"]) * BYTES_PER_PAGE
        )

        # starttime is relative to boot, so this metric can only be reported
        # when the boot time is known.
        if boot_time is not None:
            process_metrics.register_callback(
                "start_time_seconds",
                lambda: boot_time + float(stats["starttime"]) / TICKS_PER_SEC
            )

    if HAVE_PROC_SELF_FD:
        process_metrics.register_callback(
            "open_fds",
            lambda: sum(fd_counts.values())
        )

    if HAVE_PROC_SELF_LIMITS:
        process_metrics.register_callback(
            "max_fds",
            _get_max_fds
        )