diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index 54f35900f8..f27150d411 100755
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -52,6 +52,7 @@ from synapse.config.homeserver import HomeServerConfig
from synapse.crypto import context_factory
from synapse.util.logcontext import LoggingContext
from synapse.metrics import register_memory_metrics, get_metrics_for
+from synapse.metrics.process_collector import register_process_collector
from synapse.metrics.resource import MetricsResource, METRICS_PREFIX
from synapse.replication.resource import ReplicationResource, REPLICATION_PREFIX
from synapse.federation.transport.server import TransportLayerServer
@@ -337,6 +338,7 @@ def setup(config_options):
hs.get_replication_layer().start_get_pdu_cache()
register_memory_metrics(hs)
+ register_process_collector()
reactor.callWhenRunning(start)
diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py
index 76d5998d75..a6b868775d 100644
--- a/synapse/metrics/__init__.py
+++ b/synapse/metrics/__init__.py
@@ -13,14 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# Because otherwise 'resource' collides with synapse.metrics.resource
-from __future__ import absolute_import
-
import logging
-from resource import getrusage, RUSAGE_SELF
import functools
-import os
-import stat
import time
import gc
@@ -36,6 +30,7 @@ logger = logging.getLogger(__name__)
all_metrics = []
+all_collectors = []
class Metrics(object):
@@ -46,6 +41,9 @@ class Metrics(object):
def __init__(self, name):
self.name_prefix = name
+ def register_collector(self, func):
+ all_collectors.append(func)
+
def _register(self, metric_class, name, *args, **kwargs):
full_name = "%s_%s" % (self.name_prefix, name)
@@ -94,8 +92,8 @@ def get_metrics_for(pkg_name):
def render_all():
strs = []
- # TODO(paul): Internal hack
- update_resource_metrics()
+ for collector in all_collectors:
+ collector()
for metric in all_metrics:
try:
@@ -109,62 +107,6 @@ def render_all():
return "\n".join(strs)
-# Now register some standard process-wide state metrics, to give indications of
-# process resource usage
-
-rusage = None
-
-
-def update_resource_metrics():
- global rusage
- rusage = getrusage(RUSAGE_SELF)
-
-resource_metrics = get_metrics_for("process.resource")
-
-# msecs
-resource_metrics.register_callback("utime", lambda: rusage.ru_utime * 1000)
-resource_metrics.register_callback("stime", lambda: rusage.ru_stime * 1000)
-
-# kilobytes
-resource_metrics.register_callback("maxrss", lambda: rusage.ru_maxrss * 1024)
-
-TYPES = {
- stat.S_IFSOCK: "SOCK",
- stat.S_IFLNK: "LNK",
- stat.S_IFREG: "REG",
- stat.S_IFBLK: "BLK",
- stat.S_IFDIR: "DIR",
- stat.S_IFCHR: "CHR",
- stat.S_IFIFO: "FIFO",
-}
-
-
-def _process_fds():
- counts = {(k,): 0 for k in TYPES.values()}
- counts[("other",)] = 0
-
- # Not every OS will have a /proc/self/fd directory
- if not os.path.exists("/proc/self/fd"):
- return counts
-
- for fd in os.listdir("/proc/self/fd"):
- try:
- s = os.stat("/proc/self/fd/%s" % (fd))
- fmt = stat.S_IFMT(s.st_mode)
- if fmt in TYPES:
- t = TYPES[fmt]
- else:
- t = "other"
-
- counts[(t,)] += 1
- except OSError:
- # the dirh itself used by listdir() is usually missing by now
- pass
-
- return counts
-
-get_metrics_for("process").register_callback("fds", _process_fds, labels=["type"])
-
reactor_metrics = get_metrics_for("reactor")
tick_time = reactor_metrics.register_distribution("tick_time")
pending_calls_metric = reactor_metrics.register_distribution("pending_calls")
diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py
index e81af29895..e87b2b80a7 100644
--- a/synapse/metrics/metric.py
+++ b/synapse/metrics/metric.py
@@ -98,9 +98,9 @@ class CallbackMetric(BaseMetric):
value = self.callback()
if self.is_scalar():
- return ["%s %d" % (self.name, value)]
+ return ["%s %.12g" % (self.name, value)]
- return ["%s%s %d" % (self.name, self._render_key(k), value[k])
+ return ["%s%s %.12g" % (self.name, self._render_key(k), value[k])
for k in sorted(value.keys())]
diff --git a/synapse/metrics/process_collector.py b/synapse/metrics/process_collector.py
new file mode 100644
index 0000000000..1c851d9234
--- /dev/null
+++ b/synapse/metrics/process_collector.py
@@ -0,0 +1,181 @@
+# -*- coding: utf-8 -*-
+# Copyright 2015, 2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# absolute_import: make 'import resource' below resolve to the stdlib module rather than synapse.metrics.resource
+from __future__ import absolute_import
+
+import os
+import stat
+from resource import getrusage, RUSAGE_SELF
+
+from synapse.metrics import get_metrics_for
+
+
+TICKS_PER_SEC = 100
+BYTES_PER_PAGE = 4096
+
+HAVE_PROC_STAT = os.path.exists("/proc/stat")
+HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat")
+HAVE_PROC_SELF_LIMITS = os.path.exists("/proc/self/limits")
+HAVE_PROC_SELF_FD = os.path.exists("/proc/self/fd")
+
+TYPES = {
+ stat.S_IFSOCK: "SOCK",
+ stat.S_IFLNK: "LNK",
+ stat.S_IFREG: "REG",
+ stat.S_IFBLK: "BLK",
+ stat.S_IFDIR: "DIR",
+ stat.S_IFCHR: "CHR",
+ stat.S_IFIFO: "FIFO",
+}
+
+# Field indexes from /proc/self/stat, taken from the proc(5) manpage
+STAT_FIELDS = {
+ "utime": 14,
+ "stime": 15,
+ "starttime": 22,
+ "vsize": 23,
+ "rss": 24,
+}
+
+
+rusage = None
+stats = {}
+fd_counts = None
+
+# In order to report process_start_time_seconds we need to know the
+# machine's boot time, because the value in /proc/self/stat is relative to
+# this
+boot_time = None
+if HAVE_PROC_STAT:
+ with open("/proc/stat") as _procstat:
+ for line in _procstat:
+ if line.startswith("btime "):
+ boot_time = int(line.split()[1])
+
+
+def update_resource_metrics():
+ global rusage
+ rusage = getrusage(RUSAGE_SELF)
+
+ if HAVE_PROC_SELF_STAT:
+ global stats
+ with open("/proc/self/stat") as s:
+ line = s.read()
+            # line format is "<pid> (<comm>) <field3> <field4> ..."; split on ") " to skip pid and comm
+ raw_stats = line.split(") ", 1)[1].split(" ")
+
+ for (name, index) in STAT_FIELDS.iteritems():
+ # subtract 3 from the index, because proc(5) is 1-based, and
+ # we've lost the first two fields in PID and COMMAND above
+ stats[name] = int(raw_stats[index - 3])
+
+ global fd_counts
+ fd_counts = _process_fds()
+
+
+def _process_fds():
+ counts = {(k,): 0 for k in TYPES.values()}
+ counts[("other",)] = 0
+
+ # Not every OS will have a /proc/self/fd directory
+ if not HAVE_PROC_SELF_FD:
+ return counts
+
+ for fd in os.listdir("/proc/self/fd"):
+ try:
+ s = os.stat("/proc/self/fd/%s" % (fd))
+ fmt = stat.S_IFMT(s.st_mode)
+ if fmt in TYPES:
+ t = TYPES[fmt]
+ else:
+ t = "other"
+
+ counts[(t,)] += 1
+ except OSError:
+            # the fd for the directory handle opened by listdir() itself has usually been closed by now
+ pass
+
+ return counts
+
+
+def register_process_collector():
+ # Legacy synapse-invented metric names
+
+ resource_metrics = get_metrics_for("process.resource")
+
+ resource_metrics.register_collector(update_resource_metrics)
+
+ # msecs
+ resource_metrics.register_callback("utime", lambda: rusage.ru_utime * 1000)
+ resource_metrics.register_callback("stime", lambda: rusage.ru_stime * 1000)
+
+ # kilobytes
+ resource_metrics.register_callback("maxrss", lambda: rusage.ru_maxrss * 1024)
+
+ get_metrics_for("process").register_callback("fds", _process_fds, labels=["type"])
+
+ # New prometheus-standard metric names
+
+ process_metrics = get_metrics_for("process")
+
+ if HAVE_PROC_SELF_STAT:
+ process_metrics.register_callback(
+ "cpu_user_seconds_total",
+ lambda: float(stats["utime"]) / TICKS_PER_SEC
+ )
+ process_metrics.register_callback(
+ "cpu_system_seconds_total",
+ lambda: float(stats["stime"]) / TICKS_PER_SEC
+ )
+ process_metrics.register_callback(
+ "cpu_seconds_total",
+ lambda: (float(stats["utime"] + stats["stime"])) / TICKS_PER_SEC
+ )
+
+ process_metrics.register_callback(
+ "virtual_memory_bytes",
+ lambda: int(stats["vsize"])
+ )
+ process_metrics.register_callback(
+ "resident_memory_bytes",
+ lambda: int(stats["rss"]) * BYTES_PER_PAGE
+ )
+
+ process_metrics.register_callback(
+ "start_time_seconds",
+ lambda: boot_time + int(stats["starttime"]) / TICKS_PER_SEC
+ )
+
+ if HAVE_PROC_SELF_FD:
+ process_metrics.register_callback(
+ "open_fds",
+ lambda: sum(fd_counts.values())
+ )
+
+ if HAVE_PROC_SELF_LIMITS:
+ def _get_max_fds():
+ with open("/proc/self/limits") as limits:
+ for line in limits:
+ if not line.startswith("Max open files "):
+ continue
+ # Line is Max open files $SOFT $HARD
+ return int(line.split()[3])
+ return None
+
+ process_metrics.register_callback(
+ "max_fds",
+ lambda: _get_max_fds()
+ )
|