diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py
index 2265e6e8d6..a9158fc066 100644
--- a/synapse/metrics/__init__.py
+++ b/synapse/metrics/__init__.py
@@ -13,118 +13,198 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import logging
import functools
-import time
import gc
+import logging
+import os
+import platform
+import time
-from twisted.internet import reactor
-
-from .metric import (
- CounterMetric, CallbackMetric, DistributionMetric, CacheMetric,
- MemoryUsageMetric,
-)
-from .process_collector import register_process_collector
+import attr
+from prometheus_client import Counter, Gauge, Histogram
+from prometheus_client.core import REGISTRY, GaugeMetricFamily
+from twisted.internet import reactor
logger = logging.getLogger(__name__)
-
+running_on_pypy = platform.python_implementation() == "PyPy"
all_metrics = []
all_collectors = []
+all_gauges = {}
+HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat")
-class Metrics(object):
- """ A single Metrics object gives a (mutable) slice view of the all_metrics
- dict, allowing callers to easily register new metrics that are namespaced
- nicely."""
- def __init__(self, name):
- self.name_prefix = name
+class RegistryProxy(object):
- def make_subspace(self, name):
- return Metrics("%s_%s" % (self.name_prefix, name))
+ @staticmethod
+ def collect():
+ for metric in REGISTRY.collect():
+ if not metric.name.startswith("__"):
+ yield metric
- def register_collector(self, func):
- all_collectors.append(func)
- def _register(self, metric_class, name, *args, **kwargs):
- full_name = "%s_%s" % (self.name_prefix, name)
+@attr.s(hash=True)
+class LaterGauge(object):
- metric = metric_class(full_name, *args, **kwargs)
+ name = attr.ib()
+ desc = attr.ib()
+ labels = attr.ib(hash=False)
+ caller = attr.ib()
- all_metrics.append(metric)
- return metric
+ def collect(self):
- def register_counter(self, *args, **kwargs):
- return self._register(CounterMetric, *args, **kwargs)
+ g = GaugeMetricFamily(self.name, self.desc, labels=self.labels)
- def register_callback(self, *args, **kwargs):
- return self._register(CallbackMetric, *args, **kwargs)
+ try:
+ calls = self.caller()
+ except Exception:
+ logger.exception(
+ "Exception running callback for LaterGauge(%s)",
+ self.name,
+ )
+ yield g
+ return
- def register_distribution(self, *args, **kwargs):
- return self._register(DistributionMetric, *args, **kwargs)
+ if isinstance(calls, dict):
+ for k, v in calls.items():
+ g.add_metric(k, v)
+ else:
+ g.add_metric([], calls)
- def register_cache(self, *args, **kwargs):
- return self._register(CacheMetric, *args, **kwargs)
+ yield g
+ def __attrs_post_init__(self):
+ self._register()
-def register_memory_metrics(hs):
- try:
- import psutil
- process = psutil.Process()
- process.memory_info().rss
- except (ImportError, AttributeError):
- logger.warn(
- "psutil is not installed or incorrect version."
- " Disabling memory metrics."
- )
- return
- metric = MemoryUsageMetric(hs, psutil)
- all_metrics.append(metric)
+    def _register(self):
+        if self.name in all_gauges:  # same-named gauge exists: swap it out
+            logger.warning("%s already registered, reregistering", self.name)
+            REGISTRY.unregister(all_gauges.pop(self.name))
+        REGISTRY.register(self)
+        all_gauges[self.name] = self
-def get_metrics_for(pkg_name):
- """ Returns a Metrics instance for conveniently creating metrics
- namespaced with the given name prefix. """
- # Convert a "package.name" to "package_name" because Prometheus doesn't
- # let us use . in metric names
- return Metrics(pkg_name.replace(".", "_"))
+#
+# Detailed CPU metrics
+#
+class CPUMetrics(object):
-def render_all():
- strs = []
+ def __init__(self):
+ ticks_per_sec = 100
+ try:
+ # Try and get the system config
+ ticks_per_sec = os.sysconf('SC_CLK_TCK')
+ except (ValueError, TypeError, AttributeError):
+ pass
- for collector in all_collectors:
- collector()
+ self.ticks_per_sec = ticks_per_sec
- for metric in all_metrics:
- try:
- strs += metric.render()
- except Exception:
- strs += ["# FAILED to render"]
- logger.exception("Failed to render metric")
+    def collect(self):
+        if not HAVE_PROC_SELF_STAT:
+            return
- strs.append("") # to generate a final CRLF
+        with open("/proc/self/stat") as s:
+            line = s.read()
+            raw_stats = line.rsplit(") ", 1)[1].split(" ")  # comm may hold ") "
- return "\n".join(strs)
+        user = GaugeMetricFamily("process_cpu_user_seconds_total", "")
+        user.add_metric([], float(raw_stats[11]) / self.ticks_per_sec)
+        yield user
+        system = GaugeMetricFamily("process_cpu_system_seconds_total", "")
+        system.add_metric([], float(raw_stats[12]) / self.ticks_per_sec)
+        yield system
-register_process_collector(get_metrics_for("process"))
+REGISTRY.register(CPUMetrics())
-python_metrics = get_metrics_for("python")
+#
+# Python GC metrics
+#
-gc_time = python_metrics.register_distribution("gc_time", labels=["gen"])
-gc_unreachable = python_metrics.register_counter("gc_unreachable_total", labels=["gen"])
-python_metrics.register_callback(
- "gc_counts", lambda: {(i,): v for i, v in enumerate(gc.get_count())}, labels=["gen"]
+gc_unreachable = Gauge("python_gc_unreachable_total", "Unreachable GC objects", ["gen"])
+gc_time = Histogram(
+ "python_gc_time",
+ "Time taken to GC (sec)",
+ ["gen"],
+ buckets=[0.0025, 0.005, 0.01, 0.025, 0.05, 0.10, 0.25, 0.50, 1.00, 2.50,
+ 5.00, 7.50, 15.00, 30.00, 45.00, 60.00],
)
-reactor_metrics = get_metrics_for("python.twisted.reactor")
-tick_time = reactor_metrics.register_distribution("tick_time")
-pending_calls_metric = reactor_metrics.register_distribution("pending_calls")
+
+class GCCounts(object):
+    """Collector reporting gc.get_count() as one gauge sample per index."""
+    def collect(self):
+        family = GaugeMetricFamily(
+            "python_gc_counts", "GC object counts", labels=["gen"])
+        for gen, count in enumerate(gc.get_count()):
+            family.add_metric([str(gen)], count)
+        yield family
+
+
+if not running_on_pypy:
+ REGISTRY.register(GCCounts())
+
+#
+# Twisted reactor metrics
+#
+
+tick_time = Histogram(
+ "python_twisted_reactor_tick_time",
+ "Tick time of the Twisted reactor (sec)",
+ buckets=[0.001, 0.002, 0.005, 0.01, 0.025, 0.05, 0.1, 0.2, 0.5, 1, 2, 5],
+)
+pending_calls_metric = Histogram(
+ "python_twisted_reactor_pending_calls",
+ "Pending calls",
+ buckets=[1, 2, 5, 10, 25, 50, 100, 250, 500, 1000],
+)
+
+#
+# Federation Metrics
+#
+
+sent_edus_counter = Counter("synapse_federation_client_sent_edus", "")
+
+sent_transactions_counter = Counter("synapse_federation_client_sent_transactions", "")
+
+events_processed_counter = Counter("synapse_federation_client_events_processed", "")
+
+# Used to track where various components have processed in the event stream,
+# e.g. federation sending, appservice sending, etc.
+event_processing_positions = Gauge("synapse_event_processing_positions", "", ["name"])
+
+# Used to track the current max events stream position
+event_persisted_position = Gauge("synapse_event_persisted_position", "")
+
+# Used to track the received_ts of the last event processed by various
+# components
+event_processing_last_ts = Gauge("synapse_event_processing_last_ts", "", ["name"])
+
+# Used to track the lag processing events. This is the time difference
+# between the last processed event's received_ts and the time it was
+# finished being processed.
+event_processing_lag = Gauge("synapse_event_processing_lag", "", ["name"])
+
+last_ticked = time.time()
+
+
+class ReactorLastSeenMetric(object):
+    """Collector for the time elapsed since last_ticked was last updated."""
+    def collect(self):
+        elapsed = time.time() - last_ticked
+        gauge = GaugeMetricFamily(
+            "python_twisted_reactor_last_seen",
+            "Seconds since the Twisted reactor was last seen")
+        gauge.add_metric([], elapsed)
+        yield gauge
+
+
+REGISTRY.register(ReactorLastSeenMetric())
def runUntilCurrentTimer(func):
@@ -146,12 +226,25 @@ def runUntilCurrentTimer(func):
num_pending += 1
num_pending += len(reactor.threadCallQueue)
-
- start = time.time() * 1000
+ start = time.time()
ret = func(*args, **kwargs)
- end = time.time() * 1000
- tick_time.inc_by(end - start)
- pending_calls_metric.inc_by(num_pending)
+ end = time.time()
+
+ # record the amount of wallclock time spent running pending calls.
+ # This is a proxy for the actual amount of time between reactor polls,
+ # since about 25% of time is actually spent running things triggered by
+ # I/O events, but that is harder to capture without rewriting half the
+ # reactor.
+ tick_time.observe(end - start)
+ pending_calls_metric.observe(num_pending)
+
+ # Update the time we last ticked, for the metric to test whether
+ # Synapse's reactor has frozen
+ global last_ticked
+ last_ticked = end
+
+ if running_on_pypy:
+ return ret
# Check if we need to do a manual GC (since its been disabled), and do
# one if necessary.
@@ -161,12 +254,12 @@ def runUntilCurrentTimer(func):
if threshold[i] < counts[i]:
logger.info("Collecting gc %d", i)
- start = time.time() * 1000
+ start = time.time()
unreachable = gc.collect(i)
- end = time.time() * 1000
+ end = time.time()
- gc_time.inc_by(end - start, i)
- gc_unreachable.inc_by(unreachable, i)
+ gc_time.labels(i).observe(end - start)
+ gc_unreachable.labels(i).set(unreachable)
return ret
@@ -185,6 +278,7 @@ try:
# We manually run the GC each reactor tick so that we can get some metrics
# about time spent doing GC,
- gc.disable()
+ if not running_on_pypy:
+ gc.disable()
except AttributeError:
pass
diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py
new file mode 100644
index 0000000000..9d820e44a6
--- /dev/null
+++ b/synapse/metrics/background_process_metrics.py
@@ -0,0 +1,179 @@
+# -*- coding: utf-8 -*-
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import six
+
+from prometheus_client.core import REGISTRY, Counter, GaugeMetricFamily
+
+from twisted.internet import defer
+
+from synapse.util.logcontext import LoggingContext, PreserveLoggingContext
+
+_background_process_start_count = Counter(
+ "synapse_background_process_start_count",
+ "Number of background processes started",
+ ["name"],
+)
+
+# we set registry=None in all of these to stop them getting registered with
+# the default registry. Instead we collect them all via the CustomCollector,
+# which ensures that we can update them before they are collected.
+#
+_background_process_ru_utime = Counter(
+ "synapse_background_process_ru_utime_seconds",
+ "User CPU time used by background processes, in seconds",
+ ["name"],
+ registry=None,
+)
+
+_background_process_ru_stime = Counter(
+ "synapse_background_process_ru_stime_seconds",
+ "System CPU time used by background processes, in seconds",
+ ["name"],
+ registry=None,
+)
+
+_background_process_db_txn_count = Counter(
+ "synapse_background_process_db_txn_count",
+ "Number of database transactions done by background processes",
+ ["name"],
+ registry=None,
+)
+
+_background_process_db_txn_duration = Counter(
+ "synapse_background_process_db_txn_duration_seconds",
+ ("Seconds spent by background processes waiting for database "
+ "transactions, excluding scheduling time"),
+ ["name"],
+ registry=None,
+)
+
+_background_process_db_sched_duration = Counter(
+ "synapse_background_process_db_sched_duration_seconds",
+ "Seconds spent by background processes waiting for database connections",
+ ["name"],
+ registry=None,
+)
+
+# map from description to a counter, so that we can name our logcontexts
+# incrementally. (It actually duplicates _background_process_start_count, but
+# it's much simpler to do so than to try to combine them.)
+_background_process_counts = dict() # type: dict[str, int]
+
+# map from description to the currently running background processes.
+#
+# it's kept as a dict of sets rather than a big set so that we can keep track
+# of process descriptions that no longer have any active processes.
+_background_processes = dict() # type: dict[str, set[_BackgroundProcess]]
+
+
+class _Collector(object):
+    """Custom collector that serves the background process metrics.
+
+    Every in-flight process is asked to flush its resource usage into the
+    Counters first, so the values yielded are current at collection time.
+    """
+    def collect(self):
+        in_flight = GaugeMetricFamily(
+            "synapse_background_process_in_flight_count",
+            "Number of background processes in flight",
+            labels=["name"],
+        )
+
+        for desc, running in six.iteritems(_background_processes):
+            in_flight.add_metric((desc,), len(running))
+            for proc in running:
+                proc.update_metrics()
+
+        yield in_flight
+
+        # The Counters below were created with registry=None, so they are
+        # not in that registry; run collect() on each one here and pass on
+        # every metric it returns.
+        static_counters = (
+            _background_process_ru_utime,
+            _background_process_ru_stime,
+            _background_process_db_txn_count,
+            _background_process_db_txn_duration,
+            _background_process_db_sched_duration,
+        )
+        for counter in static_counters:
+            for family in counter.collect():
+                yield family
+
+
+REGISTRY.register(_Collector())
+
+
+class _BackgroundProcess(object):
+ def __init__(self, desc, ctx):
+ self.desc = desc
+ self._context = ctx
+ self._reported_stats = None
+
+    def update_metrics(self):
+        """Updates the metrics with values from this process.
+
+        Deltas are clamped at 0, since Counter.inc() rejects negative amounts.
+        """
+        new_stats = self._context.get_resource_usage()
+        if self._reported_stats is None:
+            diff = new_stats
+        else:
+            diff = new_stats - self._reported_stats
+        self._reported_stats = new_stats
+
+        for counter, delta in (
+            (_background_process_ru_utime, diff.ru_utime),
+            (_background_process_ru_stime, diff.ru_stime),
+            (_background_process_db_txn_count, diff.db_txn_count),
+            (_background_process_db_txn_duration, diff.db_txn_duration_sec),
+            (_background_process_db_sched_duration, diff.db_sched_duration_sec),
+        ):
+            counter.labels(self.desc).inc(max(delta, 0))
+
+
+def run_as_background_process(desc, func, *args, **kwargs):
+    """Call func inside a logcontext of its own, recording resource metrics.
+
+    Intended for work that is kicked off to run in the background rather
+    than on behalf of a particular request.
+
+    Args:
+        desc (str): a description for this background process type
+        func: a function, which may return a Deferred
+        args: positional args for func
+        kwargs: keyword args for func
+
+    Returns: None
+    """
+    @defer.inlineCallbacks
+    def run():
+        seq = _background_process_counts.get(desc, 0)
+        _background_process_counts[desc] = seq + 1
+        _background_process_start_count.labels(desc).inc()
+
+        with LoggingContext(desc) as context:
+            context.request = "%s-%i" % (desc, seq)
+            tracker = _BackgroundProcess(desc, context)
+            _background_processes.setdefault(desc, set()).add(tracker)
+            try:
+                yield func(*args, **kwargs)
+            finally:
+                tracker.update_metrics()
+                _background_processes[desc].remove(tracker)
+
+    with PreserveLoggingContext():
+        run()
diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py
deleted file mode 100644
index e87b2b80a7..0000000000
--- a/synapse/metrics/metric.py
+++ /dev/null
@@ -1,195 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright 2015, 2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-from itertools import chain
-
-
-# TODO(paul): I can't believe Python doesn't have one of these
-def map_concat(func, items):
- # flatten a list-of-lists
- return list(chain.from_iterable(map(func, items)))
-
-
-class BaseMetric(object):
-
- def __init__(self, name, labels=[]):
- self.name = name
- self.labels = labels # OK not to clone as we never write it
-
- def dimension(self):
- return len(self.labels)
-
- def is_scalar(self):
- return not len(self.labels)
-
- def _render_labelvalue(self, value):
- # TODO: some kind of value escape
- return '"%s"' % (value)
-
- def _render_key(self, values):
- if self.is_scalar():
- return ""
- return "{%s}" % (
- ",".join(["%s=%s" % (k, self._render_labelvalue(v))
- for k, v in zip(self.labels, values)])
- )
-
-
-class CounterMetric(BaseMetric):
- """The simplest kind of metric; one that stores a monotonically-increasing
- integer that counts events."""
-
- def __init__(self, *args, **kwargs):
- super(CounterMetric, self).__init__(*args, **kwargs)
-
- self.counts = {}
-
- # Scalar metrics are never empty
- if self.is_scalar():
- self.counts[()] = 0
-
- def inc_by(self, incr, *values):
- if len(values) != self.dimension():
- raise ValueError(
- "Expected as many values to inc() as labels (%d)" % (self.dimension())
- )
-
- # TODO: should assert that the tag values are all strings
-
- if values not in self.counts:
- self.counts[values] = incr
- else:
- self.counts[values] += incr
-
- def inc(self, *values):
- self.inc_by(1, *values)
-
- def render_item(self, k):
- return ["%s%s %d" % (self.name, self._render_key(k), self.counts[k])]
-
- def render(self):
- return map_concat(self.render_item, sorted(self.counts.keys()))
-
-
-class CallbackMetric(BaseMetric):
- """A metric that returns the numeric value returned by a callback whenever
- it is rendered. Typically this is used to implement gauges that yield the
- size or other state of some in-memory object by actively querying it."""
-
- def __init__(self, name, callback, labels=[]):
- super(CallbackMetric, self).__init__(name, labels=labels)
-
- self.callback = callback
-
- def render(self):
- value = self.callback()
-
- if self.is_scalar():
- return ["%s %.12g" % (self.name, value)]
-
- return ["%s%s %.12g" % (self.name, self._render_key(k), value[k])
- for k in sorted(value.keys())]
-
-
-class DistributionMetric(object):
- """A combination of an event counter and an accumulator, which counts
- both the number of events and accumulates the total value. Typically this
- could be used to keep track of method-running times, or other distributions
- of values that occur in discrete occurances.
-
- TODO(paul): Try to export some heatmap-style stats?
- """
-
- def __init__(self, name, *args, **kwargs):
- self.counts = CounterMetric(name + ":count", **kwargs)
- self.totals = CounterMetric(name + ":total", **kwargs)
-
- def inc_by(self, inc, *values):
- self.counts.inc(*values)
- self.totals.inc_by(inc, *values)
-
- def render(self):
- return self.counts.render() + self.totals.render()
-
-
-class CacheMetric(object):
- __slots__ = ("name", "cache_name", "hits", "misses", "size_callback")
-
- def __init__(self, name, size_callback, cache_name):
- self.name = name
- self.cache_name = cache_name
-
- self.hits = 0
- self.misses = 0
-
- self.size_callback = size_callback
-
- def inc_hits(self):
- self.hits += 1
-
- def inc_misses(self):
- self.misses += 1
-
- def render(self):
- size = self.size_callback()
- hits = self.hits
- total = self.misses + self.hits
-
- return [
- """%s:hits{name="%s"} %d""" % (self.name, self.cache_name, hits),
- """%s:total{name="%s"} %d""" % (self.name, self.cache_name, total),
- """%s:size{name="%s"} %d""" % (self.name, self.cache_name, size),
- ]
-
-
-class MemoryUsageMetric(object):
- """Keeps track of the current memory usage, using psutil.
-
- The class will keep the current min/max/sum/counts of rss over the last
- WINDOW_SIZE_SEC, by polling UPDATE_HZ times per second
- """
-
- UPDATE_HZ = 2 # number of times to get memory per second
- WINDOW_SIZE_SEC = 30 # the size of the window in seconds
-
- def __init__(self, hs, psutil):
- clock = hs.get_clock()
- self.memory_snapshots = []
-
- self.process = psutil.Process()
-
- clock.looping_call(self._update_curr_values, 1000 / self.UPDATE_HZ)
-
- def _update_curr_values(self):
- max_size = self.UPDATE_HZ * self.WINDOW_SIZE_SEC
- self.memory_snapshots.append(self.process.memory_info().rss)
- self.memory_snapshots[:] = self.memory_snapshots[-max_size:]
-
- def render(self):
- if not self.memory_snapshots:
- return []
-
- max_rss = max(self.memory_snapshots)
- min_rss = min(self.memory_snapshots)
- sum_rss = sum(self.memory_snapshots)
- len_rss = len(self.memory_snapshots)
-
- return [
- "process_psutil_rss:max %d" % max_rss,
- "process_psutil_rss:min %d" % min_rss,
- "process_psutil_rss:total %d" % sum_rss,
- "process_psutil_rss:count %d" % len_rss,
- ]
diff --git a/synapse/metrics/process_collector.py b/synapse/metrics/process_collector.py
deleted file mode 100644
index 6fec3de399..0000000000
--- a/synapse/metrics/process_collector.py
+++ /dev/null
@@ -1,122 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright 2015, 2016 OpenMarket Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-
-
-TICKS_PER_SEC = 100
-BYTES_PER_PAGE = 4096
-
-HAVE_PROC_STAT = os.path.exists("/proc/stat")
-HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat")
-HAVE_PROC_SELF_LIMITS = os.path.exists("/proc/self/limits")
-HAVE_PROC_SELF_FD = os.path.exists("/proc/self/fd")
-
-# Field indexes from /proc/self/stat, taken from the proc(5) manpage
-STAT_FIELDS = {
- "utime": 14,
- "stime": 15,
- "starttime": 22,
- "vsize": 23,
- "rss": 24,
-}
-
-
-stats = {}
-
-# In order to report process_start_time_seconds we need to know the
-# machine's boot time, because the value in /proc/self/stat is relative to
-# this
-boot_time = None
-if HAVE_PROC_STAT:
- with open("/proc/stat") as _procstat:
- for line in _procstat:
- if line.startswith("btime "):
- boot_time = int(line.split()[1])
-
-
-def update_resource_metrics():
- if HAVE_PROC_SELF_STAT:
- global stats
- with open("/proc/self/stat") as s:
- line = s.read()
- # line is PID (command) more stats go here ...
- raw_stats = line.split(") ", 1)[1].split(" ")
-
- for (name, index) in STAT_FIELDS.iteritems():
- # subtract 3 from the index, because proc(5) is 1-based, and
- # we've lost the first two fields in PID and COMMAND above
- stats[name] = int(raw_stats[index - 3])
-
-
-def _count_fds():
- # Not every OS will have a /proc/self/fd directory
- if not HAVE_PROC_SELF_FD:
- return 0
-
- return len(os.listdir("/proc/self/fd"))
-
-
-def register_process_collector(process_metrics):
- process_metrics.register_collector(update_resource_metrics)
-
- if HAVE_PROC_SELF_STAT:
- process_metrics.register_callback(
- "cpu_user_seconds_total",
- lambda: float(stats["utime"]) / TICKS_PER_SEC
- )
- process_metrics.register_callback(
- "cpu_system_seconds_total",
- lambda: float(stats["stime"]) / TICKS_PER_SEC
- )
- process_metrics.register_callback(
- "cpu_seconds_total",
- lambda: (float(stats["utime"] + stats["stime"])) / TICKS_PER_SEC
- )
-
- process_metrics.register_callback(
- "virtual_memory_bytes",
- lambda: int(stats["vsize"])
- )
- process_metrics.register_callback(
- "resident_memory_bytes",
- lambda: int(stats["rss"]) * BYTES_PER_PAGE
- )
-
- process_metrics.register_callback(
- "start_time_seconds",
- lambda: boot_time + int(stats["starttime"]) / TICKS_PER_SEC
- )
-
- if HAVE_PROC_SELF_FD:
- process_metrics.register_callback(
- "open_fds",
- lambda: _count_fds()
- )
-
- if HAVE_PROC_SELF_LIMITS:
- def _get_max_fds():
- with open("/proc/self/limits") as limits:
- for line in limits:
- if not line.startswith("Max open files "):
- continue
- # Line is Max open files $SOFT $HARD
- return int(line.split()[3])
- return None
-
- process_metrics.register_callback(
- "max_fds",
- lambda: _get_max_fds()
- )
diff --git a/synapse/metrics/resource.py b/synapse/metrics/resource.py
index 870f400600..9789359077 100644
--- a/synapse/metrics/resource.py
+++ b/synapse/metrics/resource.py
@@ -13,27 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from twisted.web.resource import Resource
-
-import synapse.metrics
-
+from prometheus_client.twisted import MetricsResource
METRICS_PREFIX = "/_synapse/metrics"
-
-class MetricsResource(Resource):
- isLeaf = True
-
- def __init__(self, hs):
- Resource.__init__(self) # Resource is old-style, so no super()
-
- self.hs = hs
-
- def render_GET(self, request):
- response = synapse.metrics.render_all()
-
- request.setHeader("Content-Type", "text/plain")
- request.setHeader("Content-Length", str(len(response)))
-
- # Encode as UTF-8 (default)
- return response.encode()
+__all__ = ["MetricsResource", "METRICS_PREFIX"]
|