diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py
index 5664d5a381..bdd7292a30 100644
--- a/synapse/metrics/__init__.py
+++ b/synapse/metrics/__init__.py
@@ -22,6 +22,7 @@ import functools
import os
import stat
import time
+import gc
from twisted.internet import reactor
@@ -33,11 +34,7 @@ from .metric import (
logger = logging.getLogger(__name__)
-# We'll keep all the available metrics in a single toplevel dict, one shared
-# for the entire process. We don't currently support per-HomeServer instances
-# of metrics, because in practice any one python VM will host only one
-# HomeServer anyway. This makes a lot of implementation neater
-all_metrics = {}
+all_metrics = []
class Metrics(object):
@@ -53,7 +50,7 @@ class Metrics(object):
metric = metric_class(full_name, *args, **kwargs)
- all_metrics[full_name] = metric
+ all_metrics.append(metric)
return metric
def register_counter(self, *args, **kwargs):
@@ -84,12 +81,12 @@ def render_all():
# TODO(paul): Internal hack
update_resource_metrics()
- for name in sorted(all_metrics.keys()):
+ for metric in all_metrics:
try:
- strs += all_metrics[name].render()
+ strs += metric.render()
except Exception:
- strs += ["# FAILED to render %s" % name]
- logger.exception("Failed to render %s metric", name)
+ strs += ["# FAILED to render"]
+ logger.exception("Failed to render metric")
strs.append("") # to generate a final CRLF
@@ -156,6 +153,13 @@ reactor_metrics = get_metrics_for("reactor")
tick_time = reactor_metrics.register_distribution("tick_time")
pending_calls_metric = reactor_metrics.register_distribution("pending_calls")
+gc_time = reactor_metrics.register_distribution("gc_time", labels=["gen"])
+gc_unreachable = reactor_metrics.register_counter("gc_unreachable", labels=["gen"])
+
+reactor_metrics.register_callback(
+ "gc_counts", lambda: {(i,): v for i, v in enumerate(gc.get_count())}, labels=["gen"]
+)
+
def runUntilCurrentTimer(func):
@@ -182,6 +186,22 @@ def runUntilCurrentTimer(func):
end = time.time() * 1000
tick_time.inc_by(end - start)
pending_calls_metric.inc_by(num_pending)
+
+ # Check if we need to do a manual GC (since its been disabled), and do
+ # one if necessary.
+ threshold = gc.get_threshold()
+ counts = gc.get_count()
+ for i in (2, 1, 0):
+ if threshold[i] < counts[i]:
+ logger.info("Collecting gc %d", i)
+
+ start = time.time() * 1000
+ unreachable = gc.collect(i)
+ end = time.time() * 1000
+
+ gc_time.inc_by(end - start, i)
+ gc_unreachable.inc_by(unreachable, i)
+
return ret
return f
@@ -196,5 +216,9 @@ try:
# runUntilCurrent is called when we have pending calls. It is called once
# per iteratation after fd polling.
reactor.runUntilCurrent = runUntilCurrentTimer(reactor.runUntilCurrent)
+
+ # We manually run the GC each reactor tick so that we can get some metrics
+ # about time spent doing GC,
+ gc.disable()
except AttributeError:
pass
diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py
index 368fc24984..341043952a 100644
--- a/synapse/metrics/metric.py
+++ b/synapse/metrics/metric.py
@@ -47,9 +47,6 @@ class BaseMetric(object):
for k, v in zip(self.labels, values)])
)
- def render(self):
- return map_concat(self.render_item, sorted(self.counts.keys()))
-
class CounterMetric(BaseMetric):
"""The simplest kind of metric; one that stores a monotonically-increasing
@@ -83,6 +80,9 @@ class CounterMetric(BaseMetric):
def render_item(self, k):
return ["%s%s %d" % (self.name, self._render_key(k), self.counts[k])]
+ def render(self):
+ return map_concat(self.render_item, sorted(self.counts.keys()))
+
class CallbackMetric(BaseMetric):
"""A metric that returns the numeric value returned by a callback whenever
@@ -126,30 +126,30 @@ class DistributionMetric(object):
class CacheMetric(object):
- """A combination of two CounterMetrics, one to count cache hits and one to
- count a total, and a callback metric to yield the current size.
+ __slots__ = ("name", "cache_name", "hits", "misses", "size_callback")
- This metric generates standard metric name pairs, so that monitoring rules
- can easily be applied to measure hit ratio."""
-
- def __init__(self, name, size_callback, labels=[]):
+ def __init__(self, name, size_callback, cache_name):
self.name = name
+ self.cache_name = cache_name
- self.hits = CounterMetric(name + ":hits", labels=labels)
- self.total = CounterMetric(name + ":total", labels=labels)
+ self.hits = 0
+ self.misses = 0
- self.size = CallbackMetric(
- name + ":size",
- callback=size_callback,
- labels=labels,
- )
+ self.size_callback = size_callback
- def inc_hits(self, *values):
- self.hits.inc(*values)
- self.total.inc(*values)
+ def inc_hits(self):
+ self.hits += 1
- def inc_misses(self, *values):
- self.total.inc(*values)
+ def inc_misses(self):
+ self.misses += 1
def render(self):
- return self.hits.render() + self.total.render() + self.size.render()
+ size = self.size_callback()
+ hits = self.hits
+ total = self.misses + self.hits
+
+ return [
+ """%s:hits{name="%s"} %d""" % (self.name, self.cache_name, hits),
+ """%s:total{name="%s"} %d""" % (self.name, self.cache_name, total),
+ """%s:size{name="%s"} %d""" % (self.name, self.cache_name, size),
+ ]
|