diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py
index 5664d5a381..bdd7292a30 100644
--- a/synapse/metrics/__init__.py
+++ b/synapse/metrics/__init__.py
@@ -22,6 +22,7 @@ import functools
import os
import stat
import time
+import gc
from twisted.internet import reactor
@@ -33,11 +34,7 @@ from .metric import (
logger = logging.getLogger(__name__)
-# We'll keep all the available metrics in a single toplevel dict, one shared
-# for the entire process. We don't currently support per-HomeServer instances
-# of metrics, because in practice any one python VM will host only one
-# HomeServer anyway. This makes a lot of implementation neater
-all_metrics = {}
+all_metrics = []
class Metrics(object):
@@ -53,7 +50,7 @@ class Metrics(object):
metric = metric_class(full_name, *args, **kwargs)
- all_metrics[full_name] = metric
+ all_metrics.append(metric)
return metric
def register_counter(self, *args, **kwargs):
@@ -84,12 +81,12 @@ def render_all():
# TODO(paul): Internal hack
update_resource_metrics()
- for name in sorted(all_metrics.keys()):
+ for metric in all_metrics:
try:
- strs += all_metrics[name].render()
+ strs += metric.render()
except Exception:
- strs += ["# FAILED to render %s" % name]
- logger.exception("Failed to render %s metric", name)
+ strs += ["# FAILED to render"]
+ logger.exception("Failed to render metric")
strs.append("") # to generate a final CRLF
@@ -156,6 +153,13 @@ reactor_metrics = get_metrics_for("reactor")
tick_time = reactor_metrics.register_distribution("tick_time")
pending_calls_metric = reactor_metrics.register_distribution("pending_calls")
+gc_time = reactor_metrics.register_distribution("gc_time", labels=["gen"])
+gc_unreachable = reactor_metrics.register_counter("gc_unreachable", labels=["gen"])
+
+reactor_metrics.register_callback(
+ "gc_counts", lambda: {(i,): v for i, v in enumerate(gc.get_count())}, labels=["gen"]
+)
+
def runUntilCurrentTimer(func):
@@ -182,6 +186,22 @@ def runUntilCurrentTimer(func):
end = time.time() * 1000
tick_time.inc_by(end - start)
pending_calls_metric.inc_by(num_pending)
+
+ # Check if we need to do a manual GC (since its been disabled), and do
+ # one if necessary.
+ threshold = gc.get_threshold()
+ counts = gc.get_count()
+ for i in (2, 1, 0):
+ if threshold[i] < counts[i]:
+ logger.info("Collecting gc %d", i)
+
+ start = time.time() * 1000
+ unreachable = gc.collect(i)
+ end = time.time() * 1000
+
+ gc_time.inc_by(end - start, i)
+ gc_unreachable.inc_by(unreachable, i)
+
return ret
return f
@@ -196,5 +216,9 @@ try:
# runUntilCurrent is called when we have pending calls. It is called once
# per iteratation after fd polling.
reactor.runUntilCurrent = runUntilCurrentTimer(reactor.runUntilCurrent)
+
+ # We manually run the GC each reactor tick so that we can get some metrics
+ # about time spent doing GC,
+ gc.disable()
except AttributeError:
pass
|