summary refs log tree commit diff
path: root/synapse/metrics
diff options
context:
space:
mode:
authorErik Johnston <erik@matrix.org>2021-05-05 16:53:45 +0100
committerGitHub <noreply@github.com>2021-05-05 16:53:45 +0100
commit1fb9a2d0bf2506ca6e5343cb340a441585ca1c07 (patch)
tree20779cc0b54efac7c36412bf49e050ef3557b894 /synapse/metrics
parentDon't set the external cache if its been done recently (#9905) (diff)
downloadsynapse-1fb9a2d0bf2506ca6e5343cb340a441585ca1c07.tar.xz
Limit how often GC happens by time. (#9902)
Synapse can be quite memory intensive, and unless care is taken to tune
the GC thresholds it can end up thrashing, causing noticable performance
problems for large servers. We fix this by limiting how often we GC a
given generation, regardless of current counts/thresholds.

This does not help with the reverse problem where the thresholds are set
too high, but that should only happen in situations where they've been
manually configured.

Adds a `gc_min_seconds_between` config option to override the defaults.

Fixes #9890.
Diffstat (limited to 'synapse/metrics')
-rw-r--r--synapse/metrics/__init__.py18
1 files changed, 16 insertions, 2 deletions
diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py
index 31b7b3c256..e671da26d5 100644
--- a/synapse/metrics/__init__.py
+++ b/synapse/metrics/__init__.py
@@ -535,6 +535,13 @@ class ReactorLastSeenMetric:
 
 REGISTRY.register(ReactorLastSeenMetric())
 
+# The minimum time in seconds between GCs for each generation, regardless of the current GC
+# thresholds and counts.
+MIN_TIME_BETWEEN_GCS = (1.0, 10.0, 30.0)
+
+# The time (in seconds since the epoch) of the last time we did a GC for each generation.
+_last_gc = [0.0, 0.0, 0.0]
+
 
 def runUntilCurrentTimer(reactor, func):
     @functools.wraps(func)
@@ -575,11 +582,16 @@ def runUntilCurrentTimer(reactor, func):
             return ret
 
         # Check if we need to do a manual GC (since its been disabled), and do
-        # one if necessary.
+        # one if necessary. Note we go in reverse order as e.g. a gen 1 GC may
+        # promote an object into gen 2, and we don't want to handle the same
+        # object multiple times.
         threshold = gc.get_threshold()
         counts = gc.get_count()
         for i in (2, 1, 0):
-            if threshold[i] < counts[i]:
+            # We check if we need to do one based on a straightforward
+            # comparison between the threshold and count. We also do an extra
+            # check to make sure that we don't a GC too often.
+            if threshold[i] < counts[i] and MIN_TIME_BETWEEN_GCS[i] < end - _last_gc[i]:
                 if i == 0:
                     logger.debug("Collecting gc %d", i)
                 else:
@@ -589,6 +601,8 @@ def runUntilCurrentTimer(reactor, func):
                 unreachable = gc.collect(i)
                 end = time.time()
 
+                _last_gc[i] = end
+
                 gc_time.labels(i).observe(end - start)
                 gc_unreachable.labels(i).set(unreachable)