summary refs log tree commit diff
diff options
context:
space:
mode:
author Richard van der Hoff <1389908+richvdh@users.noreply.github.com> 2021-04-21 10:03:31 +0100
committer GitHub <noreply@github.com> 2021-04-21 10:03:31 +0100
commit 5d281c10dd3d4d1f96635e92d803a74e3880d6b7 (patch)
tree 383d2aa43a5c156ca71e8232cc6a8def5221d109
parent Further tweaking on gpg signing key notice (diff)
download synapse-5d281c10dd3d4d1f96635e92d803a74e3880d6b7.tar.xz
Stop BackgroundProcessLoggingContext making new prometheus timeseries (#9854)
This undoes part of b076bc276e881b262048307b6a226061d96c4a8d.
-rw-r--r-- changelog.d/9854.bugfix 1
-rw-r--r-- synapse/metrics/background_process_metrics.py 20
-rw-r--r-- synapse/replication/tcp/protocol.py 2
3 files changed, 18 insertions, 5 deletions
diff --git a/changelog.d/9854.bugfix b/changelog.d/9854.bugfix
new file mode 100644
index 0000000000..e39a3f9915
--- /dev/null
+++ b/changelog.d/9854.bugfix
@@ -0,0 +1 @@
+Fix a regression in Synapse 1.32.0 which caused Synapse to report large numbers of Prometheus time series, potentially overwhelming Prometheus instances.
diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py
index 78e9cfbc26..3f621539f3 100644
--- a/synapse/metrics/background_process_metrics.py
+++ b/synapse/metrics/background_process_metrics.py
@@ -16,7 +16,7 @@
 import logging
 import threading
 from functools import wraps
-from typing import TYPE_CHECKING, Dict, Optional, Set
+from typing import TYPE_CHECKING, Dict, Optional, Set, Union
 
 from prometheus_client.core import REGISTRY, Counter, Gauge
 
@@ -199,7 +199,7 @@ def run_as_background_process(desc: str, func, *args, bg_start_span=True, **kwar
         _background_process_start_count.labels(desc).inc()
         _background_process_in_flight_count.labels(desc).inc()
 
-        with BackgroundProcessLoggingContext("%s-%s" % (desc, count)) as context:
+        with BackgroundProcessLoggingContext(desc, count) as context:
             try:
                 ctx = noop_context_manager()
                 if bg_start_span:
@@ -244,8 +244,20 @@ class BackgroundProcessLoggingContext(LoggingContext):
 
     __slots__ = ["_proc"]
 
-    def __init__(self, name: str):
-        super().__init__(name)
+    def __init__(self, name: str, instance_id: Optional[Union[int, str]] = None):
+        """
+
+        Args:
+            name: The name of the background process. Each distinct `name` gets a
+                separate prometheus time series.
+
+            instance_id: an identifier to add to `name` to distinguish this instance of
+                the named background process in the logs. If this is `None`, one is
+                made up based on id(self).
+        """
+        if instance_id is None:
+            instance_id = id(self)
+        super().__init__("%s-%s" % (name, instance_id))
         self._proc = _BackgroundProcess(name, self)
 
     def start(self, rusage: "Optional[resource._RUsage]"):
diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py
index ba753318bd..d10d574246 100644
--- a/synapse/replication/tcp/protocol.py
+++ b/synapse/replication/tcp/protocol.py
@@ -185,7 +185,7 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver):
         # a logcontext which we use for processing incoming commands. We declare it as a
         # background process so that the CPU stats get reported to prometheus.
         self._logging_context = BackgroundProcessLoggingContext(
-            "replication-conn-%s" % (self.conn_id,)
+            "replication-conn", self.conn_id
         )
 
     def connectionMade(self):