summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--CHANGES.md21
-rw-r--r--UPGRADE.rst13
-rw-r--r--debian/changelog6
-rw-r--r--synapse/__init__.py2
-rw-r--r--synapse/metrics/background_process_metrics.py20
-rw-r--r--synapse/replication/tcp/protocol.py2
6 files changed, 49 insertions, 15 deletions
diff --git a/CHANGES.md b/CHANGES.md
index 7713328f12..7188f94445 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,11 +1,24 @@
+Synapse 1.32.1 (2021-04-21)
+===========================
+
+This release fixes [a regression](https://github.com/matrix-org/synapse/issues/9853)
+in Synapse 1.32.0 that caused connected Prometheus instances to become unstable. If you
+ran Synapse 1.32.0 with Prometheus metrics, first upgrade to Synapse 1.32.1 and follow
+[these instructions](https://github.com/matrix-org/synapse/pull/9854#issuecomment-823472183)
+to clean up any excess write-ahead logs.
+
+Bugfixes
+--------
+
+- Fix a regression in Synapse 1.32.0 which caused Synapse to report large numbers of Prometheus time series, potentially overwhelming Prometheus instances. ([\#9854](https://github.com/matrix-org/synapse/issues/9854))
+
+
 Synapse 1.32.0 (2021-04-20)
 ===========================
 
-**Note:** This release introduces [a regression](https://githubcom/matrix-org/synapse/issues/9853)
+**Note:** This release introduces [a regression](https://github.com/matrix-org/synapse/issues/9853)
 that can overwhelm connected Prometheus instances. This issue was not present in
-Synapse v1.32.0rc1. It is recommended not to update to this release. If you have
-upgraded to v1.32.0 already, please downgrade to v1.31.0. This issue will be
-resolved in a subsequent release version shortly.
+1.32.0rc1, and is fixed in 1.32.1. See the changelog for 1.32.1 above for more information.
 
 **Note:** This release requires Python 3.6+ and Postgres 9.6+ or SQLite 3.22+.
 
diff --git a/UPGRADE.rst b/UPGRADE.rst
index e3e0c1e40d..215211b6b5 100644
--- a/UPGRADE.rst
+++ b/UPGRADE.rst
@@ -114,11 +114,14 @@ Upgrading to v1.32.0
 Regression causing connected Prometheus instances to become overwhelmed
 -----------------------------------------------------------------------
 
-This release introduces `a regression <https://githubcom/matrix-org/synapse/issues/9853>`_
-that can overwhelm connected Prometheus instances. This issue was not present in
-Synapse v1.32.0rc1. It is recommended not to update to this release. If you have
-upgraded to v1.32.0 already, please downgrade to v1.31.0. This issue will be
-resolved in a subsequent release version shortly.
+This release introduces `a regression <https://github.com/matrix-org/synapse/issues/9853>`_
+that can overwhelm connected Prometheus instances. This issue is not present in
+Synapse v1.32.0rc1, and is fixed in Synapse v1.32.1.
+
+If you have been affected, please first upgrade to a more recent Synapse version.
+You may then need to remove excess write-ahead logs in order for Prometheus to recover.
+Instructions for doing so are provided
+`here <https://github.com/matrix-org/synapse/pull/9854#issuecomment-823472183>`_.
 
 Dropping support for old Python, Postgres and SQLite versions
 -------------------------------------------------------------
diff --git a/debian/changelog b/debian/changelog
index 83be4497ec..b8cf2cac58 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+matrix-synapse-py3 (1.32.1) stable; urgency=medium
+
+  * New synapse release 1.32.1.
+
+ -- Synapse Packaging team <packages@matrix.org>  Wed, 21 Apr 2021 14:00:55 +0100
+
 matrix-synapse-py3 (1.32.0) stable; urgency=medium
 
   [ Dan Callahan ]
diff --git a/synapse/__init__.py b/synapse/__init__.py
index 7e97f5d995..5bfae24cbd 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -47,7 +47,7 @@ try:
 except ImportError:
     pass
 
-__version__ = "1.32.0"
+__version__ = "1.32.1"
 
 if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)):
     # We import here so that we don't have to install a bunch of deps when
diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py
index c56f0dd124..714caf84c3 100644
--- a/synapse/metrics/background_process_metrics.py
+++ b/synapse/metrics/background_process_metrics.py
@@ -15,7 +15,7 @@
 import logging
 import threading
 from functools import wraps
-from typing import TYPE_CHECKING, Dict, Optional, Set
+from typing import TYPE_CHECKING, Dict, Optional, Set, Union
 
 from prometheus_client.core import REGISTRY, Counter, Gauge
 
@@ -198,7 +198,7 @@ def run_as_background_process(desc: str, func, *args, bg_start_span=True, **kwar
         _background_process_start_count.labels(desc).inc()
         _background_process_in_flight_count.labels(desc).inc()
 
-        with BackgroundProcessLoggingContext("%s-%s" % (desc, count)) as context:
+        with BackgroundProcessLoggingContext(desc, count) as context:
             try:
                 ctx = noop_context_manager()
                 if bg_start_span:
@@ -243,8 +243,20 @@ class BackgroundProcessLoggingContext(LoggingContext):
 
     __slots__ = ["_proc"]
 
-    def __init__(self, name: str):
-        super().__init__(name)
+    def __init__(self, name: str, instance_id: Optional[Union[int, str]] = None):
+        """
+
+        Args:
+            name: The name of the background process. Each distinct `name` gets a
+                separate prometheus time series.
+
+            instance_id: an identifier to add to `name` to distinguish this instance of
+                the named background process in the logs. If this is `None`, one is
+                made up based on id(self).
+        """
+        if instance_id is None:
+            instance_id = id(self)
+        super().__init__("%s-%s" % (name, instance_id))
         self._proc = _BackgroundProcess(name, self)
 
     def start(self, rusage: "Optional[resource._RUsage]"):
diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py
index 2df028f315..6860576e78 100644
--- a/synapse/replication/tcp/protocol.py
+++ b/synapse/replication/tcp/protocol.py
@@ -184,7 +184,7 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver):
         # a logcontext which we use for processing incoming commands. We declare it as a
         # background process so that the CPU stats get reported to prometheus.
         self._logging_context = BackgroundProcessLoggingContext(
-            "replication-conn-%s" % (self.conn_id,)
+            "replication-conn", self.conn_id
         )
 
     def connectionMade(self):