summary refs log tree commit diff
diff options
context:
space:
mode:
author David Robertson <davidr@element.io> 2022-10-31 13:02:07 +0000
committer GitHub <noreply@github.com> 2022-10-31 13:02:07 +0000
commit 2bb2c32e8ed5642a5bf3ba1e8c49e10cecc88905 (patch)
tree b25c0eb8017de0c12dc176dbae2679d9177c0cbf
parent Prevent federation user keys query from returning device names if disallowed ... (diff)
download synapse-2bb2c32e8ed5642a5bf3ba1e8c49e10cecc88905.tar.xz
Avoid incrementing bg process utime/stime counters by negative durations (#14323)
-rw-r--r-- changelog.d/14323.bugfix 1
-rw-r--r-- mypy.ini 4
-rw-r--r-- synapse/metrics/background_process_metrics.py 6
-rw-r--r-- tests/metrics/__init__.py 0
-rw-r--r-- tests/metrics/test_background_process_metrics.py 19
-rw-r--r-- tests/metrics/test_metrics.py (renamed from tests/test_metrics.py) 10
6 files changed, 35 insertions, 5 deletions
diff --git a/changelog.d/14323.bugfix b/changelog.d/14323.bugfix
new file mode 100644
index 0000000000..da39bc020c
--- /dev/null
+++ b/changelog.d/14323.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 0.34.0rc2 where logs could include error spam when background processes are measured as taking a negative amount of time.
diff --git a/mypy.ini b/mypy.ini
index 34b4523e00..8f1141a239 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -56,7 +56,6 @@ exclude = (?x)
    |tests/rest/media/v1/test_media_storage.py
    |tests/server.py
    |tests/server_notices/test_resource_limits_server_notices.py
-   |tests/test_metrics.py
    |tests/test_state.py
    |tests/test_terms_auth.py
    |tests/util/caches/test_cached_call.py
@@ -106,6 +105,9 @@ disallow_untyped_defs = False
 [mypy-tests.handlers.test_user_directory]
 disallow_untyped_defs = True
 
+[mypy-tests.metrics.test_background_process_metrics]
+disallow_untyped_defs = True
+
 [mypy-tests.push.test_bulk_push_rule_evaluator]
 disallow_untyped_defs = True
 
diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py
index 7a1516d3a8..9ea4e23b31 100644
--- a/synapse/metrics/background_process_metrics.py
+++ b/synapse/metrics/background_process_metrics.py
@@ -174,8 +174,10 @@ class _BackgroundProcess:
             diff = new_stats - self._reported_stats
         self._reported_stats = new_stats
 
-        _background_process_ru_utime.labels(self.desc).inc(diff.ru_utime)
-        _background_process_ru_stime.labels(self.desc).inc(diff.ru_stime)
+        # For unknown reasons, the difference in times can be negative. See comment in
+        # synapse.http.request_metrics.RequestMetrics.update_metrics.
+        _background_process_ru_utime.labels(self.desc).inc(max(diff.ru_utime, 0))
+        _background_process_ru_stime.labels(self.desc).inc(max(diff.ru_stime, 0))
         _background_process_db_txn_count.labels(self.desc).inc(diff.db_txn_count)
         _background_process_db_txn_duration.labels(self.desc).inc(
             diff.db_txn_duration_sec
diff --git a/tests/metrics/__init__.py b/tests/metrics/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tests/metrics/__init__.py
diff --git a/tests/metrics/test_background_process_metrics.py b/tests/metrics/test_background_process_metrics.py
new file mode 100644
index 0000000000..f0f6cb2912
--- /dev/null
+++ b/tests/metrics/test_background_process_metrics.py
@@ -0,0 +1,19 @@
+from unittest import TestCase as StdlibTestCase
+from unittest.mock import Mock
+
+from synapse.logging.context import ContextResourceUsage, LoggingContext
+from synapse.metrics.background_process_metrics import _BackgroundProcess
+
+
+class TestBackgroundProcessMetrics(StdlibTestCase):
+    def test_update_metrics_with_negative_time_diff(self) -> None:
+        """We should ignore negative reported utime and stime differences"""
+        usage = ContextResourceUsage()
+        usage.ru_stime = usage.ru_utime = -1.0
+
+        mock_logging_context = Mock(spec=LoggingContext)
+        mock_logging_context.get_resource_usage.return_value = usage
+
+        process = _BackgroundProcess("test process", mock_logging_context)
+        # Should not raise
+        process.update_metrics()
diff --git a/tests/test_metrics.py b/tests/metrics/test_metrics.py
index 1a70eddc9b..bddc4228bc 100644
--- a/tests/test_metrics.py
+++ b/tests/metrics/test_metrics.py
@@ -12,6 +12,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from typing_extensions import Protocol
+
 try:
     from importlib import metadata
 except ImportError:
@@ -52,7 +54,11 @@ def get_sample_labels_value(sample):
 
 class TestMauLimit(unittest.TestCase):
     def test_basic(self):
-        gauge = InFlightGauge(
+        class MetricEntry(Protocol):
+            foo: int
+            bar: int
+
+        gauge: InFlightGauge[MetricEntry] = InFlightGauge(
             "test1", "", labels=["test_label"], sub_metrics=["foo", "bar"]
         )
 
@@ -146,7 +152,7 @@ class CacheMetricsTests(unittest.HomeserverTestCase):
         Caches produce metrics reflecting their state when scraped.
         """
         CACHE_NAME = "cache_metrics_test_fgjkbdfg"
-        cache = DeferredCache(CACHE_NAME, max_entries=777)
+        cache: DeferredCache[str, str] = DeferredCache(CACHE_NAME, max_entries=777)
 
         items = {
             x.split(b"{")[0].decode("ascii"): x.split(b" ")[1].decode("ascii")