summary refs log tree commit diff
diff options
context:
space:
mode:
authorRichard van der Hoff <1389908+richvdh@users.noreply.github.com>2021-03-04 16:40:18 +0000
committerGitHub <noreply@github.com>2021-03-04 16:40:18 +0000
commit8a4b3738f3dcf49f70c08204ad457559dcef4112 (patch)
tree8bd53704eba5445498760f74a20538fa90352ef3
parentPrometheus metrics for logins and registrations (#9511) (diff)
downloadsynapse-8a4b3738f3dcf49f70c08204ad457559dcef4112.tar.xz
Replace `last_*_pdu_age` metrics with timestamps (#9540)
Following the advice at
https://prometheus.io/docs/practices/instrumentation/#timestamps-not-time-since,
it's preferable to export unix timestamps, not ages.

There doesn't seem to be any particular naming convention for timestamp
metrics.
-rw-r--r--changelog.d/9540.feature1
-rw-r--r--changelog.d/9540.removal1
-rw-r--r--synapse/federation/federation_server.py10
-rw-r--r--synapse/federation/sender/transaction_manager.py11
4 files changed, 11 insertions, 12 deletions
diff --git a/changelog.d/9540.feature b/changelog.d/9540.feature
new file mode 100644
index 0000000000..5417e51b93
--- /dev/null
+++ b/changelog.d/9540.feature
@@ -0,0 +1 @@
+Add `synapse_federation_last_sent_pdu_time` and `synapse_federation_last_received_pdu_time` prometheus metrics, which monitor federation delays by reporting the timestamps of messages sent and received to a set of remote servers.
diff --git a/changelog.d/9540.removal b/changelog.d/9540.removal
new file mode 100644
index 0000000000..d54f553cb9
--- /dev/null
+++ b/changelog.d/9540.removal
@@ -0,0 +1 @@
+The `synapse_federation_last_sent_pdu_age` and `synapse_federation_last_received_pdu_age` prometheus metrics have been removed. They are replaced by `synapse_federation_last_sent_pdu_time` and `synapse_federation_last_received_pdu_time`.
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 2f832b47f6..362895bf42 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -90,10 +90,9 @@ pdu_process_time = Histogram(
     "Time taken to process an event",
 )
 
-
-last_pdu_age_metric = Gauge(
-    "synapse_federation_last_received_pdu_age",
-    "The age (in seconds) of the last PDU successfully received from the given domain",
+last_pdu_ts_metric = Gauge(
+    "synapse_federation_last_received_pdu_time",
+    "The timestamp of the last PDU which was successfully received from the given domain",
     labelnames=("server_name",),
 )
 
@@ -369,8 +368,7 @@ class FederationServer(FederationBase):
         )
 
         if newest_pdu_ts and origin in self._federation_metrics_domains:
-            newest_pdu_age = self._clock.time_msec() - newest_pdu_ts
-            last_pdu_age_metric.labels(server_name=origin).set(newest_pdu_age / 1000)
+            last_pdu_ts_metric.labels(server_name=origin).set(newest_pdu_ts / 1000)
 
         return pdu_results
 
diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py
index 763aff296c..2a9cd063c4 100644
--- a/synapse/federation/sender/transaction_manager.py
+++ b/synapse/federation/sender/transaction_manager.py
@@ -36,9 +36,9 @@ if TYPE_CHECKING:
 
 logger = logging.getLogger(__name__)
 
-last_pdu_age_metric = Gauge(
-    "synapse_federation_last_sent_pdu_age",
-    "The age (in seconds) of the last PDU successfully sent to the given domain",
+last_pdu_ts_metric = Gauge(
+    "synapse_federation_last_sent_pdu_time",
+    "The timestamp of the last PDU which was successfully sent to the given domain",
     labelnames=("server_name",),
 )
 
@@ -187,9 +187,8 @@ class TransactionManager:
 
             if success and pdus and destination in self._federation_metrics_domains:
                 last_pdu = pdus[-1]
-                last_pdu_age = self.clock.time_msec() - last_pdu.origin_server_ts
-                last_pdu_age_metric.labels(server_name=destination).set(
-                    last_pdu_age / 1000
+                last_pdu_ts_metric.labels(server_name=destination).set(
+                    last_pdu.origin_server_ts / 1000
                 )
 
             set_tag(tags.ERROR, not success)