summary refs log tree commit diff
path: root/synapse/federation/federation_server.py
diff options
context:
space:
mode:
authorRichard van der Hoff <1389908+richvdh@users.noreply.github.com>2020-10-01 11:09:12 +0100
committerGitHub <noreply@github.com>2020-10-01 11:09:12 +0100
commitc1ef579b63a39d8d6fb31ddc1d3cf173eaf7e5b7 (patch)
treebbdc9609da24ff3313d7415ed88bef045e0b0876 /synapse/federation/federation_server.py
parentMake token serializing/deserializing async (#8427) (diff)
downloadsynapse-c1ef579b63a39d8d6fb31ddc1d3cf173eaf7e5b7.tar.xz
Add prometheus metrics to track federation delays (#8430)
Add a pair of federation metrics to track the delays in sending PDUs to/from 
particular servers.
Diffstat (limited to 'synapse/federation/federation_server.py')
-rw-r--r--synapse/federation/federation_server.py24
1 files changed, 23 insertions, 1 deletions
diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 2dcd081cbc..24329dd0e3 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -28,7 +28,7 @@ from typing import (
     Union,
 )
 
-from prometheus_client import Counter, Histogram
+from prometheus_client import Counter, Gauge, Histogram
 
 from twisted.internet import defer
 from twisted.internet.abstract import isIPAddress
@@ -88,6 +88,13 @@ pdu_process_time = Histogram(
 )
 
 
+last_pdu_age_metric = Gauge(
+    "synapse_federation_last_received_pdu_age",
+    "The age (in seconds) of the last PDU successfully received from the given domain",
+    labelnames=("server_name",),
+)
+
+
 class FederationServer(FederationBase):
     def __init__(self, hs):
         super().__init__(hs)
@@ -118,6 +125,10 @@ class FederationServer(FederationBase):
             hs, "state_ids_resp", timeout_ms=30000
         )
 
+        self._federation_metrics_domains = (
+            hs.get_config().federation.federation_metrics_domains
+        )
+
     async def on_backfill_request(
         self, origin: str, room_id: str, versions: List[str], limit: int
     ) -> Tuple[int, Dict[str, Any]]:
@@ -262,7 +273,11 @@ class FederationServer(FederationBase):
 
         pdus_by_room = {}  # type: Dict[str, List[EventBase]]
 
+        newest_pdu_ts = 0
+
         for p in transaction.pdus:  # type: ignore
+            # FIXME (richardv): I don't think this works:
+            #  https://github.com/matrix-org/synapse/issues/8429
             if "unsigned" in p:
                 unsigned = p["unsigned"]
                 if "age" in unsigned:
@@ -300,6 +315,9 @@ class FederationServer(FederationBase):
             event = event_from_pdu_json(p, room_version)
             pdus_by_room.setdefault(room_id, []).append(event)
 
+            if event.origin_server_ts > newest_pdu_ts:
+                newest_pdu_ts = event.origin_server_ts
+
         pdu_results = {}
 
         # we can process different rooms in parallel (which is useful if they
@@ -340,6 +358,10 @@ class FederationServer(FederationBase):
             process_pdus_for_room, pdus_by_room.keys(), TRANSACTION_CONCURRENCY_LIMIT
         )
 
+        if newest_pdu_ts and origin in self._federation_metrics_domains:
+            newest_pdu_age = self._clock.time_msec() - newest_pdu_ts
+            last_pdu_age_metric.labels(server_name=origin).set(newest_pdu_age / 1000)
+
         return pdu_results
 
     async def _handle_edus_in_txn(self, origin: str, transaction: Transaction):