diff --git a/changelog.d/13533.misc b/changelog.d/13533.misc
new file mode 100644
index 0000000000..ab4b18887a
--- /dev/null
+++ b/changelog.d/13533.misc
@@ -0,0 +1 @@
+Track HTTP response times over 10 seconds from `/messages` (`synapse_room_message_list_rest_servlet_response_time_seconds`).
diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py
index 2f513164cb..d29417fafc 100644
--- a/synapse/rest/client/room.py
+++ b/synapse/rest/client/room.py
@@ -19,6 +19,8 @@ import re
from typing import TYPE_CHECKING, Awaitable, Dict, List, Optional, Tuple
from urllib import parse as urlparse
+from prometheus_client.core import Histogram
+
from twisted.web.server import Request
from synapse import event_auth
@@ -60,6 +62,35 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
+# This is an extra metric on top of `synapse_http_server_response_time_seconds`
+# which times the same sort of thing but this one allows us to see values
+# greater than 10s. We use a separate dedicated histogram with its own buckets
+# so that we don't increase the cardinality of the general one because it's
+# multiplied across hundreds of servlets.
+messsages_response_timer = Histogram(
+ "synapse_room_message_list_rest_servlet_response_time_seconds",
+ "sec",
+ [],
+ buckets=(
+ 0.005,
+ 0.01,
+ 0.025,
+ 0.05,
+ 0.1,
+ 0.25,
+ 0.5,
+ 1.0,
+ 2.5,
+ 5.0,
+ 10.0,
+ 30.0,
+ 60.0,
+ 120.0,
+ 180.0,
+ "+Inf",
+ ),
+)
+
class TransactionRestServlet(RestServlet):
def __init__(self, hs: "HomeServer"):
@@ -560,6 +591,7 @@ class RoomMessageListRestServlet(RestServlet):
self.auth = hs.get_auth()
self.store = hs.get_datastores().main
+ @messsages_response_timer.time()
async def on_GET(
self, request: SynapseRequest, room_id: str
) -> Tuple[int, JsonDict]:
|