diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py
index 3880846e9a..a8adf00176 100644
--- a/synapse/rest/client/room.py
+++ b/synapse/rest/client/room.py
@@ -16,9 +16,12 @@
""" This module contains REST servlets to do with rooms: /rooms/<paths> """
import logging
import re
+from enum import Enum
from typing import TYPE_CHECKING, Awaitable, Dict, List, Optional, Tuple
from urllib import parse as urlparse
+from prometheus_client.core import Histogram
+
from twisted.web.server import Request
from synapse import event_auth
@@ -46,6 +49,7 @@ from synapse.http.servlet import (
parse_strings_from_args,
)
from synapse.http.site import SynapseRequest
+from synapse.logging.context import make_deferred_yieldable, run_in_background
from synapse.logging.tracing import set_attribute
from synapse.rest.client._base import client_patterns
from synapse.rest.client.transactions import HttpTransactionCache
@@ -61,6 +65,66 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
+class _RoomSize(Enum):
+ """
+ Enum to differentiate sizes of rooms. This is a pretty good approximation
+ about how hard it will be to get events in the room. We could also look at
+ room "complexity".
+ """
+
+ # This doesn't necessarily mean the room is a DM, just that there is a DM
+ # amount of people there.
+ DM_SIZE = "direct_message_size"
+ SMALL = "small"
+ SUBSTANTIAL = "substantial"
+ LARGE = "large"
+
+ @staticmethod
+ def from_member_count(member_count: int) -> "_RoomSize":
+ if member_count <= 2:
+ return _RoomSize.DM_SIZE
+ elif member_count < 100:
+ return _RoomSize.SMALL
+ elif member_count < 1000:
+ return _RoomSize.SUBSTANTIAL
+ else:
+ return _RoomSize.LARGE
+
+
+# This is an extra metric on top of `synapse_http_server_response_time_seconds`
+# which times the same sort of thing but this one allows us to see values
+# greater than 10s. We use a separate dedicated histogram with its own buckets
+# so that we don't increase the cardinality of the general one because it's
+# multiplied across hundreds of servlets.
+messsages_response_timer = Histogram(
+ "synapse_room_message_list_rest_servlet_response_time_seconds",
+ "sec",
+ # We have a label for room size so we can try to see a more realistic
+ # picture of /messages response time for bigger rooms. We don't want the
+ # tiny rooms that can always respond fast skewing our results when we're trying
+ # to optimize the bigger cases.
+ ["room_size"],
+ buckets=(
+ 0.005,
+ 0.01,
+ 0.025,
+ 0.05,
+ 0.1,
+ 0.25,
+ 0.5,
+ 1.0,
+ 2.5,
+ 5.0,
+ 10.0,
+ 30.0,
+ 60.0,
+ 120.0,
+ 180.0,
+ "+Inf",
+ ),
+)
+
+
class TransactionRestServlet(RestServlet):
def __init__(self, hs: "HomeServer"):
super().__init__()
@@ -556,6 +620,7 @@ class RoomMessageListRestServlet(RestServlet):
def __init__(self, hs: "HomeServer"):
super().__init__()
self._hs = hs
+ self.clock = hs.get_clock()
self.pagination_handler = hs.get_pagination_handler()
self.auth = hs.get_auth()
self.store = hs.get_datastores().main
@@ -563,6 +628,18 @@ class RoomMessageListRestServlet(RestServlet):
async def on_GET(
self, request: SynapseRequest, room_id: str
) -> Tuple[int, JsonDict]:
+ processing_start_time = self.clock.time_msec()
+ # Fire off and hope that we get a result by the end.
+ #
+ # We're using the mypy type ignore comment because the `@cached`
+ # decorator on `get_number_joined_users_in_room` doesn't play well with
+ # the type system. Maybe in the future, it can use some ParamSpec
+ # wizardry to fix it up.
+ room_member_count_deferred = run_in_background( # type: ignore[call-arg]
+ self.store.get_number_joined_users_in_room,
+ room_id, # type: ignore[arg-type]
+ )
+
requester = await self.auth.get_user_by_req(request, allow_guest=True)
pagination_config = await PaginationConfig.from_request(
self.store, request, default_limit=10
@@ -593,6 +670,12 @@ class RoomMessageListRestServlet(RestServlet):
event_filter=event_filter,
)
+ processing_end_time = self.clock.time_msec()
+ room_member_count = await make_deferred_yieldable(room_member_count_deferred)
+ messsages_response_timer.labels(
+ room_size=_RoomSize.from_member_count(room_member_count)
+ ).observe((processing_start_time - processing_end_time) / 1000)
+
return 200, msgs
|