diff options
author | Erik Johnston <erik@matrix.org> | 2023-04-26 17:00:11 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-04-26 16:00:11 +0000 |
commit | 9900f7c231f8af536fce229117b0a406dc629293 (patch) | |
tree | a688fbc4cf51b64fa4821d19e200f8f50e3a0427 /synapse | |
parent | Update the `check_schema_delta` script to account for when the schema version... (diff) | |
download | synapse-9900f7c231f8af536fce229117b0a406dc629293.tar.xz |
Add admin endpoint to query room sizes (#15482)
Diffstat (limited to 'synapse')
-rw-r--r-- | synapse/rest/admin/__init__.py | 6 | ||||
-rw-r--r-- | synapse/rest/admin/statistics.py | 25 | ||||
-rw-r--r-- | synapse/storage/controllers/__init__.py | 2 | ||||
-rw-r--r-- | synapse/storage/controllers/stats.py | 113 |
4 files changed, 145 insertions, 1 deletions
diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index 79f22a59f1..770df261ce 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -68,7 +68,10 @@ from synapse.rest.admin.rooms import ( RoomTimestampToEventRestServlet, ) from synapse.rest.admin.server_notice_servlet import SendServerNoticeServlet -from synapse.rest.admin.statistics import UserMediaStatisticsRestServlet +from synapse.rest.admin.statistics import ( + LargestRoomsStatistics, + UserMediaStatisticsRestServlet, +) from synapse.rest.admin.username_available import UsernameAvailableRestServlet from synapse.rest.admin.users import ( AccountDataRestServlet, @@ -259,6 +262,7 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: UserRestServletV2(hs).register(http_server) UsersRestServletV2(hs).register(http_server) UserMediaStatisticsRestServlet(hs).register(http_server) + LargestRoomsStatistics(hs).register(http_server) EventReportDetailRestServlet(hs).register(http_server) EventReportsRestServlet(hs).register(http_server) AccountDataRestServlet(hs).register(http_server) diff --git a/synapse/rest/admin/statistics.py b/synapse/rest/admin/statistics.py index 9c45f4650d..19780e4b4c 100644 --- a/synapse/rest/admin/statistics.py +++ b/synapse/rest/admin/statistics.py @@ -113,3 +113,28 @@ class UserMediaStatisticsRestServlet(RestServlet): ret["next_token"] = start + len(users_media) return HTTPStatus.OK, ret + + +class LargestRoomsStatistics(RestServlet): + """Get the largest rooms by database size. + + Only works when using PostgreSQL. + """ + + PATTERNS = admin_patterns("/statistics/database/rooms$") + + def __init__(self, hs: "HomeServer"): + self.auth = hs.get_auth() + self.stats_controller = hs.get_storage_controllers().stats + + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + await assert_requester_is_admin(self.auth, request) + + room_sizes = await self.stats_controller.get_room_db_size_estimate() + + return HTTPStatus.OK, { + "rooms": [ + {"room_id": room_id, "estimated_size": size} + for room_id, size in room_sizes + ] + } diff --git a/synapse/storage/controllers/__init__.py b/synapse/storage/controllers/__init__.py index 45101cda7a..0ef8602631 100644 --- a/synapse/storage/controllers/__init__.py +++ b/synapse/storage/controllers/__init__.py @@ -19,6 +19,7 @@ from synapse.storage.controllers.persist_events import ( ) from synapse.storage.controllers.purge_events import PurgeEventsStorageController from synapse.storage.controllers.state import StateStorageController +from synapse.storage.controllers.stats import StatsController from synapse.storage.databases import Databases from synapse.storage.databases.main import DataStore @@ -40,6 +41,7 @@ class StorageControllers: self.purge_events = PurgeEventsStorageController(hs, stores) self.state = StateStorageController(hs, stores) + self.stats = StatsController(hs, stores) self.persistence = None if stores.persist_events: diff --git a/synapse/storage/controllers/stats.py b/synapse/storage/controllers/stats.py new file mode 100644 index 0000000000..988e44c6af --- /dev/null +++ b/synapse/storage/controllers/stats.py @@ -0,0 +1,113 @@ +# Copyright 2023 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from collections import Counter +from typing import TYPE_CHECKING, Collection, List, Tuple + +from synapse.api.errors import SynapseError +from synapse.storage.database import LoggingTransaction +from synapse.storage.databases import Databases +from synapse.storage.engines import PostgresEngine + +if TYPE_CHECKING: + from synapse.server import HomeServer + +logger = logging.getLogger(__name__) + + +class StatsController: + """High level interface for getting statistics.""" + + def __init__(self, hs: "HomeServer", stores: Databases): + self.stores = stores + + async def get_room_db_size_estimate(self) -> List[Tuple[str, int]]: + """Get an estimate of the largest rooms and how much database space they + use, in bytes. + + Only works against PostgreSQL. + + Note: this uses the postgres statistics so is a very rough estimate. + """ + + # Note: We look at both tables on the main and state databases. + if not isinstance(self.stores.main.database_engine, PostgresEngine): + raise SynapseError(400, "Endpoint requires using PostgreSQL") + + if not isinstance(self.stores.state.database_engine, PostgresEngine): + raise SynapseError(400, "Endpoint requires using PostgreSQL") + + # For each "large" table, we go through and get the largest rooms + # and an estimate of how much space they take. We can then sum the + # results and return the top 10. + # + # This isn't the most accurate, but given all of these are estimates + # anyway its good enough. + room_estimates: Counter[str] = Counter() + + # Return size of the table on disk, including indexes and TOAST. + table_sql = """ + SELECT pg_total_relation_size(?) + """ + + # Get an estimate for the largest rooms and their frequency. + # + # Note: the cast here is a hack to cast from `anyarray` to an actual + # type. This ensures that psycopg2 passes us a back a a Python list. + column_sql = """ + SELECT + most_common_vals::TEXT::TEXT[], most_common_freqs::TEXT::NUMERIC[] + FROM pg_stats + WHERE tablename = ? and attname = 'room_id' + """ + + def get_room_db_size_estimate_txn( + txn: LoggingTransaction, + tables: Collection[str], + ) -> None: + for table in tables: + txn.execute(table_sql, (table,)) + row = txn.fetchone() + assert row is not None + (table_size,) = row + + txn.execute(column_sql, (table,)) + row = txn.fetchone() + assert row is not None + vals, freqs = row + + for room_id, freq in zip(vals, freqs): + room_estimates[room_id] += int(freq * table_size) + + await self.stores.main.db_pool.runInteraction( + "get_room_db_size_estimate_main", + get_room_db_size_estimate_txn, + ( + "event_json", + "events", + "event_search", + "event_edges", + "event_push_actions", + "stream_ordering_to_exterm", + ), + ) + + await self.stores.state.db_pool.runInteraction( + "get_room_db_size_estimate_state", + get_room_db_size_estimate_txn, + ("state_groups_state",), + ) + + return room_estimates.most_common(10) |