diff options
Diffstat (limited to 'synapse/storage')
-rw-r--r-- | synapse/storage/controllers/__init__.py | 2 | ||||
-rw-r--r-- | synapse/storage/controllers/stats.py | 113 |
2 files changed, 115 insertions, 0 deletions
diff --git a/synapse/storage/controllers/__init__.py b/synapse/storage/controllers/__init__.py index 45101cda7a..0ef8602631 100644 --- a/synapse/storage/controllers/__init__.py +++ b/synapse/storage/controllers/__init__.py @@ -19,6 +19,7 @@ from synapse.storage.controllers.persist_events import ( ) from synapse.storage.controllers.purge_events import PurgeEventsStorageController from synapse.storage.controllers.state import StateStorageController +from synapse.storage.controllers.stats import StatsController from synapse.storage.databases import Databases from synapse.storage.databases.main import DataStore @@ -40,6 +41,7 @@ class StorageControllers: self.purge_events = PurgeEventsStorageController(hs, stores) self.state = StateStorageController(hs, stores) + self.stats = StatsController(hs, stores) self.persistence = None if stores.persist_events: diff --git a/synapse/storage/controllers/stats.py b/synapse/storage/controllers/stats.py new file mode 100644 index 0000000000..988e44c6af --- /dev/null +++ b/synapse/storage/controllers/stats.py @@ -0,0 +1,113 @@ +# Copyright 2023 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from collections import Counter +from typing import TYPE_CHECKING, Collection, List, Tuple + +from synapse.api.errors import SynapseError +from synapse.storage.database import LoggingTransaction +from synapse.storage.databases import Databases +from synapse.storage.engines import PostgresEngine + +if TYPE_CHECKING: + from synapse.server import HomeServer + +logger = logging.getLogger(__name__) + + +class StatsController: + """High level interface for getting statistics.""" + + def __init__(self, hs: "HomeServer", stores: Databases): + self.stores = stores + + async def get_room_db_size_estimate(self) -> List[Tuple[str, int]]: + """Get an estimate of the largest rooms and how much database space they + use, in bytes. + + Only works against PostgreSQL. + + Note: this uses the postgres statistics so is a very rough estimate. + """ + + # Note: We look at both tables on the main and state databases. + if not isinstance(self.stores.main.database_engine, PostgresEngine): + raise SynapseError(400, "Endpoint requires using PostgreSQL") + + if not isinstance(self.stores.state.database_engine, PostgresEngine): + raise SynapseError(400, "Endpoint requires using PostgreSQL") + + # For each "large" table, we go through and get the largest rooms + # and an estimate of how much space they take. We can then sum the + # results and return the top 10. + # + # This isn't the most accurate, but given all of these are estimates + # anyway its good enough. + room_estimates: Counter[str] = Counter() + + # Return size of the table on disk, including indexes and TOAST. + table_sql = """ + SELECT pg_total_relation_size(?) + """ + + # Get an estimate for the largest rooms and their frequency. + # + # Note: the cast here is a hack to cast from `anyarray` to an actual + # type. This ensures that psycopg2 passes us a back a a Python list. + column_sql = """ + SELECT + most_common_vals::TEXT::TEXT[], most_common_freqs::TEXT::NUMERIC[] + FROM pg_stats + WHERE tablename = ? and attname = 'room_id' + """ + + def get_room_db_size_estimate_txn( + txn: LoggingTransaction, + tables: Collection[str], + ) -> None: + for table in tables: + txn.execute(table_sql, (table,)) + row = txn.fetchone() + assert row is not None + (table_size,) = row + + txn.execute(column_sql, (table,)) + row = txn.fetchone() + assert row is not None + vals, freqs = row + + for room_id, freq in zip(vals, freqs): + room_estimates[room_id] += int(freq * table_size) + + await self.stores.main.db_pool.runInteraction( + "get_room_db_size_estimate_main", + get_room_db_size_estimate_txn, + ( + "event_json", + "events", + "event_search", + "event_edges", + "event_push_actions", + "stream_ordering_to_exterm", + ), + ) + + await self.stores.state.db_pool.runInteraction( + "get_room_db_size_estimate_state", + get_room_db_size_estimate_txn, + ("state_groups_state",), + ) + + return room_estimates.most_common(10) |