diff --git a/synapse/storage/controllers/__init__.py b/synapse/storage/controllers/__init__.py
index 45101cda7a..0ef8602631 100644
--- a/synapse/storage/controllers/__init__.py
+++ b/synapse/storage/controllers/__init__.py
@@ -19,6 +19,7 @@ from synapse.storage.controllers.persist_events import (
)
from synapse.storage.controllers.purge_events import PurgeEventsStorageController
from synapse.storage.controllers.state import StateStorageController
+from synapse.storage.controllers.stats import StatsController
from synapse.storage.databases import Databases
from synapse.storage.databases.main import DataStore
@@ -40,6 +41,7 @@ class StorageControllers:
self.purge_events = PurgeEventsStorageController(hs, stores)
self.state = StateStorageController(hs, stores)
+ self.stats = StatsController(hs, stores)
self.persistence = None
if stores.persist_events:
diff --git a/synapse/storage/controllers/stats.py b/synapse/storage/controllers/stats.py
new file mode 100644
index 0000000000..988e44c6af
--- /dev/null
+++ b/synapse/storage/controllers/stats.py
@@ -0,0 +1,113 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from collections import Counter
+from typing import TYPE_CHECKING, Collection, List, Tuple
+
+from synapse.api.errors import SynapseError
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.databases import Databases
+from synapse.storage.engines import PostgresEngine
+
+if TYPE_CHECKING:
+ from synapse.server import HomeServer
+
+logger = logging.getLogger(__name__)
+
+
+class StatsController:
+    """High level interface for getting statistics."""
+
+    def __init__(self, hs: "HomeServer", stores: Databases):
+        # `hs` is part of the constructor signature but is not used here;
+        # only the database handles are retained.
+        self.stores = stores
+
+    async def get_room_db_size_estimate(self) -> List[Tuple[str, int]]:
+        """Get an estimate of the largest rooms and how much database space they
+        use, in bytes.
+
+        Only works against PostgreSQL.
+
+        Note: this uses the postgres statistics so is a very rough estimate.
+
+        Returns:
+            At most ten `(room_id, estimated_bytes)` pairs, ordered from the
+            largest estimate to the smallest.
+
+        Raises:
+            SynapseError: (400) if either the main or the state database is
+                not running on PostgreSQL.
+        """
+
+        # Note: We look at tables on both the main and state databases.
+        if not isinstance(self.stores.main.database_engine, PostgresEngine):
+            raise SynapseError(400, "Endpoint requires using PostgreSQL")
+
+        if not isinstance(self.stores.state.database_engine, PostgresEngine):
+            raise SynapseError(400, "Endpoint requires using PostgreSQL")
+
+        # For each "large" table, we go through and get the largest rooms
+        # and an estimate of how much space they take. We can then sum the
+        # results and return the top 10.
+        #
+        # This isn't the most accurate, but given all of these are estimates
+        # anyway it's good enough.
+
+        # Return size of the table on disk, including indexes and TOAST.
+        table_sql = """
+            SELECT pg_total_relation_size(?)
+        """
+
+        # Get an estimate for the largest rooms and their frequency.
+        #
+        # Note: the cast here is a hack to cast from `anyarray` to an actual
+        # type. This ensures that psycopg2 passes us back a Python list.
+        column_sql = """
+            SELECT
+                most_common_vals::TEXT::TEXT[], most_common_freqs::TEXT::NUMERIC[]
+            FROM pg_stats
+            WHERE tablename = ? and attname = 'room_id'
+        """
+
+        def get_room_db_size_estimate_txn(
+            txn: LoggingTransaction,
+            tables: Collection[str],
+        ) -> "Counter[str]":
+            """Estimate per-room byte usage across `tables`.
+
+            The result is built locally and returned, rather than written
+            into state shared with the caller, so that running this function
+            more than once (e.g. if the transaction is retried) cannot count
+            the same table twice.
+            """
+            estimates: Counter[str] = Counter()
+
+            for table in tables:
+                txn.execute(table_sql, (table,))
+                row = txn.fetchone()
+                # A bare SELECT of a function call always yields one row.
+                assert row is not None
+                (table_size,) = row
+
+                txn.execute(column_sql, (table,))
+                row = txn.fetchone()
+                if row is None:
+                    # No statistics have been gathered for this table's
+                    # `room_id` column, so there is nothing to estimate from.
+                    continue
+
+                vals, freqs = row
+                if not vals or not freqs:
+                    # The statistics columns are NULL when no value is more
+                    # common than any other; skip rather than crash in zip().
+                    continue
+
+                for room_id, freq in zip(vals, freqs):
+                    # `freq` is the fraction of rows with this room ID, so
+                    # scale the table's total size by it.
+                    estimates[room_id] += int(freq * table_size)
+
+            return estimates
+
+        main_estimates = await self.stores.main.db_pool.runInteraction(
+            "get_room_db_size_estimate_main",
+            get_room_db_size_estimate_txn,
+            (
+                "event_json",
+                "events",
+                "event_search",
+                "event_edges",
+                "event_push_actions",
+                "stream_ordering_to_exterm",
+            ),
+        )
+
+        state_estimates = await self.stores.state.db_pool.runInteraction(
+            "get_room_db_size_estimate_state",
+            get_room_db_size_estimate_txn,
+            ("state_groups_state",),
+        )
+
+        # Merge with `update` (not `+`) so that zero-sized entries are kept,
+        # exactly as in-place accumulation would have kept them.
+        room_estimates: Counter[str] = Counter()
+        room_estimates.update(main_estimates)
+        room_estimates.update(state_estimates)
+
+        return room_estimates.most_common(10)
|