summary refs log tree commit diff
path: root/synapse/storage
diff options
context:
space:
mode:
author: Erik Johnston <erik@matrix.org> 2023-04-26 17:00:11 +0100
committer: GitHub <noreply@github.com> 2023-04-26 16:00:11 +0000
commit: 9900f7c231f8af536fce229117b0a406dc629293 (patch)
tree: a688fbc4cf51b64fa4821d19e200f8f50e3a0427 /synapse/storage
parent: Update the `check_schema_delta` script to account for when the schema version... (diff)
download: synapse-9900f7c231f8af536fce229117b0a406dc629293.tar.xz
Add admin endpoint to query room sizes (#15482)
Diffstat (limited to 'synapse/storage')
-rw-r--r-- synapse/storage/controllers/__init__.py 2
-rw-r--r-- synapse/storage/controllers/stats.py 113
2 files changed, 115 insertions, 0 deletions
diff --git a/synapse/storage/controllers/__init__.py b/synapse/storage/controllers/__init__.py
index 45101cda7a..0ef8602631 100644
--- a/synapse/storage/controllers/__init__.py
+++ b/synapse/storage/controllers/__init__.py
@@ -19,6 +19,7 @@ from synapse.storage.controllers.persist_events import (
 )
 from synapse.storage.controllers.purge_events import PurgeEventsStorageController
 from synapse.storage.controllers.state import StateStorageController
+from synapse.storage.controllers.stats import StatsController
 from synapse.storage.databases import Databases
 from synapse.storage.databases.main import DataStore
 
@@ -40,6 +41,7 @@ class StorageControllers:
 
         self.purge_events = PurgeEventsStorageController(hs, stores)
         self.state = StateStorageController(hs, stores)
+        self.stats = StatsController(hs, stores)
 
         self.persistence = None
         if stores.persist_events:
diff --git a/synapse/storage/controllers/stats.py b/synapse/storage/controllers/stats.py
new file mode 100644
index 0000000000..988e44c6af
--- /dev/null
+++ b/synapse/storage/controllers/stats.py
@@ -0,0 +1,113 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from collections import Counter
+from typing import TYPE_CHECKING, Collection, List, Tuple
+
+from synapse.api.errors import SynapseError
+from synapse.storage.database import LoggingTransaction
+from synapse.storage.databases import Databases
+from synapse.storage.engines import PostgresEngine
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+logger = logging.getLogger(__name__)
+
+
+class StatsController:
+    """High level interface for getting statistics."""
+
+    def __init__(self, hs: "HomeServer", stores: Databases):
+        self.stores = stores
+
+    async def get_room_db_size_estimate(self) -> List[Tuple[str, int]]:
+        """Get an estimate of the largest rooms and how much database space they
+        use, in bytes.
+
+        Only works against PostgreSQL.
+
+        Note: this uses the postgres statistics so is a very rough estimate.
+        """
+
+        # Note: We look at tables on both the main and state databases.
+        if not isinstance(self.stores.main.database_engine, PostgresEngine):
+            raise SynapseError(400, "Endpoint requires using PostgreSQL")
+
+        if not isinstance(self.stores.state.database_engine, PostgresEngine):
+            raise SynapseError(400, "Endpoint requires using PostgreSQL")
+
+        # For each "large" table, we go through and get the largest rooms
+        # and an estimate of how much space they take. We can then sum the
+        # results and return the top 10.
+        #
+        # This isn't the most accurate, but given all of these are estimates
+        # anyway it's good enough.
+        room_estimates: Counter[str] = Counter()
+
+        # Return size of the table on disk, including indexes and TOAST.
+        table_sql = """
+            SELECT pg_total_relation_size(?)
+        """
+
+        # Get an estimate for the largest rooms and their frequency.
+        #
+        # Note: the cast here is a hack to cast from `anyarray` to an actual
+        # type. This ensures that psycopg2 passes us back a Python list.
+        column_sql = """
+            SELECT
+                most_common_vals::TEXT::TEXT[], most_common_freqs::TEXT::NUMERIC[]
+            FROM pg_stats
+            WHERE tablename = ? and attname = 'room_id'
+        """
+
+        def get_room_db_size_estimate_txn(
+            txn: LoggingTransaction,
+            tables: Collection[str],
+        ) -> None:
+            for table in tables:
+                txn.execute(table_sql, (table,))
+                row = txn.fetchone()
+                assert row is not None
+                (table_size,) = row
+
+                txn.execute(column_sql, (table,))
+                row = txn.fetchone()
+                assert row is not None
+                vals, freqs = row
+
+                for room_id, freq in zip(vals, freqs):
+                    room_estimates[room_id] += int(freq * table_size)
+
+        await self.stores.main.db_pool.runInteraction(
+            "get_room_db_size_estimate_main",
+            get_room_db_size_estimate_txn,
+            (
+                "event_json",
+                "events",
+                "event_search",
+                "event_edges",
+                "event_push_actions",
+                "stream_ordering_to_exterm",
+            ),
+        )
+
+        await self.stores.state.db_pool.runInteraction(
+            "get_room_db_size_estimate_state",
+            get_room_db_size_estimate_txn,
+            ("state_groups_state",),
+        )
+
+        return room_estimates.most_common(10)