summary refs log tree commit diff
path: root/synapse/storage
diff options
context:
space:
mode:
authorDirk Klimpel <5740567+dklimpel@users.noreply.github.com>2020-11-05 19:59:12 +0100
committerGitHub <noreply@github.com>2020-11-05 18:59:12 +0000
commitc3119d1536582c639bf67bf7e3c914935e3bbd7e (patch)
tree63b981fc57cbff22888e6da4061b3e3e20b3ab90 /synapse/storage
parentAdd `displayname` to Shared-Secret Registration for admins (#8722) (diff)
downloadsynapse-c3119d1536582c639bf67bf7e3c914935e3bbd7e.tar.xz
Add an admin API for users' media statistics (#8700)
Add `GET /_synapse/admin/v1/statistics/users/media` to get statisics about local media usage by users.
Related to #6094
It is the first API for statistics.
Goal is to avoid/reduce usage of sql queries like [Wiki analyzing Synapse](https://github.com/matrix-org/synapse/wiki/SQL-for-analyzing-Synapse-PostgreSQL-database-stats)

Signed-off-by: Dirk Klimpel dirk@klimpel.org
Diffstat (limited to 'synapse/storage')
-rw-r--r--synapse/storage/databases/main/stats.py127
1 files changed, 127 insertions, 0 deletions
diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py
index 5beb302be3..0cdb3ec1f7 100644
--- a/synapse/storage/databases/main/stats.py
+++ b/synapse/storage/databases/main/stats.py
@@ -16,15 +16,18 @@
 
 import logging
 from collections import Counter
+from enum import Enum
 from itertools import chain
 from typing import Any, Dict, List, Optional, Tuple
 
 from twisted.internet.defer import DeferredLock
 
 from synapse.api.constants import EventTypes, Membership
+from synapse.api.errors import StoreError
 from synapse.storage.database import DatabasePool
 from synapse.storage.databases.main.state_deltas import StateDeltasStore
 from synapse.storage.engines import PostgresEngine
+from synapse.types import JsonDict
 from synapse.util.caches.descriptors import cached
 
 logger = logging.getLogger(__name__)
@@ -59,6 +62,23 @@ TYPE_TO_TABLE = {"room": ("room_stats", "room_id"), "user": ("user_stats", "user
 TYPE_TO_ORIGIN_TABLE = {"room": ("rooms", "room_id"), "user": ("users", "name")}
 
 
+class UserSortOrder(Enum):
+    """
+    Enum to define the sorting method used when returning users
+    with get_users_media_usage_paginate
+
+    MEDIA_LENGTH = ordered by size of uploaded media. Smallest to largest.
+    MEDIA_COUNT = ordered by number of uploaded media. Smallest to largest.
+    USER_ID = ordered alphabetically by `user_id`.
+    DISPLAYNAME = ordered alphabetically by `displayname`
+    """
+
+    MEDIA_LENGTH = "media_length"
+    MEDIA_COUNT = "media_count"
+    USER_ID = "user_id"
+    DISPLAYNAME = "displayname"
+
+
 class StatsStore(StateDeltasStore):
     def __init__(self, database: DatabasePool, db_conn, hs):
         super().__init__(database, db_conn, hs)
@@ -882,3 +902,110 @@ class StatsStore(StateDeltasStore):
             complete_with_stream_id=pos,
             absolute_field_overrides={"joined_rooms": joined_rooms},
         )
+
+    async def get_users_media_usage_paginate(
+        self,
+        start: int,
+        limit: int,
+        from_ts: Optional[int] = None,
+        until_ts: Optional[int] = None,
+        order_by: Optional[UserSortOrder] = UserSortOrder.USER_ID.value,
+        direction: Optional[str] = "f",
+        search_term: Optional[str] = None,
+    ) -> Tuple[List[JsonDict], Dict[str, int]]:
+        """Function to retrieve a paginated list of users and their uploaded local media
+        (size and number). This will return a json list of users and the
+        total number of users matching the filter criteria.
+
+        Args:
+            start: offset to begin the query from
+            limit: number of rows to retrieve
+            from_ts: request only media that are created later than this timestamp (ms)
+            until_ts: request only media that are created earlier than this timestamp (ms)
+            order_by: the sort order of the returned list
+            direction: sort ascending or descending
+            search_term: a string to filter user names by
+        Returns:
+            A list of user dicts and an integer representing the total number of
+            users that exist given this query
+        """
+
+        def get_users_media_usage_paginate_txn(txn):
+            filters = []
+            args = [self.hs.config.server_name]
+
+            if search_term:
+                filters.append("(lmr.user_id LIKE ? OR displayname LIKE ?)")
+                args.extend(["@%" + search_term + "%:%", "%" + search_term + "%"])
+
+            if from_ts:
+                filters.append("created_ts >= ?")
+                args.extend([from_ts])
+            if until_ts:
+                filters.append("created_ts <= ?")
+                args.extend([until_ts])
+
+            # Set ordering
+            if UserSortOrder(order_by) == UserSortOrder.MEDIA_LENGTH:
+                order_by_column = "media_length"
+            elif UserSortOrder(order_by) == UserSortOrder.MEDIA_COUNT:
+                order_by_column = "media_count"
+            elif UserSortOrder(order_by) == UserSortOrder.USER_ID:
+                order_by_column = "lmr.user_id"
+            elif UserSortOrder(order_by) == UserSortOrder.DISPLAYNAME:
+                order_by_column = "displayname"
+            else:
+                raise StoreError(
+                    500, "Incorrect value for order_by provided: %s" % order_by
+                )
+
+            if direction == "b":
+                order = "DESC"
+            else:
+                order = "ASC"
+
+            where_clause = "WHERE " + " AND ".join(filters) if len(filters) > 0 else ""
+
+            sql_base = """
+                FROM local_media_repository as lmr
+                LEFT JOIN profiles AS p ON lmr.user_id = '@' || p.user_id || ':' || ?
+                {}
+                GROUP BY lmr.user_id, displayname
+            """.format(
+                where_clause
+            )
+
+            # SQLite does not support SELECT COUNT(*) OVER()
+            sql = """
+                SELECT COUNT(*) FROM (
+                    SELECT lmr.user_id
+                    {sql_base}
+                ) AS count_user_ids
+            """.format(
+                sql_base=sql_base,
+            )
+            txn.execute(sql, args)
+            count = txn.fetchone()[0]
+
+            sql = """
+                SELECT
+                    lmr.user_id,
+                    displayname,
+                    COUNT(lmr.user_id) as media_count,
+                    SUM(media_length) as media_length
+                    {sql_base}
+                ORDER BY {order_by_column} {order}
+                LIMIT ? OFFSET ?
+            """.format(
+                sql_base=sql_base, order_by_column=order_by_column, order=order,
+            )
+
+            args += [limit, start]
+            txn.execute(sql, args)
+            users = self.db_pool.cursor_to_dict(txn)
+
+            return users, count
+
+        return await self.db_pool.runInteraction(
+            "get_users_media_usage_paginate_txn", get_users_media_usage_paginate_txn
+        )