summary refs log tree commit diff
path: root/synapse
diff options
context:
space:
mode:
authorDirk Klimpel <5740567+dklimpel@users.noreply.github.com>2020-11-05 19:59:12 +0100
committerGitHub <noreply@github.com>2020-11-05 18:59:12 +0000
commitc3119d1536582c639bf67bf7e3c914935e3bbd7e (patch)
tree63b981fc57cbff22888e6da4061b3e3e20b3ab90 /synapse
parentAdd `displayname` to Shared-Secret Registration for admins (#8722) (diff)
downloadsynapse-c3119d1536582c639bf67bf7e3c914935e3bbd7e.tar.xz
Add an admin API for users' media statistics (#8700)
Add `GET /_synapse/admin/v1/statistics/users/media` to get statisics about local media usage by users.
Related to #6094
It is the first API for statistics.
Goal is to avoid/reduce usage of sql queries like [Wiki analyzing Synapse](https://github.com/matrix-org/synapse/wiki/SQL-for-analyzing-Synapse-PostgreSQL-database-stats)

Signed-off-by: Dirk Klimpel dirk@klimpel.org
Diffstat (limited to 'synapse')
-rw-r--r--synapse/rest/admin/__init__.py2
-rw-r--r--synapse/rest/admin/statistics.py122
-rw-r--r--synapse/storage/databases/main/stats.py127
3 files changed, 251 insertions, 0 deletions
diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py
index fa7e9e4043..2a4f7a1740 100644
--- a/synapse/rest/admin/__init__.py
+++ b/synapse/rest/admin/__init__.py
@@ -47,6 +47,7 @@ from synapse.rest.admin.rooms import (
     ShutdownRoomRestServlet,
 )
 from synapse.rest.admin.server_notice_servlet import SendServerNoticeServlet
+from synapse.rest.admin.statistics import UserMediaStatisticsRestServlet
 from synapse.rest.admin.users import (
     AccountValidityRenewServlet,
     DeactivateAccountRestServlet,
@@ -227,6 +228,7 @@ def register_servlets(hs, http_server):
     DeviceRestServlet(hs).register(http_server)
     DevicesRestServlet(hs).register(http_server)
     DeleteDevicesRestServlet(hs).register(http_server)
+    UserMediaStatisticsRestServlet(hs).register(http_server)
     EventReportDetailRestServlet(hs).register(http_server)
     EventReportsRestServlet(hs).register(http_server)
     PushersRestServlet(hs).register(http_server)
diff --git a/synapse/rest/admin/statistics.py b/synapse/rest/admin/statistics.py
new file mode 100644
index 0000000000..f2490e382d
--- /dev/null
+++ b/synapse/rest/admin/statistics.py
@@ -0,0 +1,122 @@
+# -*- coding: utf-8 -*-
+# Copyright 2020 Dirk Klimpel
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from typing import TYPE_CHECKING, Tuple
+
+from synapse.api.errors import Codes, SynapseError
+from synapse.http.servlet import RestServlet, parse_integer, parse_string
+from synapse.http.site import SynapseRequest
+from synapse.rest.admin._base import admin_patterns, assert_requester_is_admin
+from synapse.storage.databases.main.stats import UserSortOrder
+from synapse.types import JsonDict
+
+if TYPE_CHECKING:
+    from synapse.server import HomeServer
+
+logger = logging.getLogger(__name__)
+
+
+class UserMediaStatisticsRestServlet(RestServlet):
+    """
+    Get statistics about uploaded media by users.
+    """
+
+    PATTERNS = admin_patterns("/statistics/users/media$")
+
+    def __init__(self, hs: "HomeServer"):
+        self.hs = hs
+        self.auth = hs.get_auth()
+        self.store = hs.get_datastore()
+
+    async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]:
+        await assert_requester_is_admin(self.auth, request)
+
+        order_by = parse_string(
+            request, "order_by", default=UserSortOrder.USER_ID.value
+        )
+        if order_by not in (
+            UserSortOrder.MEDIA_LENGTH.value,
+            UserSortOrder.MEDIA_COUNT.value,
+            UserSortOrder.USER_ID.value,
+            UserSortOrder.DISPLAYNAME.value,
+        ):
+            raise SynapseError(
+                400,
+                "Unknown value for order_by: %s" % (order_by,),
+                errcode=Codes.INVALID_PARAM,
+            )
+
+        start = parse_integer(request, "from", default=0)
+        if start < 0:
+            raise SynapseError(
+                400,
+                "Query parameter from must be a string representing a positive integer.",
+                errcode=Codes.INVALID_PARAM,
+            )
+
+        limit = parse_integer(request, "limit", default=100)
+        if limit < 0:
+            raise SynapseError(
+                400,
+                "Query parameter limit must be a string representing a positive integer.",
+                errcode=Codes.INVALID_PARAM,
+            )
+
+        from_ts = parse_integer(request, "from_ts", default=0)
+        if from_ts < 0:
+            raise SynapseError(
+                400,
+                "Query parameter from_ts must be a string representing a positive integer.",
+                errcode=Codes.INVALID_PARAM,
+            )
+
+        until_ts = parse_integer(request, "until_ts")
+        if until_ts is not None:
+            if until_ts < 0:
+                raise SynapseError(
+                    400,
+                    "Query parameter until_ts must be a string representing a positive integer.",
+                    errcode=Codes.INVALID_PARAM,
+                )
+            if until_ts <= from_ts:
+                raise SynapseError(
+                    400,
+                    "Query parameter until_ts must be greater than from_ts.",
+                    errcode=Codes.INVALID_PARAM,
+                )
+
+        search_term = parse_string(request, "search_term")
+        if search_term == "":
+            raise SynapseError(
+                400,
+                "Query parameter search_term cannot be an empty string.",
+                errcode=Codes.INVALID_PARAM,
+            )
+
+        direction = parse_string(request, "dir", default="f")
+        if direction not in ("f", "b"):
+            raise SynapseError(
+                400, "Unknown direction: %s" % (direction,), errcode=Codes.INVALID_PARAM
+            )
+
+        users_media, total = await self.store.get_users_media_usage_paginate(
+            start, limit, from_ts, until_ts, order_by, direction, search_term
+        )
+        ret = {"users": users_media, "total": total}
+        if (start + limit) < total:
+            ret["next_token"] = start + len(users_media)
+
+        return 200, ret
diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py
index 5beb302be3..0cdb3ec1f7 100644
--- a/synapse/storage/databases/main/stats.py
+++ b/synapse/storage/databases/main/stats.py
@@ -16,15 +16,18 @@
 
 import logging
 from collections import Counter
+from enum import Enum
 from itertools import chain
 from typing import Any, Dict, List, Optional, Tuple
 
 from twisted.internet.defer import DeferredLock
 
 from synapse.api.constants import EventTypes, Membership
+from synapse.api.errors import StoreError
 from synapse.storage.database import DatabasePool
 from synapse.storage.databases.main.state_deltas import StateDeltasStore
 from synapse.storage.engines import PostgresEngine
+from synapse.types import JsonDict
 from synapse.util.caches.descriptors import cached
 
 logger = logging.getLogger(__name__)
@@ -59,6 +62,23 @@ TYPE_TO_TABLE = {"room": ("room_stats", "room_id"), "user": ("user_stats", "user
 TYPE_TO_ORIGIN_TABLE = {"room": ("rooms", "room_id"), "user": ("users", "name")}
 
 
+class UserSortOrder(Enum):
+    """
+    Enum to define the sorting method used when returning users
+    with get_users_media_usage_paginate
+
+    MEDIA_LENGTH = ordered by size of uploaded media. Smallest to largest.
+    MEDIA_COUNT = ordered by number of uploaded media. Smallest to largest.
+    USER_ID = ordered alphabetically by `user_id`.
+    DISPLAYNAME = ordered alphabetically by `displayname`
+    """
+
+    MEDIA_LENGTH = "media_length"
+    MEDIA_COUNT = "media_count"
+    USER_ID = "user_id"
+    DISPLAYNAME = "displayname"
+
+
 class StatsStore(StateDeltasStore):
     def __init__(self, database: DatabasePool, db_conn, hs):
         super().__init__(database, db_conn, hs)
@@ -882,3 +902,110 @@ class StatsStore(StateDeltasStore):
             complete_with_stream_id=pos,
             absolute_field_overrides={"joined_rooms": joined_rooms},
         )
+
+    async def get_users_media_usage_paginate(
+        self,
+        start: int,
+        limit: int,
+        from_ts: Optional[int] = None,
+        until_ts: Optional[int] = None,
+        order_by: Optional[UserSortOrder] = UserSortOrder.USER_ID.value,
+        direction: Optional[str] = "f",
+        search_term: Optional[str] = None,
+    ) -> Tuple[List[JsonDict], Dict[str, int]]:
+        """Function to retrieve a paginated list of users and their uploaded local media
+        (size and number). This will return a json list of users and the
+        total number of users matching the filter criteria.
+
+        Args:
+            start: offset to begin the query from
+            limit: number of rows to retrieve
+            from_ts: request only media that are created later than this timestamp (ms)
+            until_ts: request only media that are created earlier than this timestamp (ms)
+            order_by: the sort order of the returned list
+            direction: sort ascending or descending
+            search_term: a string to filter user names by
+        Returns:
+            A list of user dicts and an integer representing the total number of
+            users that exist given this query
+        """
+
+        def get_users_media_usage_paginate_txn(txn):
+            filters = []
+            args = [self.hs.config.server_name]
+
+            if search_term:
+                filters.append("(lmr.user_id LIKE ? OR displayname LIKE ?)")
+                args.extend(["@%" + search_term + "%:%", "%" + search_term + "%"])
+
+            if from_ts:
+                filters.append("created_ts >= ?")
+                args.extend([from_ts])
+            if until_ts:
+                filters.append("created_ts <= ?")
+                args.extend([until_ts])
+
+            # Set ordering
+            if UserSortOrder(order_by) == UserSortOrder.MEDIA_LENGTH:
+                order_by_column = "media_length"
+            elif UserSortOrder(order_by) == UserSortOrder.MEDIA_COUNT:
+                order_by_column = "media_count"
+            elif UserSortOrder(order_by) == UserSortOrder.USER_ID:
+                order_by_column = "lmr.user_id"
+            elif UserSortOrder(order_by) == UserSortOrder.DISPLAYNAME:
+                order_by_column = "displayname"
+            else:
+                raise StoreError(
+                    500, "Incorrect value for order_by provided: %s" % order_by
+                )
+
+            if direction == "b":
+                order = "DESC"
+            else:
+                order = "ASC"
+
+            where_clause = "WHERE " + " AND ".join(filters) if len(filters) > 0 else ""
+
+            sql_base = """
+                FROM local_media_repository as lmr
+                LEFT JOIN profiles AS p ON lmr.user_id = '@' || p.user_id || ':' || ?
+                {}
+                GROUP BY lmr.user_id, displayname
+            """.format(
+                where_clause
+            )
+
+            # SQLite does not support SELECT COUNT(*) OVER()
+            sql = """
+                SELECT COUNT(*) FROM (
+                    SELECT lmr.user_id
+                    {sql_base}
+                ) AS count_user_ids
+            """.format(
+                sql_base=sql_base,
+            )
+            txn.execute(sql, args)
+            count = txn.fetchone()[0]
+
+            sql = """
+                SELECT
+                    lmr.user_id,
+                    displayname,
+                    COUNT(lmr.user_id) as media_count,
+                    SUM(media_length) as media_length
+                    {sql_base}
+                ORDER BY {order_by_column} {order}
+                LIMIT ? OFFSET ?
+            """.format(
+                sql_base=sql_base, order_by_column=order_by_column, order=order,
+            )
+
+            args += [limit, start]
+            txn.execute(sql, args)
+            users = self.db_pool.cursor_to_dict(txn)
+
+            return users, count
+
+        return await self.db_pool.runInteraction(
+            "get_users_media_usage_paginate_txn", get_users_media_usage_paginate_txn
+        )