summary refs log tree commit diff
diff options
context:
space:
mode:
authorDirk Klimpel <5740567+dklimpel@users.noreply.github.com>2020-10-27 15:12:31 +0100
committerGitHub <noreply@github.com>2020-10-27 14:12:31 +0000
commit9b7c28283abf62378cecbde3523b5709448e4140 (patch)
treea8965be5afe0bb9e144dd932b858cf0ad25feb92
parentMerge branch 'master' into develop (diff)
downloadsynapse-9b7c28283abf62378cecbde3523b5709448e4140.tar.xz
Add admin API to list users' local media (#8647)
Add admin API `GET /_synapse/admin/v1/users/<user_id>/media` to get information of users' uploaded files.
-rw-r--r--changelog.d/8647.feature1
-rw-r--r--docs/admin_api/user_admin_api.rst83
-rw-r--r--synapse/rest/admin/__init__.py2
-rw-r--r--synapse/rest/admin/users.py67
-rw-r--r--synapse/storage/databases/main/events_bg_updates.py7
-rw-r--r--synapse/storage/databases/main/media_repository.py51
-rw-r--r--synapse/storage/databases/main/schema/delta/58/22users_have_local_media.sql2
-rw-r--r--tests/rest/admin/test_user.py282
8 files changed, 494 insertions, 1 deletions
diff --git a/changelog.d/8647.feature b/changelog.d/8647.feature
new file mode 100644
index 0000000000..79e98f6e90
--- /dev/null
+++ b/changelog.d/8647.feature
@@ -0,0 +1 @@
+Add an admin API `GET /_synapse/admin/v1/users/<user_id>/media` to get information about uploaded media. Contributed by @dklimpel.
\ No newline at end of file
diff --git a/docs/admin_api/user_admin_api.rst b/docs/admin_api/user_admin_api.rst
index 0f3d99c826..636fc284e4 100644
--- a/docs/admin_api/user_admin_api.rst
+++ b/docs/admin_api/user_admin_api.rst
@@ -341,6 +341,89 @@ The following fields are returned in the JSON response body:
 - ``total`` - Number of rooms.
 
 
+List media of an user
+================================
+Gets a list of all local media that a specific ``user_id`` has created.
+The response is ordered by creation date descending and media ID descending.
+The newest media is on top.
+
+The API is::
+
+  GET /_synapse/admin/v1/users/<user_id>/media
+
+To use it, you will need to authenticate by providing an ``access_token`` for a
+server admin: see `README.rst <README.rst>`_.
+
+A response body like the following is returned:
+
+.. code:: json
+
+    {
+      "media": [
+        {
+          "created_ts": 100400,
+          "last_access_ts": null,
+          "media_id": "qXhyRzulkwLsNHTbpHreuEgo",
+          "media_length": 67,
+          "media_type": "image/png",
+          "quarantined_by": null,
+          "safe_from_quarantine": false,
+          "upload_name": "test1.png"
+        },
+        {
+          "created_ts": 200400,
+          "last_access_ts": null,
+          "media_id": "FHfiSnzoINDatrXHQIXBtahw",
+          "media_length": 67,
+          "media_type": "image/png",
+          "quarantined_by": null,
+          "safe_from_quarantine": false,
+          "upload_name": "test2.png"
+        }
+      ],
+      "next_token": 3,
+      "total": 2
+    }
+
+To paginate, check for ``next_token`` and if present, call the endpoint again
+with ``from`` set to the value of ``next_token``. This will return a new page.
+
+If the endpoint does not return a ``next_token`` then there are no more
+reports to paginate through.
+
+**Parameters**
+
+The following parameters should be set in the URL:
+
+- ``user_id`` - string - fully qualified: for example, ``@user:server.com``.
+- ``limit``: string representing a positive integer - Is optional but is used for pagination,
+  denoting the maximum number of items to return in this call. Defaults to ``100``.
+- ``from``: string representing a positive integer - Is optional but used for pagination,
+  denoting the offset in the returned results. This should be treated as an opaque value and
+  not explicitly set to anything other than the return value of ``next_token`` from a previous call.
+  Defaults to ``0``.
+
+**Response**
+
+The following fields are returned in the JSON response body:
+
+- ``media`` - An array of objects, each containing information about a media.
+  Media objects contain the following fields:
+
+  - ``created_ts`` - integer - Timestamp when the content was uploaded in ms.
+  - ``last_access_ts`` - integer - Timestamp when the content was last accessed in ms.
+  - ``media_id`` - string - The id used to refer to the media.
+  - ``media_length`` - integer - Length of the media in bytes.
+  - ``media_type`` - string - The MIME-type of the media.
+  - ``quarantined_by`` - string - The user ID that initiated the quarantine request
+    for this media.
+
+  - ``safe_from_quarantine`` - bool - Status if this media is safe from quarantining.
+  - ``upload_name`` - string - The name the media was uploaded with.
+
+- ``next_token``: integer - Indication for pagination. See above.
+- ``total`` - integer - Total number of media.
+
 User devices
 ============
 
diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py
index df14bdf26e..a79996cae1 100644
--- a/synapse/rest/admin/__init__.py
+++ b/synapse/rest/admin/__init__.py
@@ -53,6 +53,7 @@ from synapse.rest.admin.users import (
     ResetPasswordRestServlet,
     SearchUsersRestServlet,
     UserAdminServlet,
+    UserMediaRestServlet,
     UserMembershipRestServlet,
     UserRegisterServlet,
     UserRestServletV2,
@@ -218,6 +219,7 @@ def register_servlets(hs, http_server):
     SendServerNoticeServlet(hs).register(http_server)
     VersionServlet(hs).register(http_server)
     UserAdminServlet(hs).register(http_server)
+    UserMediaRestServlet(hs).register(http_server)
     UserMembershipRestServlet(hs).register(http_server)
     UserRestServletV2(hs).register(http_server)
     UsersRestServletV2(hs).register(http_server)
diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py
index e71d9b0e1c..933bb45346 100644
--- a/synapse/rest/admin/users.py
+++ b/synapse/rest/admin/users.py
@@ -16,6 +16,7 @@ import hashlib
 import hmac
 import logging
 from http import HTTPStatus
+from typing import Tuple
 
 from synapse.api.constants import UserTypes
 from synapse.api.errors import Codes, NotFoundError, SynapseError
@@ -27,13 +28,14 @@ from synapse.http.servlet import (
     parse_json_object_from_request,
     parse_string,
 )
+from synapse.http.site import SynapseRequest
 from synapse.rest.admin._base import (
     admin_patterns,
     assert_requester_is_admin,
     assert_user_is_admin,
     historical_admin_path_patterns,
 )
-from synapse.types import UserID
+from synapse.types import JsonDict, UserID
 
 logger = logging.getLogger(__name__)
 
@@ -709,3 +711,66 @@ class UserMembershipRestServlet(RestServlet):
         room_ids = await self.store.get_rooms_for_user(user_id)
         ret = {"joined_rooms": list(room_ids), "total": len(room_ids)}
         return 200, ret
+
+
+class UserMediaRestServlet(RestServlet):
+    """
+    Gets information about all uploaded local media for a specific `user_id`.
+
+    Example:
+        http://localhost:8008/_synapse/admin/v1/users/
+        @user:server/media
+
+    Args:
+        The parameters `from` and `limit` are required for pagination.
+        By default, a `limit` of 100 is used.
+    Returns:
+        A list of media and an integer representing the total number of
+        media that exist given for this user
+    """
+
+    PATTERNS = admin_patterns("/users/(?P<user_id>[^/]+)/media$")
+
+    def __init__(self, hs):
+        self.is_mine = hs.is_mine
+        self.auth = hs.get_auth()
+        self.store = hs.get_datastore()
+
+    async def on_GET(
+        self, request: SynapseRequest, user_id: str
+    ) -> Tuple[int, JsonDict]:
+        await assert_requester_is_admin(self.auth, request)
+
+        if not self.is_mine(UserID.from_string(user_id)):
+            raise SynapseError(400, "Can only lookup local users")
+
+        user = await self.store.get_user_by_id(user_id)
+        if user is None:
+            raise NotFoundError("Unknown user")
+
+        start = parse_integer(request, "from", default=0)
+        limit = parse_integer(request, "limit", default=100)
+
+        if start < 0:
+            raise SynapseError(
+                400,
+                "Query parameter from must be a string representing a positive integer.",
+                errcode=Codes.INVALID_PARAM,
+            )
+
+        if limit < 0:
+            raise SynapseError(
+                400,
+                "Query parameter limit must be a string representing a positive integer.",
+                errcode=Codes.INVALID_PARAM,
+            )
+
+        media, total = await self.store.get_local_media_by_user_paginate(
+            start, limit, user_id
+        )
+
+        ret = {"media": media, "total": total}
+        if (start + limit) < total:
+            ret["next_token"] = start + len(media)
+
+        return 200, ret
diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py
index 5e4af2eb51..97b6754846 100644
--- a/synapse/storage/databases/main/events_bg_updates.py
+++ b/synapse/storage/databases/main/events_bg_updates.py
@@ -92,6 +92,13 @@ class EventsBackgroundUpdatesStore(SQLBaseStore):
             where_clause="NOT have_censored",
         )
 
+        self.db_pool.updates.register_background_index_update(
+            "users_have_local_media",
+            index_name="users_have_local_media",
+            table="local_media_repository",
+            columns=["user_id", "created_ts"],
+        )
+
     async def _background_reindex_fields_sender(self, progress, batch_size):
         target_min_stream_id = progress["target_min_stream_id_inclusive"]
         max_stream_id = progress["max_stream_id_exclusive"]
diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py
index 7ef5f1bf2b..daf57675d8 100644
--- a/synapse/storage/databases/main/media_repository.py
+++ b/synapse/storage/databases/main/media_repository.py
@@ -116,6 +116,57 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
             desc="get_local_media",
         )
 
+    async def get_local_media_by_user_paginate(
+        self, start: int, limit: int, user_id: str
+    ) -> Tuple[List[Dict[str, Any]], int]:
+        """Get a paginated list of metadata for a local piece of media
+        which an user_id has uploaded
+
+        Args:
+            start: offset in the list
+            limit: maximum amount of media_ids to retrieve
+            user_id: fully-qualified user id
+        Returns:
+            A paginated list of all metadata of user's media,
+            plus the total count of all the user's media
+        """
+
+        def get_local_media_by_user_paginate_txn(txn):
+
+            args = [user_id]
+            sql = """
+                SELECT COUNT(*) as total_media
+                FROM local_media_repository
+                WHERE user_id = ?
+            """
+            txn.execute(sql, args)
+            count = txn.fetchone()[0]
+
+            sql = """
+                SELECT
+                    "media_id",
+                    "media_type",
+                    "media_length",
+                    "upload_name",
+                    "created_ts",
+                    "last_access_ts",
+                    "quarantined_by",
+                    "safe_from_quarantine"
+                FROM local_media_repository
+                WHERE user_id = ?
+                ORDER BY created_ts DESC, media_id DESC
+                LIMIT ? OFFSET ?
+            """
+
+            args += [limit, start]
+            txn.execute(sql, args)
+            media = self.db_pool.cursor_to_dict(txn)
+            return media, count
+
+        return await self.db_pool.runInteraction(
+            "get_local_media_by_user_paginate_txn", get_local_media_by_user_paginate_txn
+        )
+
     async def get_local_media_before(
         self, before_ts: int, size_gt: int, keep_profiles: bool,
     ) -> Optional[List[str]]:
diff --git a/synapse/storage/databases/main/schema/delta/58/22users_have_local_media.sql b/synapse/storage/databases/main/schema/delta/58/22users_have_local_media.sql
new file mode 100644
index 0000000000..a2842687f1
--- /dev/null
+++ b/synapse/storage/databases/main/schema/delta/58/22users_have_local_media.sql
@@ -0,0 +1,2 @@
+INSERT INTO background_updates (update_name, progress_json) VALUES
+  ('users_have_local_media', '{}');
\ No newline at end of file
diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py
index d4b7ae21d1..e815b92329 100644
--- a/tests/rest/admin/test_user.py
+++ b/tests/rest/admin/test_user.py
@@ -17,6 +17,7 @@ import hashlib
 import hmac
 import json
 import urllib.parse
+from binascii import unhexlify
 
 from mock import Mock
 
@@ -1115,3 +1116,284 @@ class UserMembershipRestTestCase(unittest.HomeserverTestCase):
         self.assertEqual(200, channel.code, msg=channel.json_body)
         self.assertEqual(number_rooms, channel.json_body["total"])
         self.assertEqual(number_rooms, len(channel.json_body["joined_rooms"]))
+
+
+class UserMediaRestTestCase(unittest.HomeserverTestCase):
+
+    servlets = [
+        synapse.rest.admin.register_servlets,
+        login.register_servlets,
+    ]
+
+    def prepare(self, reactor, clock, hs):
+        self.store = hs.get_datastore()
+        self.media_repo = hs.get_media_repository_resource()
+
+        self.admin_user = self.register_user("admin", "pass", admin=True)
+        self.admin_user_tok = self.login("admin", "pass")
+
+        self.other_user = self.register_user("user", "pass")
+        self.url = "/_synapse/admin/v1/users/%s/media" % urllib.parse.quote(
+            self.other_user
+        )
+
+    def test_no_auth(self):
+        """
+        Try to list media of an user without authentication.
+        """
+        request, channel = self.make_request("GET", self.url, b"{}")
+        self.render(request)
+
+        self.assertEqual(401, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"])
+
+    def test_requester_is_no_admin(self):
+        """
+        If the user is not a server admin, an error is returned.
+        """
+        other_user_token = self.login("user", "pass")
+
+        request, channel = self.make_request(
+            "GET", self.url, access_token=other_user_token,
+        )
+        self.render(request)
+
+        self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"])
+
+    def test_user_does_not_exist(self):
+        """
+        Tests that a lookup for a user that does not exist returns a 404
+        """
+        url = "/_synapse/admin/v1/users/@unknown_person:test/media"
+        request, channel = self.make_request(
+            "GET", url, access_token=self.admin_user_tok,
+        )
+        self.render(request)
+
+        self.assertEqual(404, channel.code, msg=channel.json_body)
+        self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"])
+
+    def test_user_is_not_local(self):
+        """
+        Tests that a lookup for a user that is not a local returns a 400
+        """
+        url = "/_synapse/admin/v1/users/@unknown_person:unknown_domain/media"
+
+        request, channel = self.make_request(
+            "GET", url, access_token=self.admin_user_tok,
+        )
+        self.render(request)
+
+        self.assertEqual(400, channel.code, msg=channel.json_body)
+        self.assertEqual("Can only lookup local users", channel.json_body["error"])
+
+    def test_limit(self):
+        """
+        Testing list of media with limit
+        """
+
+        number_media = 20
+        other_user_tok = self.login("user", "pass")
+        self._create_media(other_user_tok, number_media)
+
+        request, channel = self.make_request(
+            "GET", self.url + "?limit=5", access_token=self.admin_user_tok,
+        )
+        self.render(request)
+
+        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(channel.json_body["total"], number_media)
+        self.assertEqual(len(channel.json_body["media"]), 5)
+        self.assertEqual(channel.json_body["next_token"], 5)
+        self._check_fields(channel.json_body["media"])
+
+    def test_from(self):
+        """
+        Testing list of media with a defined starting point (from)
+        """
+
+        number_media = 20
+        other_user_tok = self.login("user", "pass")
+        self._create_media(other_user_tok, number_media)
+
+        request, channel = self.make_request(
+            "GET", self.url + "?from=5", access_token=self.admin_user_tok,
+        )
+        self.render(request)
+
+        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(channel.json_body["total"], number_media)
+        self.assertEqual(len(channel.json_body["media"]), 15)
+        self.assertNotIn("next_token", channel.json_body)
+        self._check_fields(channel.json_body["media"])
+
+    def test_limit_and_from(self):
+        """
+        Testing list of media with a defined starting point and limit
+        """
+
+        number_media = 20
+        other_user_tok = self.login("user", "pass")
+        self._create_media(other_user_tok, number_media)
+
+        request, channel = self.make_request(
+            "GET", self.url + "?from=5&limit=10", access_token=self.admin_user_tok,
+        )
+        self.render(request)
+
+        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(channel.json_body["total"], number_media)
+        self.assertEqual(channel.json_body["next_token"], 15)
+        self.assertEqual(len(channel.json_body["media"]), 10)
+        self._check_fields(channel.json_body["media"])
+
+    def test_limit_is_negative(self):
+        """
+        Testing that a negative limit parameter returns a 400
+        """
+
+        request, channel = self.make_request(
+            "GET", self.url + "?limit=-5", access_token=self.admin_user_tok,
+        )
+        self.render(request)
+
+        self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"])
+
+    def test_from_is_negative(self):
+        """
+        Testing that a negative from parameter returns a 400
+        """
+
+        request, channel = self.make_request(
+            "GET", self.url + "?from=-5", access_token=self.admin_user_tok,
+        )
+        self.render(request)
+
+        self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"])
+
+    def test_next_token(self):
+        """
+        Testing that `next_token` appears at the right place
+        """
+
+        number_media = 20
+        other_user_tok = self.login("user", "pass")
+        self._create_media(other_user_tok, number_media)
+
+        #  `next_token` does not appear
+        # Number of results is the number of entries
+        request, channel = self.make_request(
+            "GET", self.url + "?limit=20", access_token=self.admin_user_tok,
+        )
+        self.render(request)
+
+        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(channel.json_body["total"], number_media)
+        self.assertEqual(len(channel.json_body["media"]), number_media)
+        self.assertNotIn("next_token", channel.json_body)
+
+        #  `next_token` does not appear
+        # Number of max results is larger than the number of entries
+        request, channel = self.make_request(
+            "GET", self.url + "?limit=21", access_token=self.admin_user_tok,
+        )
+        self.render(request)
+
+        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(channel.json_body["total"], number_media)
+        self.assertEqual(len(channel.json_body["media"]), number_media)
+        self.assertNotIn("next_token", channel.json_body)
+
+        #  `next_token` does appear
+        # Number of max results is smaller than the number of entries
+        request, channel = self.make_request(
+            "GET", self.url + "?limit=19", access_token=self.admin_user_tok,
+        )
+        self.render(request)
+
+        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(channel.json_body["total"], number_media)
+        self.assertEqual(len(channel.json_body["media"]), 19)
+        self.assertEqual(channel.json_body["next_token"], 19)
+
+        # Check
+        # Set `from` to value of `next_token` for request remaining entries
+        #  `next_token` does not appear
+        request, channel = self.make_request(
+            "GET", self.url + "?from=19", access_token=self.admin_user_tok,
+        )
+        self.render(request)
+
+        self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(channel.json_body["total"], number_media)
+        self.assertEqual(len(channel.json_body["media"]), 1)
+        self.assertNotIn("next_token", channel.json_body)
+
+    def test_user_has_no_media(self):
+        """
+        Tests that a normal lookup for media is successfully
+        if user has no media created
+        """
+
+        request, channel = self.make_request(
+            "GET", self.url, access_token=self.admin_user_tok,
+        )
+        self.render(request)
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(0, channel.json_body["total"])
+        self.assertEqual(0, len(channel.json_body["media"]))
+
+    def test_get_media(self):
+        """
+        Tests that a normal lookup for media is successfully
+        """
+
+        number_media = 5
+        other_user_tok = self.login("user", "pass")
+        self._create_media(other_user_tok, number_media)
+
+        request, channel = self.make_request(
+            "GET", self.url, access_token=self.admin_user_tok,
+        )
+        self.render(request)
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(number_media, channel.json_body["total"])
+        self.assertEqual(number_media, len(channel.json_body["media"]))
+        self.assertNotIn("next_token", channel.json_body)
+        self._check_fields(channel.json_body["media"])
+
+    def _create_media(self, user_token, number_media):
+        """
+        Create a number of media for a specific user
+        """
+        upload_resource = self.media_repo.children[b"upload"]
+        for i in range(number_media):
+            # file size is 67 Byte
+            image_data = unhexlify(
+                b"89504e470d0a1a0a0000000d4948445200000001000000010806"
+                b"0000001f15c4890000000a49444154789c63000100000500010d"
+                b"0a2db40000000049454e44ae426082"
+            )
+
+            # Upload some media into the room
+            self.helper.upload_media(
+                upload_resource, image_data, tok=user_token, expect_code=200
+            )
+
+    def _check_fields(self, content):
+        """Checks that all attributes are present in content
+        """
+        for m in content:
+            self.assertIn("media_id", m)
+            self.assertIn("media_type", m)
+            self.assertIn("media_length", m)
+            self.assertIn("upload_name", m)
+            self.assertIn("created_ts", m)
+            self.assertIn("last_access_ts", m)
+            self.assertIn("quarantined_by", m)
+            self.assertIn("safe_from_quarantine", m)