summary refs log tree commit diff
diff options
context:
space:
mode:
authorDirk Klimpel <5740567+dklimpel@users.noreply.github.com>2021-02-22 20:38:51 +0100
committerGitHub <noreply@github.com>2021-02-22 14:38:51 -0500
commit71c9f8de6d9fb5323c8625167f716e1ec381cb87 (patch)
tree7e2ac29585f6ab44a490ab27ddf5b9df85a83ca0
parentexample systemd config: propagate reloads to units (#9463) (diff)
downloadsynapse-71c9f8de6d9fb5323c8625167f716e1ec381cb87.tar.xz
Add an `order_by` field to list users' media admin API. (#8978)
-rw-r--r--changelog.d/8978.feature1
-rw-r--r--docs/admin_api/user_admin_api.rst38
-rw-r--r--synapse/rest/admin/users.py28
-rw-r--r--synapse/storage/databases/main/media_repository.py41
-rw-r--r--tests/rest/admin/test_user.py246
5 files changed, 325 insertions, 29 deletions
diff --git a/changelog.d/8978.feature b/changelog.d/8978.feature
new file mode 100644
index 0000000000..042e257bf0
--- /dev/null
+++ b/changelog.d/8978.feature
@@ -0,0 +1 @@
+Add `order_by` to the admin API `GET /_synapse/admin/v1/users/<user_id>/media`. Contributed by @dklimpel.
\ No newline at end of file
diff --git a/docs/admin_api/user_admin_api.rst b/docs/admin_api/user_admin_api.rst
index 33dfbcfb49..8d4ec5a6f9 100644
--- a/docs/admin_api/user_admin_api.rst
+++ b/docs/admin_api/user_admin_api.rst
@@ -379,11 +379,12 @@ The following fields are returned in the JSON response body:
 - ``total`` - Number of rooms.
 
 
-List media of an user
-================================
+List media of a user
+====================
 Gets a list of all local media that a specific ``user_id`` has created.
-The response is ordered by creation date descending and media ID descending.
-The newest media is on top.
+By default, the response is ordered by descending creation date and ascending media ID.
+The newest media is on top. You can change the order with parameters
+``order_by`` and ``dir``.
 
 The API is::
 
@@ -440,6 +441,35 @@ The following parameters should be set in the URL:
   denoting the offset in the returned results. This should be treated as an opaque value and
   not explicitly set to anything other than the return value of ``next_token`` from a previous call.
   Defaults to ``0``.
+- ``order_by`` - The method by which to sort the returned list of media.
+  If the ordered field has duplicates, the second order is always by ascending ``media_id``,
+  which guarantees a stable ordering. Valid values are:
+
+  - ``media_id`` - Media are ordered alphabetically by ``media_id``.
+  - ``upload_name`` - Media are ordered alphabetically by name the media was uploaded with.
+  - ``created_ts`` - Media are ordered by when the content was uploaded in ms.
+    Smallest to largest. This is the default.
+  - ``last_access_ts`` - Media are ordered by when the content was last accessed in ms.
+    Smallest to largest.
+  - ``media_length`` - Media are ordered by length of the media in bytes.
+    Smallest to largest.
+  - ``media_type`` - Media are ordered alphabetically by MIME-type.
+  - ``quarantined_by`` - Media are ordered alphabetically by the user ID that
+    initiated the quarantine request for this media.
+  - ``safe_from_quarantine`` - Media are ordered by the status if this media is safe
+    from quarantining.
+
+- ``dir`` - Direction of media order. Either ``f`` for forwards or ``b`` for backwards.
+  Setting this value to ``b`` will reverse the above sort order. Defaults to ``f``.
+
+If neither ``order_by`` nor ``dir`` is set, the default order is newest media on top
+(corresponds to ``order_by`` = ``created_ts`` and ``dir`` = ``b``).
+
+Caution. The database only has indexes on the columns ``media_id``,
+``user_id`` and ``created_ts``. This means that if a different sort order is used
+(``upload_name``, ``last_access_ts``, ``media_length``, ``media_type``,
+``quarantined_by`` or ``safe_from_quarantine``), this can cause a large load on the
+database, especially for large environments.
 
 **Response**
 
diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py
index 998a0ef671..9c701c7348 100644
--- a/synapse/rest/admin/users.py
+++ b/synapse/rest/admin/users.py
@@ -35,6 +35,7 @@ from synapse.rest.admin._base import (
     assert_user_is_admin,
 )
 from synapse.rest.client.v2_alpha._base import client_patterns
+from synapse.storage.databases.main.media_repository import MediaSortOrder
 from synapse.types import JsonDict, UserID
 
 if TYPE_CHECKING:
@@ -832,8 +833,33 @@ class UserMediaRestServlet(RestServlet):
                 errcode=Codes.INVALID_PARAM,
             )
 
+        # If neither `order_by` nor `dir` is set, set the default order
+        # to newest media is on top for backward compatibility.
+        if b"order_by" not in request.args and b"dir" not in request.args:
+            order_by = MediaSortOrder.CREATED_TS.value
+            direction = "b"
+        else:
+            order_by = parse_string(
+                request,
+                "order_by",
+                default=MediaSortOrder.CREATED_TS.value,
+                allowed_values=(
+                    MediaSortOrder.MEDIA_ID.value,
+                    MediaSortOrder.UPLOAD_NAME.value,
+                    MediaSortOrder.CREATED_TS.value,
+                    MediaSortOrder.LAST_ACCESS_TS.value,
+                    MediaSortOrder.MEDIA_LENGTH.value,
+                    MediaSortOrder.MEDIA_TYPE.value,
+                    MediaSortOrder.QUARANTINED_BY.value,
+                    MediaSortOrder.SAFE_FROM_QUARANTINE.value,
+                ),
+            )
+            direction = parse_string(
+                request, "dir", default="f", allowed_values=("f", "b")
+            )
+
         media, total = await self.store.get_local_media_by_user_paginate(
-            start, limit, user_id
+            start, limit, user_id, order_by, direction
         )
 
         ret = {"media": media, "total": total}
diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py
index 9ee642c668..274f8de595 100644
--- a/synapse/storage/databases/main/media_repository.py
+++ b/synapse/storage/databases/main/media_repository.py
@@ -13,6 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from enum import Enum
 from typing import Any, Dict, Iterable, List, Optional, Tuple
 
 from synapse.storage._base import SQLBaseStore
@@ -23,6 +24,22 @@ BG_UPDATE_REMOVE_MEDIA_REPO_INDEX_WITHOUT_METHOD = (
 )
 
 
+class MediaSortOrder(Enum):
+    """
+    Enum to define the sorting method used when returning media with
+    get_local_media_by_user_paginate
+    """
+
+    MEDIA_ID = "media_id"
+    UPLOAD_NAME = "upload_name"
+    CREATED_TS = "created_ts"
+    LAST_ACCESS_TS = "last_access_ts"
+    MEDIA_LENGTH = "media_length"
+    MEDIA_TYPE = "media_type"
+    QUARANTINED_BY = "quarantined_by"
+    SAFE_FROM_QUARANTINE = "safe_from_quarantine"
+
+
 class MediaRepositoryBackgroundUpdateStore(SQLBaseStore):
     def __init__(self, database: DatabasePool, db_conn, hs):
         super().__init__(database, db_conn, hs)
@@ -118,7 +135,12 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
         )
 
     async def get_local_media_by_user_paginate(
-        self, start: int, limit: int, user_id: str
+        self,
+        start: int,
+        limit: int,
+        user_id: str,
+        order_by: MediaSortOrder = MediaSortOrder.CREATED_TS.value,
+        direction: str = "f",
     ) -> Tuple[List[Dict[str, Any]], int]:
         """Get a paginated list of metadata for a local piece of media
         which an user_id has uploaded
@@ -127,6 +149,8 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
             start: offset in the list
             limit: maximum amount of media_ids to retrieve
             user_id: fully-qualified user id
+            order_by: the sort order of the returned list
+            direction: sort ascending or descending
         Returns:
             A paginated list of all metadata of user's media,
             plus the total count of all the user's media
@@ -134,6 +158,14 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
 
         def get_local_media_by_user_paginate_txn(txn):
 
+            # Set ordering
+            order_by_column = MediaSortOrder(order_by).value
+
+            if direction == "b":
+                order = "DESC"
+            else:
+                order = "ASC"
+
             args = [user_id]
             sql = """
                 SELECT COUNT(*) as total_media
@@ -155,9 +187,12 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
                     "safe_from_quarantine"
                 FROM local_media_repository
                 WHERE user_id = ?
-                ORDER BY created_ts DESC, media_id DESC
+                ORDER BY {order_by_column} {order}, media_id ASC
                 LIMIT ? OFFSET ?
-            """
+            """.format(
+                order_by_column=order_by_column,
+                order=order,
+            )
 
             args += [limit, start]
             txn.execute(sql, args)
diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py
index ba26895391..e58d5cf0db 100644
--- a/tests/rest/admin/test_user.py
+++ b/tests/rest/admin/test_user.py
@@ -18,7 +18,7 @@ import hmac
 import json
 import urllib.parse
 from binascii import unhexlify
-from typing import Optional
+from typing import List, Optional
 
 from mock import Mock
 
@@ -31,6 +31,7 @@ from synapse.rest.client.v2_alpha import devices, sync
 from synapse.types import JsonDict
 
 from tests import unittest
+from tests.server import FakeSite, make_request
 from tests.test_utils import make_awaitable
 from tests.unittest import override_config
 
@@ -1954,6 +1955,7 @@ class UserMediaRestTestCase(unittest.HomeserverTestCase):
     ]
 
     def prepare(self, reactor, clock, hs):
+        self.store = hs.get_datastore()
         self.media_repo = hs.get_media_repository_resource()
 
         self.admin_user = self.register_user("admin", "pass", admin=True)
@@ -2024,7 +2026,7 @@ class UserMediaRestTestCase(unittest.HomeserverTestCase):
 
         number_media = 20
         other_user_tok = self.login("user", "pass")
-        self._create_media(other_user_tok, number_media)
+        self._create_media_for_user(other_user_tok, number_media)
 
         channel = self.make_request(
             "GET",
@@ -2045,7 +2047,7 @@ class UserMediaRestTestCase(unittest.HomeserverTestCase):
 
         number_media = 20
         other_user_tok = self.login("user", "pass")
-        self._create_media(other_user_tok, number_media)
+        self._create_media_for_user(other_user_tok, number_media)
 
         channel = self.make_request(
             "GET",
@@ -2066,7 +2068,7 @@ class UserMediaRestTestCase(unittest.HomeserverTestCase):
 
         number_media = 20
         other_user_tok = self.login("user", "pass")
-        self._create_media(other_user_tok, number_media)
+        self._create_media_for_user(other_user_tok, number_media)
 
         channel = self.make_request(
             "GET",
@@ -2080,11 +2082,31 @@ class UserMediaRestTestCase(unittest.HomeserverTestCase):
         self.assertEqual(len(channel.json_body["media"]), 10)
         self._check_fields(channel.json_body["media"])
 
-    def test_limit_is_negative(self):
+    def test_invalid_parameter(self):
         """
-        Testing that a negative limit parameter returns a 400
+        If parameters are invalid, an error is returned.
         """
+        # unkown order_by
+        channel = self.make_request(
+            "GET",
+            self.url + "?order_by=bar",
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(Codes.UNKNOWN, channel.json_body["errcode"])
 
+        # invalid search order
+        channel = self.make_request(
+            "GET",
+            self.url + "?dir=bar",
+            access_token=self.admin_user_tok,
+        )
+
+        self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(Codes.UNKNOWN, channel.json_body["errcode"])
+
+        # negative limit
         channel = self.make_request(
             "GET",
             self.url + "?limit=-5",
@@ -2094,11 +2116,7 @@ class UserMediaRestTestCase(unittest.HomeserverTestCase):
         self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"])
         self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"])
 
-    def test_from_is_negative(self):
-        """
-        Testing that a negative from parameter returns a 400
-        """
-
+        # negative from
         channel = self.make_request(
             "GET",
             self.url + "?from=-5",
@@ -2115,7 +2133,7 @@ class UserMediaRestTestCase(unittest.HomeserverTestCase):
 
         number_media = 20
         other_user_tok = self.login("user", "pass")
-        self._create_media(other_user_tok, number_media)
+        self._create_media_for_user(other_user_tok, number_media)
 
         #  `next_token` does not appear
         # Number of results is the number of entries
@@ -2193,7 +2211,7 @@ class UserMediaRestTestCase(unittest.HomeserverTestCase):
 
         number_media = 5
         other_user_tok = self.login("user", "pass")
-        self._create_media(other_user_tok, number_media)
+        self._create_media_for_user(other_user_tok, number_media)
 
         channel = self.make_request(
             "GET",
@@ -2207,11 +2225,118 @@ class UserMediaRestTestCase(unittest.HomeserverTestCase):
         self.assertNotIn("next_token", channel.json_body)
         self._check_fields(channel.json_body["media"])
 
-    def _create_media(self, user_token, number_media):
+    def test_order_by(self):
+        """
+        Testing order list with parameter `order_by`
+        """
+
+        other_user_tok = self.login("user", "pass")
+
+        # Resolution: 1×1, MIME type: image/png, Extension: png, Size: 67 B
+        image_data1 = unhexlify(
+            b"89504e470d0a1a0a0000000d4948445200000001000000010806"
+            b"0000001f15c4890000000a49444154789c63000100000500010d"
+            b"0a2db40000000049454e44ae426082"
+        )
+        # Resolution: 1×1, MIME type: image/gif, Extension: gif, Size: 35 B
+        image_data2 = unhexlify(
+            b"47494638376101000100800100000000"
+            b"ffffff2c00000000010001000002024c"
+            b"01003b"
+        )
+        # Resolution: 1×1, MIME type: image/bmp, Extension: bmp, Size: 54 B
+        image_data3 = unhexlify(
+            b"424d3a0000000000000036000000280000000100000001000000"
+            b"0100180000000000040000000000000000000000000000000000"
+            b"0000"
+        )
+
+        # create media and make sure they do not have the same timestamp
+        media1 = self._create_media_and_access(other_user_tok, image_data1, "image.png")
+        self.pump(1.0)
+        media2 = self._create_media_and_access(other_user_tok, image_data2, "image.gif")
+        self.pump(1.0)
+        media3 = self._create_media_and_access(other_user_tok, image_data3, "image.bmp")
+        self.pump(1.0)
+
+        # Mark one media as safe from quarantine.
+        self.get_success(self.store.mark_local_media_as_safe(media2))
+        # Quarantine one media
+        self.get_success(
+            self.store.quarantine_media_by_id("test", media3, self.admin_user)
+        )
+
+        # order by default ("created_ts")
+        # default is backwards
+        self._order_test([media3, media2, media1], None)
+        self._order_test([media1, media2, media3], None, "f")
+        self._order_test([media3, media2, media1], None, "b")
+
+        # sort by media_id
+        sorted_media = sorted([media1, media2, media3], reverse=False)
+        sorted_media_reverse = sorted(sorted_media, reverse=True)
+
+        # order by media_id
+        self._order_test(sorted_media, "media_id")
+        self._order_test(sorted_media, "media_id", "f")
+        self._order_test(sorted_media_reverse, "media_id", "b")
+
+        # order by upload_name
+        self._order_test([media3, media2, media1], "upload_name")
+        self._order_test([media3, media2, media1], "upload_name", "f")
+        self._order_test([media1, media2, media3], "upload_name", "b")
+
+        # order by media_type
+        # result is ordered by media_id
+        # because of uploaded media_type is always 'application/json'
+        self._order_test(sorted_media, "media_type")
+        self._order_test(sorted_media, "media_type", "f")
+        self._order_test(sorted_media, "media_type", "b")
+
+        # order by media_length
+        self._order_test([media2, media3, media1], "media_length")
+        self._order_test([media2, media3, media1], "media_length", "f")
+        self._order_test([media1, media3, media2], "media_length", "b")
+
+        # order by created_ts
+        self._order_test([media1, media2, media3], "created_ts")
+        self._order_test([media1, media2, media3], "created_ts", "f")
+        self._order_test([media3, media2, media1], "created_ts", "b")
+
+        # order by last_access_ts
+        self._order_test([media1, media2, media3], "last_access_ts")
+        self._order_test([media1, media2, media3], "last_access_ts", "f")
+        self._order_test([media3, media2, media1], "last_access_ts", "b")
+
+        # order by quarantined_by
+        # one media is in quarantine, others are ordered by media_ids
+
+        # Different sort order of SQlite and PostreSQL
+        # If a media is not in quarantine `quarantined_by` is NULL
+        # SQLite considers NULL to be smaller than any other value.
+        # PostreSQL considers NULL to be larger than any other value.
+
+        # self._order_test(sorted([media1, media2]) + [media3], "quarantined_by")
+        # self._order_test(sorted([media1, media2]) + [media3], "quarantined_by", "f")
+        # self._order_test([media3] + sorted([media1, media2]), "quarantined_by", "b")
+
+        # order by safe_from_quarantine
+        # one media is safe from quarantine, others are ordered by media_ids
+        self._order_test(sorted([media1, media3]) + [media2], "safe_from_quarantine")
+        self._order_test(
+            sorted([media1, media3]) + [media2], "safe_from_quarantine", "f"
+        )
+        self._order_test(
+            [media2] + sorted([media1, media3]), "safe_from_quarantine", "b"
+        )
+
+    def _create_media_for_user(self, user_token: str, number_media: int):
         """
         Create a number of media for a specific user
+        Args:
+            user_token: Access token of the user
+            number_media: Number of media to be created for the user
         """
-        upload_resource = self.media_repo.children[b"upload"]
         for i in range(number_media):
             # file size is 67 Byte
             image_data = unhexlify(
@@ -2220,13 +2345,60 @@ class UserMediaRestTestCase(unittest.HomeserverTestCase):
                 b"0a2db40000000049454e44ae426082"
             )
 
-            # Upload some media into the room
-            self.helper.upload_media(
-                upload_resource, image_data, tok=user_token, expect_code=200
-            )
+            self._create_media_and_access(user_token, image_data)
+
+    def _create_media_and_access(
+        self,
+        user_token: str,
+        image_data: bytes,
+        filename: str = "image1.png",
+    ) -> str:
+        """
+        Create one media for a specific user, access and returns `media_id`
+        Args:
+            user_token: Access token of the user
+            image_data: binary data of image
+            filename: The filename of the media to be uploaded
+        Returns:
+            The ID of the newly created media.
+        """
+        upload_resource = self.media_repo.children[b"upload"]
+        download_resource = self.media_repo.children[b"download"]
+
+        # Upload some media into the room
+        response = self.helper.upload_media(
+            upload_resource, image_data, user_token, filename, expect_code=200
+        )
+
+        # Extract media ID from the response
+        server_and_media_id = response["content_uri"][6:]  # Cut off 'mxc://'
+        media_id = server_and_media_id.split("/")[1]
+
+        # Try to access a media and to create `last_access_ts`
+        channel = make_request(
+            self.reactor,
+            FakeSite(download_resource),
+            "GET",
+            server_and_media_id,
+            shorthand=False,
+            access_token=user_token,
+        )
+
+        self.assertEqual(
+            200,
+            channel.code,
+            msg=(
+                "Expected to receive a 200 on accessing media: %s" % server_and_media_id
+            ),
+        )
 
-    def _check_fields(self, content):
-        """Checks that all attributes are present in content"""
+        return media_id
+
+    def _check_fields(self, content: JsonDict):
+        """Checks that the expected user attributes are present in content
+        Args:
+            content: List that is checked for content
+        """
         for m in content:
             self.assertIn("media_id", m)
             self.assertIn("media_type", m)
@@ -2237,6 +2409,38 @@ class UserMediaRestTestCase(unittest.HomeserverTestCase):
             self.assertIn("quarantined_by", m)
             self.assertIn("safe_from_quarantine", m)
 
+    def _order_test(
+        self,
+        expected_media_list: List[str],
+        order_by: Optional[str],
+        dir: Optional[str] = None,
+    ):
+        """Request the list of media in a certain order. Assert that order is what
+        we expect
+        Args:
+            expected_media_list: The list of media_ids in the order we expect to get
+                back from the server
+            order_by: The type of ordering to give the server
+            dir: The direction of ordering to give the server
+        """
+
+        url = self.url + "?"
+        if order_by is not None:
+            url += "order_by=%s&" % (order_by,)
+        if dir is not None and dir in ("b", "f"):
+            url += "dir=%s" % (dir,)
+        channel = self.make_request(
+            "GET",
+            url.encode("ascii"),
+            access_token=self.admin_user_tok,
+        )
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(channel.json_body["total"], len(expected_media_list))
+
+        returned_order = [row["media_id"] for row in channel.json_body["media"]]
+        self.assertEqual(expected_media_list, returned_order)
+        self._check_fields(channel.json_body["media"])
+
 
 class UserTokenRestTestCase(unittest.HomeserverTestCase):
     """Test for /_synapse/admin/v1/users/<user>/login"""