summary refs log tree commit diff
diff options
context:
space:
mode:
authorDirk Klimpel <5740567+dklimpel@users.noreply.github.com>2020-10-26 18:02:28 +0100
committerGitHub <noreply@github.com>2020-10-26 17:02:28 +0000
commit49d72dea2a6804e4795fb9e1cbc1f1bb1354f08f (patch)
treec39cbcd086b1a9beb9ced705f6d07ae17345aca0
parentFix filepath of Dex example config (#8657) (diff)
downloadsynapse-49d72dea2a6804e4795fb9e1cbc1f1bb1354f08f.tar.xz
Add an admin api to delete local media. (#8519)
Related to: #6459, #3479

Add `DELETE /_synapse/admin/v1/media/<server_name>/<media_id>` to delete
a single file from server.
Diffstat (limited to '')
-rw-r--r--changelog.d/8519.feature1
-rw-r--r--docs/admin_api/media_admin_api.md79
-rw-r--r--synapse/rest/admin/media.py81
-rw-r--r--synapse/rest/media/v1/filepath.py17
-rw-r--r--synapse/rest/media/v1/media_repository.py72
-rw-r--r--synapse/storage/databases/main/media_repository.py53
-rw-r--r--tests/rest/admin/test_media.py568
7 files changed, 868 insertions, 3 deletions
diff --git a/changelog.d/8519.feature b/changelog.d/8519.feature
new file mode 100644
index 0000000000..e2ab548681
--- /dev/null
+++ b/changelog.d/8519.feature
@@ -0,0 +1 @@
+Add an admin API to delete a single file, or files that have not been used for a defined time, from the server. Contributed by @dklimpel.
\ No newline at end of file
diff --git a/docs/admin_api/media_admin_api.md b/docs/admin_api/media_admin_api.md
index 26948770d8..3994e1f1a9 100644
--- a/docs/admin_api/media_admin_api.md
+++ b/docs/admin_api/media_admin_api.md
@@ -100,3 +100,82 @@ Response:
   "num_quarantined": 10  # The number of media items successfully quarantined
 }
 ```
+
+# Delete local media
+This API deletes the *local* media from the disk of your own server.
+This includes any local thumbnails and copies of media downloaded from
+remote homeservers.
+This API will not affect media that has been uploaded to external
+media repositories (e.g. https://github.com/turt2live/matrix-media-repo/).
+See also [purge_remote_media.rst](purge_remote_media.rst).
+
+## Delete a specific local media
+Delete a specific `media_id`.
+
+Request:
+
+```
+DELETE /_synapse/admin/v1/media/<server_name>/<media_id>
+
+{}
+```
+
+URL Parameters
+
+* `server_name`: string - The name of your local server (e.g `matrix.org`)
+* `media_id`: string - The ID of the media (e.g `abcdefghijklmnopqrstuvwx`)
+
+Response:
+
+```json
+    {
+       "deleted_media": [
+          "abcdefghijklmnopqrstuvwx"
+       ],
+       "total": 1
+    }
+```
+
+The following fields are returned in the JSON response body:
+
+* `deleted_media`: an array of strings - List of deleted `media_id`
+* `total`: integer - Total number of deleted `media_id`
+
+## Delete local media by date or size
+
+Request:
+
+```
+POST /_synapse/admin/v1/media/<server_name>/delete?before_ts=<before_ts>
+
+{}
+```
+
+URL Parameters
+
+* `server_name`: string - The name of your local server (e.g `matrix.org`).
+* `before_ts`: string representing a positive integer - Unix timestamp in ms.
+Files that were last used before this timestamp will be deleted. It is the timestamp of
+last access, not the timestamp of creation.
+* `size_gt`: Optional - string representing a positive integer - Size of the media in bytes.
+Files that are larger will be deleted. Defaults to `0`.
+* `keep_profiles`: Optional - string representing a boolean - Switch to also delete files
+that are still used in image data (e.g user profile, room avatar).
+If `false` these files will be deleted. Defaults to `true`.
+
+Response:
+
+```json
+    {
+       "deleted_media": [
+          "abcdefghijklmnopqrstuvwx",
+          "abcdefghijklmnopqrstuvwz"
+       ],
+       "total": 2
+    }
+```
+
+The following fields are returned in the JSON response body:
+
+* `deleted_media`: an array of strings - List of deleted `media_id`
+* `total`: integer - Total number of deleted `media_id`
diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py
index ee75095c0e..ba50cb876d 100644
--- a/synapse/rest/admin/media.py
+++ b/synapse/rest/admin/media.py
@@ -16,9 +16,10 @@
 
 import logging
 
-from synapse.api.errors import AuthError
-from synapse.http.servlet import RestServlet, parse_integer
+from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError
+from synapse.http.servlet import RestServlet, parse_boolean, parse_integer
 from synapse.rest.admin._base import (
+    admin_patterns,
     assert_requester_is_admin,
     assert_user_is_admin,
     historical_admin_path_patterns,
@@ -150,6 +151,80 @@ class PurgeMediaCacheRestServlet(RestServlet):
         return 200, ret
 
 
+class DeleteMediaByID(RestServlet):
+    """Delete one local media item by its ID (400 if not local, 404 if unknown).
+    """
+
+    PATTERNS = admin_patterns("/media/(?P<server_name>[^/]+)/(?P<media_id>[^/]+)")
+
+    def __init__(self, hs):
+        self.store = hs.get_datastore()
+        self.auth = hs.get_auth()
+        self.server_name = hs.hostname
+        self.media_repository = hs.get_media_repository()
+
+    async def on_DELETE(self, request, server_name: str, media_id: str):
+        await assert_requester_is_admin(self.auth, request)
+
+        if self.server_name != server_name:
+            raise SynapseError(400, "Can only delete local media")
+
+        if await self.store.get_local_media(media_id) is None:
+            raise NotFoundError("Unknown media")
+
+        logging.info("Deleting local media by ID: %s", media_id)
+
+        deleted_media, total = await self.media_repository.delete_local_media(media_id)
+        return 200, {"deleted_media": deleted_media, "total": total}
+
+
+class DeleteMediaByDateSize(RestServlet):
+    """Delete local media that was last used before `before_ts` and is larger
+    than `size_gt` bytes; `keep_profiles` spares media still used as an avatar.
+    """
+
+    PATTERNS = admin_patterns("/media/(?P<server_name>[^/]+)/delete")
+
+    def __init__(self, hs):
+        self.store = hs.get_datastore()
+        self.auth = hs.get_auth()
+        self.server_name = hs.hostname
+        self.media_repository = hs.get_media_repository()
+
+    async def on_POST(self, request, server_name: str):
+        await assert_requester_is_admin(self.auth, request)
+
+        before_ts = parse_integer(request, "before_ts", required=True)
+        size_gt = parse_integer(request, "size_gt", default=0)
+        keep_profiles = parse_boolean(request, "keep_profiles", default=True)
+
+        if before_ts < 0:
+            raise SynapseError(
+                400,
+                "Query parameter before_ts must be a string representing a positive integer.",
+                errcode=Codes.INVALID_PARAM,
+            )
+        if size_gt < 0:
+            raise SynapseError(
+                400,
+                "Query parameter size_gt must be a string representing a positive integer.",
+                errcode=Codes.INVALID_PARAM,
+            )
+
+        if self.server_name != server_name:
+            raise SynapseError(400, "Can only delete local media")
+
+        logging.info(
+            "Deleting local media by timestamp: %s, size larger than: %s, keep profile media: %s",
+            before_ts, size_gt, keep_profiles,
+        )
+
+        deleted_media, total = await self.media_repository.delete_old_local_media(
+            before_ts, size_gt, keep_profiles
+        )
+        return 200, {"deleted_media": deleted_media, "total": total}
+
+
 def register_servlets_for_media_repo(hs, http_server):
     """
     Media repo specific APIs.
@@ -159,3 +234,5 @@ def register_servlets_for_media_repo(hs, http_server):
     QuarantineMediaByID(hs).register(http_server)
     QuarantineMediaByUser(hs).register(http_server)
     ListMediaInRoom(hs).register(http_server)
+    DeleteMediaByID(hs).register(http_server)
+    DeleteMediaByDateSize(hs).register(http_server)
diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py
index 7447eeaebe..9e079f672f 100644
--- a/synapse/rest/media/v1/filepath.py
+++ b/synapse/rest/media/v1/filepath.py
@@ -69,6 +69,23 @@ class MediaFilePaths:
 
     local_media_thumbnail = _wrap_in_base_path(local_media_thumbnail_rel)
 
+    def local_media_thumbnail_dir(self, media_id: str) -> str:
+        """
+        Retrieve the local store path of thumbnails of a given media_id
+
+        Args:
+            media_id: The media ID to query.
+        Returns:
+            Path of local_thumbnails from media_id
+        """
+        return os.path.join(
+            self.base_path,
+            "local_thumbnails",
+            media_id[0:2],
+            media_id[2:4],
+            media_id[4:],
+        )
+
     def remote_media_filepath_rel(self, server_name, file_id):
         return os.path.join(
             "remote_content", server_name, file_id[0:2], file_id[2:4], file_id[4:]
diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py
index e1192b47cd..5cce7237a0 100644
--- a/synapse/rest/media/v1/media_repository.py
+++ b/synapse/rest/media/v1/media_repository.py
@@ -18,7 +18,7 @@ import errno
 import logging
 import os
 import shutil
-from typing import IO, Dict, Optional, Tuple
+from typing import IO, Dict, List, Optional, Tuple
 
 import twisted.internet.error
 import twisted.web.http
@@ -767,6 +767,76 @@ class MediaRepository:
 
         return {"deleted": deleted}
 
+    async def delete_local_media(self, media_id: str) -> Tuple[List[str], int]:
+        """
+        Delete the given local or remote media ID from this server
+
+        Args:
+            media_id: The media ID to delete.
+        Returns:
+            A tuple of (list of deleted media IDs, total deleted media IDs).
+        """
+        return await self._remove_local_media_from_disk([media_id])
+
+    async def delete_old_local_media(
+        self, before_ts: int, size_gt: int = 0, keep_profiles: bool = True,
+    ) -> Tuple[List[str], int]:
+        """
+        Delete local or remote media from this server by size and timestamp. Removes
+        media files, any thumbnails and cached URLs.
+
+        Args:
+            before_ts: Unix timestamp in ms.
+                       Files that were last used before this timestamp will be deleted
+            size_gt: Size of the media in bytes. Files that are larger will be deleted
+            keep_profiles: Switch to delete also files that are still used in image data
+                           (e.g user profile, room avatar)
+                           If false these files will be deleted
+        Returns:
+            A tuple of (list of deleted media IDs, total deleted media IDs).
+        """
+        old_media = await self.store.get_local_media_before(
+            before_ts, size_gt, keep_profiles,
+        )
+        return await self._remove_local_media_from_disk(old_media)
+
+    async def _remove_local_media_from_disk(
+        self, media_ids: List[str]
+    ) -> Tuple[List[str], int]:
+        """
+        Delete local or remote media from this server. Removes media files,
+        any thumbnails and cached URLs.
+
+        Args:
+            media_ids: List of media_id to delete
+        Returns:
+            A tuple of (list of deleted media IDs, total deleted media IDs).
+        """
+        removed_media = []
+        for media_id in media_ids:
+            logger.info("Deleting media with ID '%s'", media_id)
+            full_path = self.filepaths.local_media_filepath(media_id)
+            try:
+                os.remove(full_path)
+            except OSError as e:
+                logger.warning("Failed to remove file: %r: %s", full_path, e)
+                # A missing file is fine: carry on and clean up thumbnails and
+                # database rows. Any other error: skip it, so it is not reported.
+                if e.errno != errno.ENOENT:
+                    continue
+
+            thumbnail_dir = self.filepaths.local_media_thumbnail_dir(media_id)
+            shutil.rmtree(thumbnail_dir, ignore_errors=True)
+
+            await self.store.delete_remote_media(self.server_name, media_id)
+
+            await self.store.delete_url_cache((media_id,))
+            await self.store.delete_url_cache_media((media_id,))
+
+            removed_media.append(media_id)
+
+        return removed_media, len(removed_media)
+
 
 class MediaRepositoryResource(Resource):
     """File uploading and downloading.
diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py
index cc538c5c10..7ef5f1bf2b 100644
--- a/synapse/storage/databases/main/media_repository.py
+++ b/synapse/storage/databases/main/media_repository.py
@@ -93,6 +93,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
 
     def __init__(self, database: DatabasePool, db_conn, hs):
         super().__init__(database, db_conn, hs)
+        self.server_name = hs.hostname
 
     async def get_local_media(self, media_id: str) -> Optional[Dict[str, Any]]:
         """Get the metadata for a local piece of media
@@ -115,6 +116,58 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
             desc="get_local_media",
         )
 
+    async def get_local_media_before(
+        self, before_ts: int, size_gt: int, keep_profiles: bool,
+    ) -> List[str]:
+        """List local media IDs last used before `before_ts` and over `size_gt` bytes."""
+        # Media that was never accessed has `last_access_ts IS NULL`; for such
+        # rows fall back to comparing `created_ts` against `before_ts`.
+        sql = """
+            SELECT media_id
+            FROM local_media_repository AS lmr
+            WHERE
+                ( last_access_ts < ?
+                OR ( created_ts < ? AND last_access_ts IS NULL ) )
+                AND media_length > ?
+        """
+
+        if keep_profiles:
+            sql_keep = """
+                AND (
+                    NOT EXISTS
+                        (SELECT 1
+                         FROM profiles
+                         WHERE profiles.avatar_url = '{media_prefix}' || lmr.media_id)
+                    AND NOT EXISTS
+                        (SELECT 1
+                         FROM groups
+                         WHERE groups.avatar_url = '{media_prefix}' || lmr.media_id)
+                    AND NOT EXISTS
+                        (SELECT 1
+                         FROM room_memberships
+                         WHERE room_memberships.avatar_url = '{media_prefix}' || lmr.media_id)
+                    AND NOT EXISTS
+                        (SELECT 1
+                         FROM user_directory
+                         WHERE user_directory.avatar_url = '{media_prefix}' || lmr.media_id)
+                    AND NOT EXISTS
+                        (SELECT 1
+                         FROM room_stats_state
+                         WHERE room_stats_state.avatar = '{media_prefix}' || lmr.media_id)
+                )
+            """.format(
+                media_prefix="mxc://%s/" % (self.server_name,),
+            )
+            sql += sql_keep
+
+        def _get_local_media_before_txn(txn):
+            txn.execute(sql, (before_ts, before_ts, size_gt))
+            return [row[0] for row in txn]
+
+        return await self.db_pool.runInteraction(
+            "get_local_media_before", _get_local_media_before_txn
+        )
+
     async def store_local_media(
         self,
         media_id,
diff --git a/tests/rest/admin/test_media.py b/tests/rest/admin/test_media.py
new file mode 100644
index 0000000000..721fa1ed51
--- /dev/null
+++ b/tests/rest/admin/test_media.py
@@ -0,0 +1,568 @@
+# -*- coding: utf-8 -*-
+# Copyright 2020 Dirk Klimpel
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import os
+from binascii import unhexlify
+
+import synapse.rest.admin
+from synapse.api.errors import Codes
+from synapse.rest.client.v1 import login, profile, room
+from synapse.rest.media.v1.filepath import MediaFilePaths
+
+from tests import unittest
+
+
+class DeleteMediaByIDTestCase(unittest.HomeserverTestCase):
+
+    servlets = [
+        synapse.rest.admin.register_servlets,
+        synapse.rest.admin.register_servlets_for_media_repo,
+        login.register_servlets,
+    ]
+
+    def prepare(self, reactor, clock, hs):
+        self.handler = hs.get_device_handler()
+        self.media_repo = hs.get_media_repository_resource()
+        self.server_name = hs.hostname
+
+        self.admin_user = self.register_user("admin", "pass", admin=True)
+        self.admin_user_tok = self.login("admin", "pass")
+
+        self.filepaths = MediaFilePaths(hs.config.media_store_path)
+
+    def test_no_auth(self):
+        """
+        Try to delete media without authentication.
+        """
+        url = "/_synapse/admin/v1/media/%s/%s" % (self.server_name, "12345")
+
+        request, channel = self.make_request("DELETE", url, b"{}")
+        self.render(request)
+
+        self.assertEqual(401, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"])
+
+    def test_requester_is_no_admin(self):
+        """
+        If the user is not a server admin, an error is returned.
+        """
+        self.other_user = self.register_user("user", "pass")
+        self.other_user_token = self.login("user", "pass")
+
+        url = "/_synapse/admin/v1/media/%s/%s" % (self.server_name, "12345")
+
+        request, channel = self.make_request(
+            "DELETE", url, access_token=self.other_user_token,
+        )
+        self.render(request)
+
+        self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"])
+
+    def test_media_does_not_exist(self):
+        """
+        Tests that deleting media that does not exist returns a 404
+        """
+        url = "/_synapse/admin/v1/media/%s/%s" % (self.server_name, "12345")
+
+        request, channel = self.make_request(
+            "DELETE", url, access_token=self.admin_user_tok,
+        )
+        self.render(request)
+
+        self.assertEqual(404, channel.code, msg=channel.json_body)
+        self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"])
+
+    def test_media_is_not_local(self):
+        """
+        Tests that deleting media on a server name that is not local returns a 400
+        """
+        url = "/_synapse/admin/v1/media/%s/%s" % ("unknown_domain", "12345")
+
+        request, channel = self.make_request(
+            "DELETE", url, access_token=self.admin_user_tok,
+        )
+        self.render(request)
+
+        self.assertEqual(400, channel.code, msg=channel.json_body)
+        self.assertEqual("Can only delete local media", channel.json_body["error"])
+
+    def test_delete_media(self):
+        """
+        Tests that deleting a media item succeeds and removes its file from disk
+        """
+
+        download_resource = self.media_repo.children[b"download"]
+        upload_resource = self.media_repo.children[b"upload"]
+        image_data = unhexlify(
+            b"89504e470d0a1a0a0000000d4948445200000001000000010806"
+            b"0000001f15c4890000000a49444154789c63000100000500010d"
+            b"0a2db40000000049454e44ae426082"
+        )
+
+        # Upload some media to the media repository
+        response = self.helper.upload_media(
+            upload_resource, image_data, tok=self.admin_user_tok, expect_code=200
+        )
+        # Extract media ID from the response
+        server_and_media_id = response["content_uri"][6:]  # Cut off 'mxc://'
+        server_name, media_id = server_and_media_id.split("/")
+
+        self.assertEqual(server_name, self.server_name)
+
+        # Attempt to access media
+        request, channel = self.make_request(
+            "GET",
+            server_and_media_id,
+            shorthand=False,
+            access_token=self.admin_user_tok,
+        )
+        request.render(download_resource)
+        self.pump(1.0)
+
+        # Should be successful
+        self.assertEqual(
+            200,
+            channel.code,
+            msg=(
+                "Expected to receive a 200 on accessing media: %s" % server_and_media_id
+            ),
+        )
+
+        # Test if the file exists
+        local_path = self.filepaths.local_media_filepath(media_id)
+        self.assertTrue(os.path.exists(local_path))
+
+        url = "/_synapse/admin/v1/media/%s/%s" % (self.server_name, media_id)
+
+        # Delete media
+        request, channel = self.make_request(
+            "DELETE", url, access_token=self.admin_user_tok,
+        )
+        self.render(request)
+
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(1, channel.json_body["total"])
+        self.assertEqual(
+            media_id, channel.json_body["deleted_media"][0],
+        )
+
+        # Attempt to access media
+        request, channel = self.make_request(
+            "GET",
+            server_and_media_id,
+            shorthand=False,
+            access_token=self.admin_user_tok,
+        )
+        request.render(download_resource)
+        self.pump(1.0)
+        self.assertEqual(
+            404,
+            channel.code,
+            msg=(
+                "Expected to receive a 404 on accessing deleted media: %s"
+                % server_and_media_id
+            ),
+        )
+
+        # Test if the file is deleted
+        self.assertFalse(os.path.exists(local_path))
+
+
+class DeleteMediaByDateSizeTestCase(unittest.HomeserverTestCase):
+
+    servlets = [
+        synapse.rest.admin.register_servlets,
+        synapse.rest.admin.register_servlets_for_media_repo,
+        login.register_servlets,
+        profile.register_servlets,
+        room.register_servlets,
+    ]
+
+    def prepare(self, reactor, clock, hs):
+        self.handler = hs.get_device_handler()
+        self.media_repo = hs.get_media_repository_resource()
+        self.server_name = hs.hostname
+        self.clock = hs.clock
+
+        self.admin_user = self.register_user("admin", "pass", admin=True)
+        self.admin_user_tok = self.login("admin", "pass")
+
+        self.filepaths = MediaFilePaths(hs.config.media_store_path)
+        self.url = "/_synapse/admin/v1/media/%s/delete" % self.server_name
+
+    def test_no_auth(self):
+        """
+        Try to delete media without authentication.
+        """
+
+        request, channel = self.make_request("POST", self.url, b"{}")
+        self.render(request)
+
+        self.assertEqual(401, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"])
+
+    def test_requester_is_no_admin(self):
+        """
+        If the user is not a server admin, an error is returned.
+        """
+        self.other_user = self.register_user("user", "pass")
+        self.other_user_token = self.login("user", "pass")
+
+        request, channel = self.make_request(
+            "POST", self.url, access_token=self.other_user_token,
+        )
+        self.render(request)
+
+        self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"])
+
+    def test_media_is_not_local(self):
+        """
+        Tests that a lookup for media that is not local returns a 400
+        """
+        url = "/_synapse/admin/v1/media/%s/delete" % "unknown_domain"
+
+        request, channel = self.make_request(
+            "POST", url + "?before_ts=1234", access_token=self.admin_user_tok,
+        )
+        self.render(request)
+
+        self.assertEqual(400, channel.code, msg=channel.json_body)
+        self.assertEqual("Can only delete local media", channel.json_body["error"])
+
+    def test_missing_parameter(self):
+        """
+        If the parameter `before_ts` is missing, an error is returned.
+        """
+        request, channel = self.make_request(
+            "POST", self.url, access_token=self.admin_user_tok,
+        )
+        self.render(request)
+
+        self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(Codes.MISSING_PARAM, channel.json_body["errcode"])
+        self.assertEqual(
+            "Missing integer query parameter b'before_ts'", channel.json_body["error"]
+        )
+
+    def test_invalid_parameter(self):
+        """
+        If parameters are invalid, an error is returned.
+        """
+        request, channel = self.make_request(
+            "POST", self.url + "?before_ts=-1234", access_token=self.admin_user_tok,
+        )
+        self.render(request)
+
+        self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"])
+        self.assertEqual(
+            "Query parameter before_ts must be a string representing a positive integer.",
+            channel.json_body["error"],
+        )
+
+        request, channel = self.make_request(
+            "POST",
+            self.url + "?before_ts=1234&size_gt=-1234",
+            access_token=self.admin_user_tok,
+        )
+        self.render(request)
+
+        self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"])
+        self.assertEqual(
+            "Query parameter size_gt must be a string representing a positive integer.",
+            channel.json_body["error"],
+        )
+
+        request, channel = self.make_request(
+            "POST",
+            self.url + "?before_ts=1234&keep_profiles=not_bool",
+            access_token=self.admin_user_tok,
+        )
+        self.render(request)
+
+        self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"])
+        self.assertEqual(Codes.UNKNOWN, channel.json_body["errcode"])
+        self.assertEqual(
+            "Boolean query parameter b'keep_profiles' must be one of ['true', 'false']",
+            channel.json_body["error"],
+        )
+
+    def test_delete_media_never_accessed(self):
+        """
+        Tests that media deleted if it is older than `before_ts` and never accessed
+        `last_access_ts` is `NULL` and `created_ts` < `before_ts`
+        """
+
+        # upload and do not access
+        server_and_media_id = self._create_media()
+        self.pump(1.0)
+
+        # test that the file exists
+        media_id = server_and_media_id.split("/")[1]
+        local_path = self.filepaths.local_media_filepath(media_id)
+        self.assertTrue(os.path.exists(local_path))
+
+        # timestamp after upload/create
+        now_ms = self.clock.time_msec()
+        request, channel = self.make_request(
+            "POST",
+            self.url + "?before_ts=" + str(now_ms),
+            access_token=self.admin_user_tok,
+        )
+        self.render(request)
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(1, channel.json_body["total"])
+        self.assertEqual(
+            media_id, channel.json_body["deleted_media"][0],
+        )
+
+        self._access_media(server_and_media_id, False)
+
+    def test_keep_media_by_date(self):
+        """
+        Tests that media is not deleted if it is newer than `before_ts`
+        """
+
+        # timestamp before upload
+        now_ms = self.clock.time_msec()
+        server_and_media_id = self._create_media()
+
+        self._access_media(server_and_media_id)
+
+        request, channel = self.make_request(
+            "POST",
+            self.url + "?before_ts=" + str(now_ms),
+            access_token=self.admin_user_tok,
+        )
+        self.render(request)
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(0, channel.json_body["total"])
+
+        self._access_media(server_and_media_id)
+
+        # timestamp after upload
+        now_ms = self.clock.time_msec()
+        request, channel = self.make_request(
+            "POST",
+            self.url + "?before_ts=" + str(now_ms),
+            access_token=self.admin_user_tok,
+        )
+        self.render(request)
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(1, channel.json_body["total"])
+        self.assertEqual(
+            server_and_media_id.split("/")[1], channel.json_body["deleted_media"][0],
+        )
+
+        self._access_media(server_and_media_id, False)
+
+    def test_keep_media_by_size(self):
+        """
+        Tests that media is not deleted if its size is smaller than or equal
+        to `size_gt`
+        """
+        server_and_media_id = self._create_media()
+
+        self._access_media(server_and_media_id)
+
+        now_ms = self.clock.time_msec()
+        request, channel = self.make_request(
+            "POST",
+            self.url + "?before_ts=" + str(now_ms) + "&size_gt=67",
+            access_token=self.admin_user_tok,
+        )
+        self.render(request)
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(0, channel.json_body["total"])
+
+        self._access_media(server_and_media_id)
+
+        now_ms = self.clock.time_msec()
+        request, channel = self.make_request(
+            "POST",
+            self.url + "?before_ts=" + str(now_ms) + "&size_gt=66",
+            access_token=self.admin_user_tok,
+        )
+        self.render(request)
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(1, channel.json_body["total"])
+        self.assertEqual(
+            server_and_media_id.split("/")[1], channel.json_body["deleted_media"][0],
+        )
+
+        self._access_media(server_and_media_id, False)
+
+    def test_keep_media_by_user_avatar(self):
+        """
+        Tests that we do not delete media if is used as a user avatar
+        Tests parameter `keep_profiles`
+        """
+        server_and_media_id = self._create_media()
+
+        self._access_media(server_and_media_id)
+
+        # set media as avatar
+        request, channel = self.make_request(
+            "PUT",
+            "/profile/%s/avatar_url" % (self.admin_user,),
+            content=json.dumps({"avatar_url": "mxc://%s" % (server_and_media_id,)}),
+            access_token=self.admin_user_tok,
+        )
+        self.render(request)
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+
+        now_ms = self.clock.time_msec()
+        request, channel = self.make_request(
+            "POST",
+            self.url + "?before_ts=" + str(now_ms) + "&keep_profiles=true",
+            access_token=self.admin_user_tok,
+        )
+        self.render(request)
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(0, channel.json_body["total"])
+
+        self._access_media(server_and_media_id)
+
+        now_ms = self.clock.time_msec()
+        request, channel = self.make_request(
+            "POST",
+            self.url + "?before_ts=" + str(now_ms) + "&keep_profiles=false",
+            access_token=self.admin_user_tok,
+        )
+        self.render(request)
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(1, channel.json_body["total"])
+        self.assertEqual(
+            server_and_media_id.split("/")[1], channel.json_body["deleted_media"][0],
+        )
+
+        self._access_media(server_and_media_id, False)
+
+    def test_keep_media_by_room_avatar(self):
+        """
+        Tests that media used as a room avatar is kept when `keep_profiles=true`
+        and is deleted when `keep_profiles=false`
+        """
+        server_and_media_id = self._create_media()
+
+        self._access_media(server_and_media_id)
+
+        # create a room and set the media as its `m.room.avatar` state
+        room_id = self.helper.create_room_as(self.admin_user, tok=self.admin_user_tok)
+        request, channel = self.make_request(
+            "PUT",
+            "/rooms/%s/state/m.room.avatar" % (room_id,),
+            content=json.dumps({"url": "mxc://%s" % (server_and_media_id,)}),
+            access_token=self.admin_user_tok,
+        )
+        self.render(request)
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+
+        now_ms = self.clock.time_msec()
+        request, channel = self.make_request(
+            "POST",
+            self.url + "?before_ts=" + str(now_ms) + "&keep_profiles=true",
+            access_token=self.admin_user_tok,
+        )
+        self.render(request)
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(0, channel.json_body["total"])
+
+        self._access_media(server_and_media_id)
+
+        now_ms = self.clock.time_msec()
+        request, channel = self.make_request(
+            "POST",
+            self.url + "?before_ts=" + str(now_ms) + "&keep_profiles=false",
+            access_token=self.admin_user_tok,
+        )
+        self.render(request)
+        self.assertEqual(200, channel.code, msg=channel.json_body)
+        self.assertEqual(1, channel.json_body["total"])
+        self.assertEqual(
+            server_and_media_id.split("/")[1], channel.json_body["deleted_media"][0],
+        )
+
+        self._access_media(server_and_media_id, False)
+
+    def _create_media(self):
+        """
+        Upload a small PNG as the admin user and return its server_and_media_id
+        """
+        upload_resource = self.media_repo.children[b"upload"]
+        # 1x1 pixel PNG image; file size is 67 bytes
+        image_data = unhexlify(
+            b"89504e470d0a1a0a0000000d4948445200000001000000010806"
+            b"0000001f15c4890000000a49444154789c63000100000500010d"
+            b"0a2db40000000049454e44ae426082"
+        )
+
+        # Upload the image using the admin user's access token
+        response = self.helper.upload_media(
+            upload_resource, image_data, tok=self.admin_user_tok, expect_code=200
+        )
+        # Extract "<server_name>/<media_id>" from the returned content URI
+        server_and_media_id = response["content_uri"][6:]  # Cut off 'mxc://'
+        server_name = server_and_media_id.split("/")[0]
+
+        # Check that the new media is local and not remote
+        self.assertEqual(server_name, self.server_name)
+
+        return server_and_media_id
+
+    def _access_media(self, server_and_media_id, expect_success=True):
+        """
+        Download a media item and check the response code and the file on disk
+        """
+        download_resource = self.media_repo.children[b"download"]
+
+        media_id = server_and_media_id.split("/")[1]
+        local_path = self.filepaths.local_media_filepath(media_id)
+
+        request, channel = self.make_request(
+            "GET",
+            server_and_media_id,
+            shorthand=False,
+            access_token=self.admin_user_tok,
+        )
+        request.render(download_resource)
+        self.pump(1.0)
+
+        if expect_success:
+            self.assertEqual(
+                200,
+                channel.code,
+                msg=(
+                    "Expected to receive a 200 on accessing media: %s"
+                    % server_and_media_id
+                ),
+            )
+            # Test that the file exists on disk
+            self.assertTrue(os.path.exists(local_path))
+        else:
+            self.assertEqual(
+                404,
+                channel.code,
+                msg=(
+                    "Expected to receive a 404 on accessing deleted media: %s"
+                    % (server_and_media_id)
+                ),
+            )
+            # Test that the file has been removed from disk
+            self.assertFalse(os.path.exists(local_path))