summary refs log tree commit diff
path: root/synapse
diff options
context:
space:
mode:
authorAndrew Morgan <1342360+anoadragon453@users.noreply.github.com>2022-06-07 11:53:47 +0100
committerGitHub <noreply@github.com>2022-06-07 10:53:47 +0000
commita47636c570c80928af7652d70073496f602511bc (patch)
treefe7d7c6471940dbaf73d27e3ace667bcfb2725bd /synapse
parentRemove remaining pieces of groups code. (#12966) (diff)
downloadsynapse-a47636c570c80928af7652d70073496f602511bc.tar.xz
Prevent local quarantined media from being claimed by media retention (#12972)
Diffstat (limited to 'synapse')
-rw-r--r--synapse/rest/admin/media.py8
-rw-r--r--synapse/rest/media/v1/media_repository.py22
-rw-r--r--synapse/storage/databases/main/media_repository.py68
3 files changed, 82 insertions, 16 deletions
diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py
index 8ca57bdb28..19d4a008e8 100644
--- a/synapse/rest/admin/media.py
+++ b/synapse/rest/admin/media.py
@@ -83,7 +83,7 @@ class QuarantineMediaByUser(RestServlet):
         requester = await self.auth.get_user_by_req(request)
         await assert_user_is_admin(self.auth, requester.user)
 
-        logging.info("Quarantining local media by user: %s", user_id)
+        logging.info("Quarantining media by user: %s", user_id)
 
         # Quarantine all media this user has uploaded
         num_quarantined = await self.store.quarantine_media_ids_by_user(
@@ -112,7 +112,7 @@ class QuarantineMediaByID(RestServlet):
         requester = await self.auth.get_user_by_req(request)
         await assert_user_is_admin(self.auth, requester.user)
 
-        logging.info("Quarantining local media by ID: %s/%s", server_name, media_id)
+        logging.info("Quarantining media by ID: %s/%s", server_name, media_id)
 
         # Quarantine this media id
         await self.store.quarantine_media_by_id(
@@ -140,9 +140,7 @@ class UnquarantineMediaByID(RestServlet):
     ) -> Tuple[int, JsonDict]:
         await assert_requester_is_admin(self.auth, request)
 
-        logging.info(
-            "Remove from quarantine local media by ID: %s/%s", server_name, media_id
-        )
+        logging.info("Remove from quarantine media by ID: %s/%s", server_name, media_id)
 
         # Remove from quarantine this media id
         await self.store.quarantine_media_by_id(server_name, media_id, None)
diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py
index a551458a9f..7435fd9130 100644
--- a/synapse/rest/media/v1/media_repository.py
+++ b/synapse/rest/media/v1/media_repository.py
@@ -919,10 +919,14 @@ class MediaRepository:
             await self.delete_old_local_media(
                 before_ts=local_media_threshold_timestamp_ms,
                 keep_profiles=True,
+                delete_quarantined_media=False,
+                delete_protected_media=False,
             )
 
     async def delete_old_remote_media(self, before_ts: int) -> Dict[str, int]:
-        old_media = await self.store.get_remote_media_before(before_ts)
+        old_media = await self.store.get_remote_media_ids(
+            before_ts, include_quarantined_media=False
+        )
 
         deleted = 0
 
@@ -975,6 +979,8 @@ class MediaRepository:
         before_ts: int,
         size_gt: int = 0,
         keep_profiles: bool = True,
+        delete_quarantined_media: bool = False,
+        delete_protected_media: bool = False,
     ) -> Tuple[List[str], int]:
         """
         Delete local or remote media from this server by size and timestamp. Removes
@@ -982,18 +988,22 @@ class MediaRepository:
 
         Args:
             before_ts: Unix timestamp in ms.
-                       Files that were last used before this timestamp will be deleted
-            size_gt: Size of the media in bytes. Files that are larger will be deleted
+                Files that were last used before this timestamp will be deleted.
+            size_gt: Size of the media in bytes. Files that are larger will be deleted.
             keep_profiles: Switch to delete also files that are still used in image data
-                           (e.g user profile, room avatar)
-                           If false these files will be deleted
+                (e.g user profile, room avatar). If false these files will be deleted.
+            delete_quarantined_media: If True, media marked as quarantined will be deleted.
+            delete_protected_media: If True, media marked as protected will be deleted.
+
         Returns:
             A tuple of (list of deleted media IDs, total deleted media IDs).
         """
-        old_media = await self.store.get_local_media_before(
+        old_media = await self.store.get_local_media_ids(
             before_ts,
             size_gt,
             keep_profiles,
+            include_quarantined_media=delete_quarantined_media,
+            include_protected_media=delete_protected_media,
         )
         return await self._remove_local_media_from_disk(old_media)
 
diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py
index deffdc19ce..3c585c555a 100644
--- a/synapse/storage/databases/main/media_repository.py
+++ b/synapse/storage/databases/main/media_repository.py
@@ -251,12 +251,36 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
             "get_local_media_by_user_paginate_txn", get_local_media_by_user_paginate_txn
         )
 
-    async def get_local_media_before(
+    async def get_local_media_ids(
         self,
         before_ts: int,
         size_gt: int,
         keep_profiles: bool,
+        include_quarantined_media: bool,
+        include_protected_media: bool,
     ) -> List[str]:
+        """
+        Retrieve a list of media IDs from the local media store.
+
+        Args:
+            before_ts: Only retrieve IDs from media that was either last accessed
+                (or if never accessed, created) before the given UNIX timestamp in ms.
+            size_gt: Only retrieve IDs from media that has a size (in bytes) greater than
+                the given integer.
+            keep_profiles: If True, exclude media IDs from the results that are used in the
+                following situations:
+                    * global profile user avatar
+                    * per-room profile user avatar
+                    * room avatar
+                    * a user's avatar in the user directory
+            include_quarantined_media: If False, exclude media IDs from the results that have
+                been marked as quarantined.
+            include_protected_media: If False, exclude media IDs from the results that have
+                been marked as protected from quarantine.
+
+        Returns:
+            A list of local media IDs.
+        """
 
         # to find files that have never been accessed (last_access_ts IS NULL)
         # compare with `created_ts`
@@ -294,12 +318,24 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
             )
             sql += sql_keep
 
-        def _get_local_media_before_txn(txn: LoggingTransaction) -> List[str]:
+        if include_quarantined_media is False:
+            # Do not include media that has been quarantined
+            sql += """
+                AND quarantined_by IS NULL
+            """
+
+        if include_protected_media is False:
+            # Do not include media that has been protected from quarantine
+            sql += """
+                AND safe_from_quarantine = false
+            """
+
+        def _get_local_media_ids_txn(txn: LoggingTransaction) -> List[str]:
             txn.execute(sql, (before_ts, before_ts, size_gt))
             return [row[0] for row in txn]
 
         return await self.db_pool.runInteraction(
-            "get_local_media_before", _get_local_media_before_txn
+            "get_local_media_ids", _get_local_media_ids_txn
         )
 
     async def store_local_media(
@@ -599,15 +635,37 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
             desc="store_remote_media_thumbnail",
         )
 
-    async def get_remote_media_before(self, before_ts: int) -> List[Dict[str, str]]:
+    async def get_remote_media_ids(
+        self, before_ts: int, include_quarantined_media: bool
+    ) -> List[Dict[str, str]]:
+        """
+        Retrieve a list of server name, media ID tuples from the remote media cache.
+
+        Args:
+            before_ts: Only retrieve IDs from media that was either last accessed
+                (or if never accessed, created) before the given UNIX timestamp in ms.
+            include_quarantined_media: If False, exclude media IDs from the results that have
+                been marked as quarantined.
+
+        Returns:
+            A list of tuples containing:
+                * The server name of homeserver where the media originates from,
+                * The ID of the media.
+        """
         sql = (
             "SELECT media_origin, media_id, filesystem_id"
             " FROM remote_media_cache"
             " WHERE last_access_ts < ?"
         )
 
+        if include_quarantined_media is False:
+            # Only include media that has not been quarantined
+            sql += """
+            AND quarantined_by IS NULL
+            """
+
         return await self.db_pool.execute(
-            "get_remote_media_before", self.db_pool.cursor_to_dict, sql, before_ts
+            "get_remote_media_ids", self.db_pool.cursor_to_dict, sql, before_ts
         )
 
     async def delete_remote_media(self, media_origin: str, media_id: str) -> None: