From a47636c570c80928af7652d70073496f602511bc Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Tue, 7 Jun 2022 11:53:47 +0100 Subject: Prevent local quarantined media from being claimed by media retention (#12972) --- synapse/rest/admin/media.py | 8 +-- synapse/rest/media/v1/media_repository.py | 22 +++++-- synapse/storage/databases/main/media_repository.py | 68 ++++++++++++++++++++-- 3 files changed, 82 insertions(+), 16 deletions(-) (limited to 'synapse') diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py index 8ca57bdb28..19d4a008e8 100644 --- a/synapse/rest/admin/media.py +++ b/synapse/rest/admin/media.py @@ -83,7 +83,7 @@ class QuarantineMediaByUser(RestServlet): requester = await self.auth.get_user_by_req(request) await assert_user_is_admin(self.auth, requester.user) - logging.info("Quarantining local media by user: %s", user_id) + logging.info("Quarantining media by user: %s", user_id) # Quarantine all media this user has uploaded num_quarantined = await self.store.quarantine_media_ids_by_user( @@ -112,7 +112,7 @@ class QuarantineMediaByID(RestServlet): requester = await self.auth.get_user_by_req(request) await assert_user_is_admin(self.auth, requester.user) - logging.info("Quarantining local media by ID: %s/%s", server_name, media_id) + logging.info("Quarantining media by ID: %s/%s", server_name, media_id) # Quarantine this media id await self.store.quarantine_media_by_id( @@ -140,9 +140,7 @@ class UnquarantineMediaByID(RestServlet): ) -> Tuple[int, JsonDict]: await assert_requester_is_admin(self.auth, request) - logging.info( - "Remove from quarantine local media by ID: %s/%s", server_name, media_id - ) + logging.info("Remove from quarantine media by ID: %s/%s", server_name, media_id) # Remove from quarantine this media id await self.store.quarantine_media_by_id(server_name, media_id, None) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index a551458a9f..7435fd9130 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -919,10 +919,14 @@ class MediaRepository: await self.delete_old_local_media( before_ts=local_media_threshold_timestamp_ms, keep_profiles=True, + delete_quarantined_media=False, + delete_protected_media=False, ) async def delete_old_remote_media(self, before_ts: int) -> Dict[str, int]: - old_media = await self.store.get_remote_media_before(before_ts) + old_media = await self.store.get_remote_media_ids( + before_ts, include_quarantined_media=False + ) deleted = 0 @@ -975,6 +979,8 @@ class MediaRepository: before_ts: int, size_gt: int = 0, keep_profiles: bool = True, + delete_quarantined_media: bool = False, + delete_protected_media: bool = False, ) -> Tuple[List[str], int]: """ Delete local or remote media from this server by size and timestamp. Removes @@ -982,18 +988,22 @@ class MediaRepository: Args: before_ts: Unix timestamp in ms. - Files that were last used before this timestamp will be deleted - size_gt: Size of the media in bytes. Files that are larger will be deleted + Files that were last used before this timestamp will be deleted. + size_gt: Size of the media in bytes. Files that are larger will be deleted. keep_profiles: Switch to delete also files that are still used in image data - (e.g user profile, room avatar) - If false these files will be deleted + (e.g user profile, room avatar). If false these files will be deleted. + delete_quarantined_media: If True, media marked as quarantined will be deleted. + delete_protected_media: If True, media marked as protected will be deleted. + Returns: A tuple of (list of deleted media IDs, total deleted media IDs). """ - old_media = await self.store.get_local_media_before( + old_media = await self.store.get_local_media_ids( before_ts, size_gt, keep_profiles, + include_quarantined_media=delete_quarantined_media, + include_protected_media=delete_protected_media, ) return await self._remove_local_media_from_disk(old_media) diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py index deffdc19ce..3c585c555a 100644 --- a/synapse/storage/databases/main/media_repository.py +++ b/synapse/storage/databases/main/media_repository.py @@ -251,12 +251,36 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): "get_local_media_by_user_paginate_txn", get_local_media_by_user_paginate_txn ) - async def get_local_media_before( + async def get_local_media_ids( self, before_ts: int, size_gt: int, keep_profiles: bool, + include_quarantined_media: bool, + include_protected_media: bool, ) -> List[str]: + """ + Retrieve a list of media IDs from the local media store. + + Args: + before_ts: Only retrieve IDs from media that was either last accessed + (or if never accessed, created) before the given UNIX timestamp in ms. + size_gt: Only retrieve IDs from media that has a size (in bytes) greater than + the given integer. + keep_profiles: If True, exclude media IDs from the results that are used in the + following situations: + * global profile user avatar + * per-room profile user avatar + * room avatar + * a user's avatar in the user directory + include_quarantined_media: If False, exclude media IDs from the results that have + been marked as quarantined. + include_protected_media: If False, exclude media IDs from the results that have + been marked as protected from quarantine. + + Returns: + A list of local media IDs. + """ # to find files that have never been accessed (last_access_ts IS NULL) # compare with `created_ts` @@ -294,12 +318,24 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): ) sql += sql_keep - def _get_local_media_before_txn(txn: LoggingTransaction) -> List[str]: + if include_quarantined_media is False: + # Do not include media that has been quarantined + sql += """ + AND quarantined_by IS NULL + """ + + if include_protected_media is False: + # Do not include media that has been protected from quarantine + sql += """ + AND safe_from_quarantine = false + """ + + def _get_local_media_ids_txn(txn: LoggingTransaction) -> List[str]: txn.execute(sql, (before_ts, before_ts, size_gt)) return [row[0] for row in txn] return await self.db_pool.runInteraction( - "get_local_media_before", _get_local_media_before_txn + "get_local_media_ids", _get_local_media_ids_txn ) async def store_local_media( @@ -599,15 +635,37 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): desc="store_remote_media_thumbnail", ) - async def get_remote_media_before(self, before_ts: int) -> List[Dict[str, str]]: + async def get_remote_media_ids( + self, before_ts: int, include_quarantined_media: bool + ) -> List[Dict[str, str]]: + """ + Retrieve a list of server name, media ID tuples from the remote media cache. + + Args: + before_ts: Only retrieve IDs from media that was either last accessed + (or if never accessed, created) before the given UNIX timestamp in ms. + include_quarantined_media: If False, exclude media IDs from the results that have + been marked as quarantined. + + Returns: + A list of tuples containing: + * The server name of homeserver where the media originates from, + * The ID of the media. + """ sql = ( "SELECT media_origin, media_id, filesystem_id" " FROM remote_media_cache" " WHERE last_access_ts < ?" ) + if include_quarantined_media is False: + # Only include media that has not been quarantined + sql += """ + AND quarantined_by IS NULL + """ + return await self.db_pool.execute( - "get_remote_media_before", self.db_pool.cursor_to_dict, sql, before_ts + "get_remote_media_ids", self.db_pool.cursor_to_dict, sql, before_ts ) async def delete_remote_media(self, media_origin: str, media_id: str) -> None: -- cgit 1.4.1