From f7768f62cbf7579a1a91e694f83d47d275373369 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Mon, 27 Sep 2021 12:55:27 +0100 Subject: Avoid storing URL cache files in storage providers (#10911) URL cache files are short-lived and it does not make sense to offload them (eg. to the cloud) or back them up. --- synapse/rest/media/v1/filepath.py | 11 ++++++----- synapse/rest/media/v1/preview_url_resource.py | 1 - synapse/rest/media/v1/storage_provider.py | 10 ++++++++++ 3 files changed, 16 insertions(+), 6 deletions(-) (limited to 'synapse/rest/media/v1') diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index 39bbe4e874..08bd85f664 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -195,23 +195,24 @@ class MediaFilePaths: url_cache_thumbnail = _wrap_in_base_path(url_cache_thumbnail_rel) - def url_cache_thumbnail_directory(self, media_id: str) -> str: + def url_cache_thumbnail_directory_rel(self, media_id: str) -> str: # Media id is of the form # E.g.: 2017-09-28-fsdRDt24DS234dsf if NEW_FORMAT_ID_RE.match(media_id): - return os.path.join( - self.base_path, "url_cache_thumbnails", media_id[:10], media_id[11:] - ) + return os.path.join("url_cache_thumbnails", media_id[:10], media_id[11:]) else: return os.path.join( - self.base_path, "url_cache_thumbnails", media_id[0:2], media_id[2:4], media_id[4:], ) + url_cache_thumbnail_directory = _wrap_in_base_path( + url_cache_thumbnail_directory_rel + ) + def url_cache_thumbnail_dirs_to_delete(self, media_id: str) -> List[str]: "The dirs to try and remove if we delete the media_id thumbnails" # Media id is of the form diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 0b0c4d6469..79a42b2455 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -485,7 +485,6 @@ class PreviewUrlResource(DirectServeJsonResource): async def _expire_url_cache_data(self) -> None: """Clean up expired url cache content, media and thumbnails.""" - # TODO: Delete from backup media store assert self._worker_run_media_background_jobs diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py index da78fcee5e..18bf977d3d 100644 --- a/synapse/rest/media/v1/storage_provider.py +++ b/synapse/rest/media/v1/storage_provider.py @@ -93,6 +93,11 @@ class StorageProviderWrapper(StorageProvider): if file_info.server_name and not self.store_remote: return None + if file_info.url_cache: + # The URL preview cache is short lived and not worth offloading or + # backing up. + return None + if self.store_synchronous: # store_file is supposed to return an Awaitable, but guard # against improper implementations. @@ -110,6 +115,11 @@ class StorageProviderWrapper(StorageProvider): run_in_background(store) async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]: + if file_info.url_cache: + # Files in the URL preview cache definitely aren't stored here, + # so avoid any potentially slow I/O or network access. + return None + # store_file is supposed to return an Awaitable, but guard # against improper implementations. return await maybe_awaitable(self.backend.fetch(path, file_info)) -- cgit 1.4.1