summary refs log tree commit diff
path: root/synapse/rest/media
diff options
context:
space:
mode:
author Sean Quah <8349537+squahtx@users.noreply.github.com> 2021-09-27 12:55:27 +0100
committer GitHub <noreply@github.com> 2021-09-27 12:55:27 +0100
commit f7768f62cbf7579a1a91e694f83d47d275373369 (patch)
tree a8a605329df286cdc475e25aa537049a52af0c47 /synapse/rest/media
parent Fix race conditions when creating media store and config directories (#10913) (diff)
download synapse-f7768f62cbf7579a1a91e694f83d47d275373369.tar.xz
Avoid storing URL cache files in storage providers (#10911)
URL cache files are short-lived and it does not make sense to offload
them (e.g. to the cloud) or back them up.
Diffstat (limited to 'synapse/rest/media')
-rw-r--r-- synapse/rest/media/v1/filepath.py 11
-rw-r--r-- synapse/rest/media/v1/preview_url_resource.py 1
-rw-r--r-- synapse/rest/media/v1/storage_provider.py 10
3 files changed, 16 insertions, 6 deletions
diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py
index 39bbe4e874..08bd85f664 100644
--- a/synapse/rest/media/v1/filepath.py
+++ b/synapse/rest/media/v1/filepath.py
@@ -195,23 +195,24 @@ class MediaFilePaths:
 
     url_cache_thumbnail = _wrap_in_base_path(url_cache_thumbnail_rel)
 
-    def url_cache_thumbnail_directory(self, media_id: str) -> str:
+    def url_cache_thumbnail_directory_rel(self, media_id: str) -> str:
         # Media id is of the form <DATE><RANDOM_STRING>
         # E.g.: 2017-09-28-fsdRDt24DS234dsf
 
         if NEW_FORMAT_ID_RE.match(media_id):
-            return os.path.join(
-                self.base_path, "url_cache_thumbnails", media_id[:10], media_id[11:]
-            )
+            return os.path.join("url_cache_thumbnails", media_id[:10], media_id[11:])
         else:
             return os.path.join(
-                self.base_path,
                 "url_cache_thumbnails",
                 media_id[0:2],
                 media_id[2:4],
                 media_id[4:],
             )
 
+    url_cache_thumbnail_directory = _wrap_in_base_path(
+        url_cache_thumbnail_directory_rel
+    )
+
     def url_cache_thumbnail_dirs_to_delete(self, media_id: str) -> List[str]:
         "The dirs to try and remove if we delete the media_id thumbnails"
         # Media id is of the form <DATE><RANDOM_STRING>
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index 0b0c4d6469..79a42b2455 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -485,7 +485,6 @@ class PreviewUrlResource(DirectServeJsonResource):
 
     async def _expire_url_cache_data(self) -> None:
         """Clean up expired url cache content, media and thumbnails."""
-        # TODO: Delete from backup media store
 
         assert self._worker_run_media_background_jobs
 
diff --git a/synapse/rest/media/v1/storage_provider.py b/synapse/rest/media/v1/storage_provider.py
index da78fcee5e..18bf977d3d 100644
--- a/synapse/rest/media/v1/storage_provider.py
+++ b/synapse/rest/media/v1/storage_provider.py
@@ -93,6 +93,11 @@ class StorageProviderWrapper(StorageProvider):
         if file_info.server_name and not self.store_remote:
             return None
 
+        if file_info.url_cache:
+            # The URL preview cache is short lived and not worth offloading or
+            # backing up.
+            return None
+
         if self.store_synchronous:
             # store_file is supposed to return an Awaitable, but guard
             # against improper implementations.
@@ -110,6 +115,11 @@ class StorageProviderWrapper(StorageProvider):
             run_in_background(store)
 
     async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
+        if file_info.url_cache:
+            # Files in the URL preview cache definitely aren't stored here,
+            # so avoid any potentially slow I/O or network access.
+            return None
+
         # store_file is supposed to return an Awaitable, but guard
         # against improper implementations.
         return await maybe_awaitable(self.backend.fetch(path, file_info))