diff --git a/changelog.d/10924.bugfix b/changelog.d/10924.bugfix
new file mode 100644
index 0000000000..c73a51e32f
--- /dev/null
+++ b/changelog.d/10924.bugfix
@@ -0,0 +1 @@
+Fix a bug where empty `yyyy-mm-dd/` directories would be left behind in the media store's `url_cache_thumbnails/` directory.
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index 79a42b2455..044f44a397 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -73,6 +73,7 @@ OG_TAG_VALUE_MAXLEN = 1000
ONE_HOUR = 60 * 60 * 1000
ONE_DAY = 24 * ONE_HOUR
+IMAGE_CACHE_EXPIRY_MS = 2 * ONE_DAY
@attr.s(slots=True, frozen=True, auto_attribs=True)
@@ -496,6 +497,27 @@ class PreviewUrlResource(DirectServeJsonResource):
logger.info("Still running DB updates; skipping expiry")
return
+ def try_remove_parent_dirs(dirs: Iterable[str]) -> None:
+ """Attempt to remove the given chain of parent directories
+
+ Args:
+ dirs: The list of directory paths to delete, with children appearing
+ before their parents.
+ """
+ for dir in dirs:
+ try:
+ os.rmdir(dir)
+ except FileNotFoundError:
+ # Already deleted, continue with deleting the rest
+ pass
+ except OSError as e:
+ # Failed, skip deleting the rest of the parent dirs
+ if e.errno != errno.ENOTEMPTY:
+ logger.warning(
+ "Failed to remove media directory: %r: %s", dir, e
+ )
+ break
+
# First we delete expired url cache entries
media_ids = await self.store.get_expired_url_cache(now)
@@ -504,20 +526,16 @@ class PreviewUrlResource(DirectServeJsonResource):
fname = self.filepaths.url_cache_filepath(media_id)
try:
os.remove(fname)
+ except FileNotFoundError:
+ pass # If the path doesn't exist, meh
except OSError as e:
- # If the path doesn't exist, meh
- if e.errno != errno.ENOENT:
- logger.warning("Failed to remove media: %r: %s", media_id, e)
- continue
+ logger.warning("Failed to remove media: %r: %s", media_id, e)
+ continue
removed_media.append(media_id)
- try:
- dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
- for dir in dirs:
- os.rmdir(dir)
- except Exception:
- pass
+ dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
+ try_remove_parent_dirs(dirs)
await self.store.delete_url_cache(removed_media)
@@ -530,7 +548,7 @@ class PreviewUrlResource(DirectServeJsonResource):
# These may be cached for a bit on the client (i.e., they
# may have a room open with a preview url thing open).
# So we wait a couple of days before deleting, just in case.
- expire_before = now - 2 * ONE_DAY
+ expire_before = now - IMAGE_CACHE_EXPIRY_MS
media_ids = await self.store.get_url_cache_media_before(expire_before)
removed_media = []
@@ -538,36 +556,30 @@ class PreviewUrlResource(DirectServeJsonResource):
fname = self.filepaths.url_cache_filepath(media_id)
try:
os.remove(fname)
+ except FileNotFoundError:
+ pass # If the path doesn't exist, meh
except OSError as e:
- # If the path doesn't exist, meh
- if e.errno != errno.ENOENT:
- logger.warning("Failed to remove media: %r: %s", media_id, e)
- continue
+ logger.warning("Failed to remove media: %r: %s", media_id, e)
+ continue
- try:
- dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
- for dir in dirs:
- os.rmdir(dir)
- except Exception:
- pass
+ dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id)
+ try_remove_parent_dirs(dirs)
thumbnail_dir = self.filepaths.url_cache_thumbnail_directory(media_id)
try:
shutil.rmtree(thumbnail_dir)
+ except FileNotFoundError:
+ pass # If the path doesn't exist, meh
except OSError as e:
- # If the path doesn't exist, meh
- if e.errno != errno.ENOENT:
- logger.warning("Failed to remove media: %r: %s", media_id, e)
- continue
+ logger.warning("Failed to remove media: %r: %s", media_id, e)
+ continue
removed_media.append(media_id)
- try:
- dirs = self.filepaths.url_cache_thumbnail_dirs_to_delete(media_id)
- for dir in dirs:
- os.rmdir(dir)
- except Exception:
- pass
+ dirs = self.filepaths.url_cache_thumbnail_dirs_to_delete(media_id)
+ # Note that one of the directories to be deleted has already been
+ # removed by the `rmtree` above.
+ try_remove_parent_dirs(dirs)
await self.store.delete_url_cache_media(removed_media)
diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py
index 4d09b5d07e..ce43de780b 100644
--- a/tests/rest/media/v1/test_url_preview.py
+++ b/tests/rest/media/v1/test_url_preview.py
@@ -21,11 +21,13 @@ from twisted.internet.error import DNSLookupError
from twisted.test.proto_helpers import AccumulatingProtocol
from synapse.config.oembed import OEmbedEndpointConfig
+from synapse.rest.media.v1.preview_url_resource import IMAGE_CACHE_EXPIRY_MS
from synapse.util.stringutils import parse_and_validate_mxc_uri
from tests import unittest
from tests.server import FakeTransport
from tests.test_utils import SMALL_PNG
+from tests.utils import MockClock
try:
import lxml
@@ -851,3 +853,32 @@ class URLPreviewTests(unittest.HomeserverTestCase):
404,
"URL cache thumbnail was unexpectedly retrieved from a storage provider",
)
+
+ def test_cache_expiry(self):
+ """Test that URL cache files and thumbnails are cleaned up properly on expiry."""
+ self.preview_url.clock = MockClock()
+
+ _host, media_id = self._download_image()
+
+ file_path = self.preview_url.filepaths.url_cache_filepath(media_id)
+ file_dirs = self.preview_url.filepaths.url_cache_filepath_dirs_to_delete(
+ media_id
+ )
+ thumbnail_dir = self.preview_url.filepaths.url_cache_thumbnail_directory(
+ media_id
+ )
+ thumbnail_dirs = self.preview_url.filepaths.url_cache_thumbnail_dirs_to_delete(
+ media_id
+ )
+
+ self.assertTrue(os.path.isfile(file_path))
+ self.assertTrue(os.path.isdir(thumbnail_dir))
+
+ self.preview_url.clock.advance_time_msec(IMAGE_CACHE_EXPIRY_MS + 1)
+ self.get_success(self.preview_url._expire_url_cache_data())
+
+ for path in [file_path] + file_dirs + [thumbnail_dir] + thumbnail_dirs:
+ self.assertFalse(
+ os.path.exists(path),
+ f"{os.path.relpath(path, self.media_store_path)} was not deleted",
+ )
|