summary refs log tree commit diff
path: root/tests/rest/admin/test_admin.py
diff options
context:
space:
mode:
authorWill Hunt <will@half-shot.uk>2025-03-27 17:26:34 +0000
committerGitHub <noreply@github.com>2025-03-27 17:26:34 +0000
commitd17295e5c3de642ba2c4e47f1bb2be7b2e4c9c06 (patch)
tree122cff5e2886532fc6d52dc5d48d36d3c2523070 /tests/rest/admin/test_admin.py
parentAdd DB delta to remove the old state group deletion job (#18284) (diff)
downloadsynapse-d17295e5c3de642ba2c4e47f1bb2be7b2e4c9c06.tar.xz
Store hashes of media files, and allow quarantining by hash. (#18277)
This PR makes a few radical changes to media. This now stores the SHA256
hash of each file stored in the database (excluding thumbnails, more on
that later). If a set of media is quarantined, any additional uploads of
the same file contents or any other files with the same hash will be
quarantined at the same time.

Currently this does NOT:
 - De-duplicate media, although a future extension could be to do that.
- Run any background jobs to identify the hashes of older files. This
could also be a future extension, though the value of doing so is
limited to combat the abuse of recent media.
- Hash thumbnails. It's assumed that thumbnails are parented to some
form of media, so you'd likely be wanting to quarantine the media and
the thumbnail at the same time.
Diffstat (limited to 'tests/rest/admin/test_admin.py')
-rw-r--r--tests/rest/admin/test_admin.py21
1 files changed, 19 insertions, 2 deletions
diff --git a/tests/rest/admin/test_admin.py b/tests/rest/admin/test_admin.py

index 5483f8f37f..fc2a6c569b 100644 --- a/tests/rest/admin/test_admin.py +++ b/tests/rest/admin/test_admin.py
@@ -20,7 +20,7 @@ # import urllib.parse -from typing import Dict +from typing import Dict, cast from parameterized import parameterized @@ -32,6 +32,7 @@ from synapse.http.server import JsonResource from synapse.rest.admin import VersionServlet from synapse.rest.client import login, media, room from synapse.server import HomeServer +from synapse.types import UserID from synapse.util import Clock from tests import unittest @@ -227,10 +228,25 @@ class QuarantineMediaTestCase(unittest.HomeserverTestCase): # Upload some media response_1 = self.helper.upload_media(SMALL_PNG, tok=non_admin_user_tok) response_2 = self.helper.upload_media(SMALL_PNG, tok=non_admin_user_tok) + response_3 = self.helper.upload_media(SMALL_PNG, tok=non_admin_user_tok) # Extract media IDs server_and_media_id_1 = response_1["content_uri"][6:] server_and_media_id_2 = response_2["content_uri"][6:] + server_and_media_id_3 = response_3["content_uri"][6:] + + # Remove the hash from the media to simulate historic media. + self.get_success( + self.hs.get_datastores().main.update_local_media( + media_id=server_and_media_id_3.split("/")[1], + media_type="image/png", + upload_name=None, + media_length=123, + user_id=UserID.from_string(non_admin_user), + # Hack to force some media to have no hash. + sha256=cast(str, None), + ) + ) # Quarantine all media by this user url = "/_synapse/admin/v1/user/%s/media/quarantine" % urllib.parse.quote( @@ -244,12 +260,13 @@ class QuarantineMediaTestCase(unittest.HomeserverTestCase): self.pump(1.0) self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual( - channel.json_body, {"num_quarantined": 2}, "Expected 2 quarantined items" + channel.json_body, {"num_quarantined": 3}, "Expected 3 quarantined items" ) # Attempt to access each piece of media self._ensure_quarantined(admin_user_tok, server_and_media_id_1) self._ensure_quarantined(admin_user_tok, server_and_media_id_2) + self._ensure_quarantined(admin_user_tok, server_and_media_id_3) def test_cannot_quarantine_safe_media(self) -> None: self.register_user("user_admin", "pass", admin=True)