From d17295e5c3de642ba2c4e47f1bb2be7b2e4c9c06 Mon Sep 17 00:00:00 2001 From: Will Hunt Date: Thu, 27 Mar 2025 17:26:34 +0000 Subject: Store hashes of media files, and allow quarantining by hash. (#18277) This PR makes a few radical changes to media. It now stores, in the database, the SHA256 hash of each media file (excluding thumbnails; more on that later). If a set of media is quarantined, any additional uploads of the same file contents, or any other files with the same hash, will be quarantined at the same time. Currently this does NOT: - De-duplicate media, although a future extension could be to do that. - Run any background jobs to identify the hashes of older files. This could also be a future extension, though the value of doing so is limited, since the aim is to combat abuse of recent media. - Hash thumbnails. It's assumed that thumbnails are parented to some form of media, so you would likely want to quarantine the media and the thumbnail at the same time. --- tests/rest/media/test_domain_blocking.py | 1 + 1 file changed, 1 insertion(+) (limited to 'tests/rest/media/test_domain_blocking.py') diff --git a/tests/rest/media/test_domain_blocking.py b/tests/rest/media/test_domain_blocking.py index 49d81f4b28..26453f70dd 100644 --- a/tests/rest/media/test_domain_blocking.py +++ b/tests/rest/media/test_domain_blocking.py @@ -61,6 +61,7 @@ class MediaDomainBlockingTests(unittest.HomeserverTestCase): time_now_ms=clock.time_msec(), upload_name="test.png", filesystem_id=file_id, + sha256=file_id, ) ) -- cgit 1.5.1