summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--changelog.d/15197.feature1
-rw-r--r--docs/usage/configuration/config_documentation.md24
-rw-r--r--synapse/config/repository.py4
-rw-r--r--synapse/media/media_repository.py9
-rw-r--r--synapse/rest/media/thumbnail_resource.py9
-rw-r--r--tests/rest/media/test_domain_blocking.py139
6 files changed, 186 insertions, 0 deletions
diff --git a/changelog.d/15197.feature b/changelog.d/15197.feature
new file mode 100644
index 0000000000..c8a6f114e8
--- /dev/null
+++ b/changelog.d/15197.feature
@@ -0,0 +1 @@
+Add an option to prevent media downloads from configured domains.
\ No newline at end of file
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 14c21f73fe..6dd1a639ed 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -1768,6 +1768,30 @@ Example configuration:
 max_image_pixels: 35M
 ```
 ---
+### `prevent_media_downloads_from`
+
+A list of domains to never download media from. Media from these
+domains that is already downloaded will not be deleted, but will be
+inaccessible to users. This option does not affect admin APIs trying
+to download/operate on media.
+
+This will not prevent the listed domains from accessing media themselves.
+It simply prevents users on this server from downloading media originating
+from the listed servers.
+
+This will have no effect on media originating from the local server.
+This only affects media downloaded from other Matrix servers. To block
+domains from URL previews, see [`url_preview_url_blacklist`](#url_preview_url_blacklist).
+
+Defaults to an empty list (nothing blocked).
+
+Example configuration:
+```yaml
+prevent_media_downloads_from:
+  - evil.example.org
+  - evil2.example.org
+```
+---
 ### `dynamic_thumbnails`
 
 Whether to generate new thumbnails on the fly to precisely match
diff --git a/synapse/config/repository.py b/synapse/config/repository.py
index ecb3edbe3a..655f06505b 100644
--- a/synapse/config/repository.py
+++ b/synapse/config/repository.py
@@ -137,6 +137,10 @@ class ContentRepositoryConfig(Config):
         self.max_image_pixels = self.parse_size(config.get("max_image_pixels", "32M"))
         self.max_spider_size = self.parse_size(config.get("max_spider_size", "10M"))
 
+        self.prevent_media_downloads_from = config.get(
+            "prevent_media_downloads_from", []
+        )
+
         self.media_store_path = self.ensure_directory(
             config.get("media_store_path", "media_store")
         )
diff --git a/synapse/media/media_repository.py b/synapse/media/media_repository.py
index b81e3c2b0c..e81c987b10 100644
--- a/synapse/media/media_repository.py
+++ b/synapse/media/media_repository.py
@@ -93,6 +93,7 @@ class MediaRepository:
         self.federation_domain_whitelist = (
             hs.config.federation.federation_domain_whitelist
         )
+        self.prevent_media_downloads_from = hs.config.media.prevent_media_downloads_from
 
         # List of StorageProviders where we should search for media and
         # potentially upload to.
@@ -276,6 +277,14 @@ class MediaRepository:
         ):
             raise FederationDeniedError(server_name)
 
+        # Don't let users download media from domains listed in the config, even
+        # if we might have the media to serve. This is Trust & Safety tooling to
+        # block some servers' media from being accessible to local users.
+        # See `prevent_media_downloads_from` config docs for more info.
+        if server_name in self.prevent_media_downloads_from:
+            respond_404(request)
+            return
+
         self.mark_recently_accessed(server_name, media_id)
 
         # We linearize here to ensure that we don't try and download remote
diff --git a/synapse/rest/media/thumbnail_resource.py b/synapse/rest/media/thumbnail_resource.py
index a6396fb05a..661e604b85 100644
--- a/synapse/rest/media/thumbnail_resource.py
+++ b/synapse/rest/media/thumbnail_resource.py
@@ -60,6 +60,7 @@ class ThumbnailResource(DirectServeJsonResource):
         self.media_storage = media_storage
         self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails
         self._is_mine_server_name = hs.is_mine_server_name
+        self.prevent_media_downloads_from = hs.config.media.prevent_media_downloads_from
 
     async def _async_render_GET(self, request: SynapseRequest) -> None:
         set_cors_headers(request)
@@ -82,6 +83,14 @@ class ThumbnailResource(DirectServeJsonResource):
                 )
             self.media_repo.mark_recently_accessed(None, media_id)
         else:
+            # Don't let users download media from configured domains, even if it
+            # is already downloaded. This is Trust & Safety tooling to make some
+            # media inaccessible to local users.
+            # See `prevent_media_downloads_from` config docs for more info.
+            if server_name in self.prevent_media_downloads_from:
+                respond_404(request)
+                return
+
             if self.dynamic_thumbnails:
                 await self._select_or_generate_remote_thumbnail(
                     request, server_name, media_id, width, height, method, m_type
diff --git a/tests/rest/media/test_domain_blocking.py b/tests/rest/media/test_domain_blocking.py
new file mode 100644
index 0000000000..9beeeab843
--- /dev/null
+++ b/tests/rest/media/test_domain_blocking.py
@@ -0,0 +1,139 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Dict
+
+from twisted.test.proto_helpers import MemoryReactor
+from twisted.web.resource import Resource
+
+from synapse.media._base import FileInfo
+from synapse.server import HomeServer
+from synapse.util import Clock
+
+from tests import unittest
+from tests.test_utils import SMALL_PNG
+from tests.unittest import override_config
+
+
+class MediaDomainBlockingTests(unittest.HomeserverTestCase):
+    """Tests for the `prevent_media_downloads_from` config option."""
+
+    remote_media_id = "doesnotmatter"
+    remote_server_name = "evil.com"
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        """Write SMALL_PNG to media storage and register it as cached remote media."""
+        self.store = hs.get_datastores().main
+
+        # Inject a piece of media. We'll use this to ensure we're returning a sane
+        # response when we're not supposed to block it, distinguishing a media block
+        # from a regular 404.
+        file_id = "abcdefg12345"
+        file_info = FileInfo(server_name=self.remote_server_name, file_id=file_id)
+        with hs.get_media_repository().media_storage.store_into_file(file_info) as (
+            f,
+            fname,
+            finish,
+        ):
+            f.write(SMALL_PNG)
+            self.get_success(finish())
+
+        self.get_success(
+            self.store.store_cached_remote_media(
+                origin=self.remote_server_name,
+                media_id=self.remote_media_id,
+                media_type="image/png",
+                media_length=1,
+                time_now_ms=clock.time_msec(),
+                upload_name="test.png",
+                filesystem_id=file_id,
+            )
+        )
+
+    def create_resource_dict(self) -> Dict[str, Resource]:
+        # We need to manually set the resource tree to include media, the
+        # default only does `/_matrix/client` APIs.
+        return {"/_matrix/media": self.hs.get_media_repository_resource()}
+
+    @override_config(
+        {
+            # Disable downloads from the domain we'll be trying to download from.
+            # Should result in a 404.
+            "prevent_media_downloads_from": ["evil.com"]
+        }
+    )
+    def test_cannot_download_blocked_media(self) -> None:
+        """
+        Tests to ensure that remote media which is blocked cannot be downloaded.
+        """
+        response = self.make_request(
+            "GET",
+            f"/_matrix/media/v3/download/evil.com/{self.remote_media_id}",
+            shorthand=False,
+        )
+        self.assertEqual(response.code, 404)
+
+    @override_config(
+        {
+            # Disable downloads from a domain we won't be requesting downloads from.
+            # This proves we haven't broken anything.
+            "prevent_media_downloads_from": ["not-listed.com"]
+        }
+    )
+    def test_remote_media_normally_unblocked(self) -> None:
+        """
+        Tests to ensure that remote media can still be downloaded when its
+        origin is not on the configured block list.
+        """
+        response = self.make_request(
+            "GET",
+            f"/_matrix/media/v3/download/evil.com/{self.remote_media_id}",
+            shorthand=False,
+        )
+        self.assertEqual(response.code, 200)
+
+    @override_config(
+        {
+            # Disable downloads from the domain we'll be trying to download from.
+            # Should result in a 404.
+            "prevent_media_downloads_from": ["evil.com"],
+            "dynamic_thumbnails": True,
+        }
+    )
+    def test_cannot_download_blocked_media_thumbnail(self) -> None:
+        """Same test as test_cannot_download_blocked_media but for thumbnails."""
+        response = self.make_request(
+            "GET",
+            f"/_matrix/media/v3/thumbnail/evil.com/{self.remote_media_id}?width=100&height=100",
+            shorthand=False,
+            # NOTE(review): GET body looks redundant with the query string - confirm.
+            content={"width": 100, "height": 100},
+        )
+        self.assertEqual(response.code, 404)
+
+    @override_config(
+        {
+            # Disable downloads from a domain we won't be requesting downloads from.
+            # This proves we haven't broken anything.
+            "prevent_media_downloads_from": ["not-listed.com"],
+            "dynamic_thumbnails": True,
+        }
+    )
+    def test_remote_media_thumbnail_normally_unblocked(self) -> None:
+        """Same test as test_remote_media_normally_unblocked but for thumbnails."""
+        response = self.make_request(
+            "GET",
+            f"/_matrix/media/v3/thumbnail/evil.com/{self.remote_media_id}?width=100&height=100",
+            shorthand=False,
+        )
+        self.assertEqual(response.code, 200)