summary refs log tree commit diff
diff options
context:
space:
mode:
author: Travis Ralston <travisr@matrix.org> 2023-05-09 12:08:51 -0600
committer: GitHub <noreply@github.com> 2023-05-09 14:08:51 -0400
commit: ab4535b6082db97e8c48a69ea6674fe3b7c5e956 (patch)
tree: 68c545c1409c39ba43070ab21b2315e45d1415ea
parent: Merge branch 'master' into develop (diff)
download: synapse-ab4535b6082db97e8c48a69ea6674fe3b7c5e956.tar.xz
Add config option to prevent media downloads from listed domains. (#15197)
This stops media (and thumbnails) from being accessed from the
listed domains. It does not delete any already locally cached media,
but will prevent accessing it.

Note that admin APIs are unaffected by this change.
-rw-r--r-- changelog.d/15197.feature 1
-rw-r--r-- docs/usage/configuration/config_documentation.md 24
-rw-r--r-- synapse/config/repository.py 4
-rw-r--r-- synapse/media/media_repository.py 9
-rw-r--r-- synapse/rest/media/thumbnail_resource.py 9
-rw-r--r-- tests/rest/media/test_domain_blocking.py 139
6 files changed, 186 insertions, 0 deletions
diff --git a/changelog.d/15197.feature b/changelog.d/15197.feature
new file mode 100644
index 0000000000..c8a6f114e8
--- /dev/null
+++ b/changelog.d/15197.feature
@@ -0,0 +1 @@
+Add an option to prevent media downloads from configured domains.
\ No newline at end of file
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md
index 14c21f73fe..6dd1a639ed 100644
--- a/docs/usage/configuration/config_documentation.md
+++ b/docs/usage/configuration/config_documentation.md
@@ -1768,6 +1768,30 @@ Example configuration:
 max_image_pixels: 35M
 ```
 ---
+### `prevent_media_downloads_from`
+
+A list of domains to never download media from. Media from these
+domains that is already downloaded will not be deleted, but will be
+inaccessible to users. This option does not affect admin APIs trying
+to download/operate on media.
+
+This will not prevent the listed domains from accessing media themselves.
+It simply prevents users on this server from downloading media originating
+from the listed servers.
+
+This will have no effect on media originating from the local server.
+This only affects media downloaded from other Matrix servers. To
+block domains from URL previews, see [`url_preview_url_blacklist`](#url_preview_url_blacklist).
+
+Defaults to an empty list (nothing blocked).
+
+Example configuration:
+```yaml
+prevent_media_downloads_from:
+  - evil.example.org
+  - evil2.example.org
+```
+---
 ### `dynamic_thumbnails`
 
 Whether to generate new thumbnails on the fly to precisely match
diff --git a/synapse/config/repository.py b/synapse/config/repository.py
index ecb3edbe3a..655f06505b 100644
--- a/synapse/config/repository.py
+++ b/synapse/config/repository.py
@@ -137,6 +137,10 @@ class ContentRepositoryConfig(Config):
         self.max_image_pixels = self.parse_size(config.get("max_image_pixels", "32M"))
         self.max_spider_size = self.parse_size(config.get("max_spider_size", "10M"))
 
+        self.prevent_media_downloads_from = config.get(
+            "prevent_media_downloads_from", []
+        )
+
         self.media_store_path = self.ensure_directory(
             config.get("media_store_path", "media_store")
         )
diff --git a/synapse/media/media_repository.py b/synapse/media/media_repository.py
index b81e3c2b0c..e81c987b10 100644
--- a/synapse/media/media_repository.py
+++ b/synapse/media/media_repository.py
@@ -93,6 +93,7 @@ class MediaRepository:
         self.federation_domain_whitelist = (
             hs.config.federation.federation_domain_whitelist
         )
+        self.prevent_media_downloads_from = hs.config.media.prevent_media_downloads_from
 
         # List of StorageProviders where we should search for media and
         # potentially upload to.
@@ -276,6 +277,14 @@ class MediaRepository:
         ):
             raise FederationDeniedError(server_name)
 
+        # Don't let users download media from domains listed in the config, even
+        # if we might have the media to serve. This is Trust & Safety tooling to
+        # block some servers' media from being accessible to local users.
+        # See `prevent_media_downloads_from` config docs for more info.
+        if server_name in self.prevent_media_downloads_from:
+            respond_404(request)
+            return
+
         self.mark_recently_accessed(server_name, media_id)
 
         # We linearize here to ensure that we don't try and download remote
diff --git a/synapse/rest/media/thumbnail_resource.py b/synapse/rest/media/thumbnail_resource.py
index a6396fb05a..661e604b85 100644
--- a/synapse/rest/media/thumbnail_resource.py
+++ b/synapse/rest/media/thumbnail_resource.py
@@ -60,6 +60,7 @@ class ThumbnailResource(DirectServeJsonResource):
         self.media_storage = media_storage
         self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails
         self._is_mine_server_name = hs.is_mine_server_name
+        self.prevent_media_downloads_from = hs.config.media.prevent_media_downloads_from
 
     async def _async_render_GET(self, request: SynapseRequest) -> None:
         set_cors_headers(request)
@@ -82,6 +83,14 @@ class ThumbnailResource(DirectServeJsonResource):
                 )
             self.media_repo.mark_recently_accessed(None, media_id)
         else:
+            # Don't let users download media from configured domains, even if it
+            # is already downloaded. This is Trust & Safety tooling to make some
+            # media inaccessible to local users.
+            # See `prevent_media_downloads_from` config docs for more info.
+            if server_name in self.prevent_media_downloads_from:
+                respond_404(request)
+                return
+
             if self.dynamic_thumbnails:
                 await self._select_or_generate_remote_thumbnail(
                     request, server_name, media_id, width, height, method, m_type
diff --git a/tests/rest/media/test_domain_blocking.py b/tests/rest/media/test_domain_blocking.py
new file mode 100644
index 0000000000..9beeeab843
--- /dev/null
+++ b/tests/rest/media/test_domain_blocking.py
@@ -0,0 +1,139 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Dict
+
+from twisted.test.proto_helpers import MemoryReactor
+from twisted.web.resource import Resource
+
+from synapse.media._base import FileInfo
+from synapse.server import HomeServer
+from synapse.util import Clock
+
+from tests import unittest
+from tests.test_utils import SMALL_PNG
+from tests.unittest import override_config
+
+
+class MediaDomainBlockingTests(unittest.HomeserverTestCase):
+    remote_media_id = "doesnotmatter"
+    remote_server_name = "evil.com"
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        self.store = hs.get_datastores().main
+
+        # Inject a piece of media. We'll use this to ensure we're returning a sane
+        # response when we're not supposed to block it, distinguishing a media block
+        # from a regular 404.
+        file_id = "abcdefg12345"
+        file_info = FileInfo(server_name=self.remote_server_name, file_id=file_id)
+        with hs.get_media_repository().media_storage.store_into_file(file_info) as (
+            f,
+            fname,
+            finish,
+        ):
+            f.write(SMALL_PNG)
+            self.get_success(finish())
+
+        self.get_success(
+            self.store.store_cached_remote_media(
+                origin=self.remote_server_name,
+                media_id=self.remote_media_id,
+                media_type="image/png",
+                media_length=1,
+                time_now_ms=clock.time_msec(),
+                upload_name="test.png",
+                filesystem_id=file_id,
+            )
+        )
+
+    def create_resource_dict(self) -> Dict[str, Resource]:
+        # We need to manually set the resource tree to include media, the
+        # default only does `/_matrix/client` APIs.
+        return {"/_matrix/media": self.hs.get_media_repository_resource()}
+
+    @override_config(
+        {
+            # Disable downloads from the domain we'll be trying to download from.
+            # Should result in a 404.
+            "prevent_media_downloads_from": ["evil.com"]
+        }
+    )
+    def test_cannot_download_blocked_media(self) -> None:
+        """
+        Tests to ensure that remote media which is blocked cannot be downloaded.
+        """
+        response = self.make_request(
+            "GET",
+            f"/_matrix/media/v3/download/evil.com/{self.remote_media_id}",
+            shorthand=False,
+        )
+        self.assertEqual(response.code, 404)
+
+    @override_config(
+        {
+            # Disable downloads from a domain we won't be requesting downloads from.
+            # This proves we haven't broken anything.
+            "prevent_media_downloads_from": ["not-listed.com"]
+        }
+    )
+    def test_remote_media_normally_unblocked(self) -> None:
+        """
+        Tests to ensure that remote media is normally able to be downloaded
+        when no domain block is in place.
+        """
+        response = self.make_request(
+            "GET",
+            f"/_matrix/media/v3/download/evil.com/{self.remote_media_id}",
+            shorthand=False,
+        )
+        self.assertEqual(response.code, 200)
+
+    @override_config(
+        {
+            # Disable downloads from the domain we'll be trying to download from.
+            # Should result in a 404.
+            "prevent_media_downloads_from": ["evil.com"],
+            "dynamic_thumbnails": True,
+        }
+    )
+    def test_cannot_download_blocked_media_thumbnail(self) -> None:
+        """
+        Same test as test_cannot_download_blocked_media but for thumbnails.
+        """
+        response = self.make_request(
+            "GET",
+            f"/_matrix/media/v3/thumbnail/evil.com/{self.remote_media_id}?width=100&height=100",
+            shorthand=False,
+            content={"width": 100, "height": 100},
+        )
+        self.assertEqual(response.code, 404)
+
+    @override_config(
+        {
+            # Disable downloads from a domain we won't be requesting downloads from.
+            # This proves we haven't broken anything.
+            "prevent_media_downloads_from": ["not-listed.com"],
+            "dynamic_thumbnails": True,
+        }
+    )
+    def test_remote_media_thumbnail_normally_unblocked(self) -> None:
+        """
+        Same test as test_remote_media_normally_unblocked but for thumbnails.
+        """
+        response = self.make_request(
+            "GET",
+            f"/_matrix/media/v3/thumbnail/evil.com/{self.remote_media_id}?width=100&height=100",
+            shorthand=False,
+        )
+        self.assertEqual(response.code, 200)