summary refs log tree commit diff
path: root/tests/media/test_url_previewer.py
diff options
context:
space:
mode:
authorPatrick Cloke <clokep@users.noreply.github.com>2023-05-16 16:25:01 -0400
committerGitHub <noreply@github.com>2023-05-16 16:25:01 -0400
commit4ee82c0576baed6358e3818e8c22e01bde6afd02 (patch)
treea7d02126b479f4a0d3ee5eece78dce52f06d167f /tests/media/test_url_previewer.py
parentUpdate code to refer to "workers". (#15606) (diff)
downloadsynapse-4ee82c0576baed6358e3818e8c22e01bde6afd02.tar.xz
Apply url_preview_url_blacklist to oEmbed and pre-cached images (#15601)
There are two situations which were previously not properly checked:

1. If the requested URL was replaced with an oEmbed URL, then the
   oEmbed URL was not checked against url_preview_url_blacklist.
2. Follow-up URLs (either via autodiscovery of oEmbed or to pre-cache
   images) were not checked against url_preview_url_blacklist.
Diffstat (limited to 'tests/media/test_url_previewer.py')
-rw-r--r--tests/media/test_url_previewer.py113
1 files changed, 113 insertions, 0 deletions
diff --git a/tests/media/test_url_previewer.py b/tests/media/test_url_previewer.py
new file mode 100644
index 0000000000..3c4c7d6765
--- /dev/null
+++ b/tests/media/test_url_previewer.py
@@ -0,0 +1,113 @@
+# Copyright 2023 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+
+from twisted.test.proto_helpers import MemoryReactor
+
+from synapse.server import HomeServer
+from synapse.util import Clock
+
+from tests import unittest
+from tests.unittest import override_config
+
+try:
+    import lxml
+except ImportError:
+    lxml = None
+
+
+class URLPreviewTests(unittest.HomeserverTestCase):
+    if not lxml:
+        skip = "url preview feature requires lxml"
+
+    def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
+        config = self.default_config()
+        config["url_preview_enabled"] = True
+        config["max_spider_size"] = 9999999
+        config["url_preview_ip_range_blacklist"] = (
+            "192.168.1.1",
+            "1.0.0.0/8",
+            "3fff:ffff:ffff:ffff:ffff:ffff:ffff:ffff",
+            "2001:800::/21",
+        )
+
+        self.storage_path = self.mktemp()
+        self.media_store_path = self.mktemp()
+        os.mkdir(self.storage_path)
+        os.mkdir(self.media_store_path)
+        config["media_store_path"] = self.media_store_path
+
+        provider_config = {
+            "module": "synapse.media.storage_provider.FileStorageProviderBackend",
+            "store_local": True,
+            "store_synchronous": False,
+            "store_remote": True,
+            "config": {"directory": self.storage_path},
+        }
+
+        config["media_storage_providers"] = [provider_config]
+
+        return self.setup_test_homeserver(config=config)
+
+    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
+        media_repo_resource = hs.get_media_repository_resource()
+        preview_url = media_repo_resource.children[b"preview_url"]
+        self.url_previewer = preview_url._url_previewer
+
+    def test_all_urls_allowed(self) -> None:
+        self.assertFalse(self.url_previewer._is_url_blocked("http://matrix.org"))
+        self.assertFalse(self.url_previewer._is_url_blocked("https://matrix.org"))
+        self.assertFalse(self.url_previewer._is_url_blocked("http://localhost:8000"))
+        self.assertFalse(
+            self.url_previewer._is_url_blocked("http://user:pass@matrix.org")
+        )
+
+    @override_config(
+        {
+            "url_preview_url_blacklist": [
+                {"username": "user"},
+                {"scheme": "http", "netloc": "matrix.org"},
+            ]
+        }
+    )
+    def test_blocked_url(self) -> None:
+        # Blocked via scheme and URL.
+        self.assertTrue(self.url_previewer._is_url_blocked("http://matrix.org"))
+        # Not blocked because all components must match.
+        self.assertFalse(self.url_previewer._is_url_blocked("https://matrix.org"))
+
+        # Blocked due to the user.
+        self.assertTrue(
+            self.url_previewer._is_url_blocked("http://user:pass@example.com")
+        )
+        self.assertTrue(self.url_previewer._is_url_blocked("http://user@example.com"))
+
+    @override_config({"url_preview_url_blacklist": [{"netloc": "*.example.com"}]})
+    def test_glob_blocked_url(self) -> None:
+        # All subdomains are blocked.
+        self.assertTrue(self.url_previewer._is_url_blocked("http://foo.example.com"))
+        self.assertTrue(self.url_previewer._is_url_blocked("http://.example.com"))
+
+        # The TLD is not blocked.
+        self.assertFalse(self.url_previewer._is_url_blocked("https://example.com"))
+
+    @override_config({"url_preview_url_blacklist": [{"netloc": "^.+\\.example\\.com"}]})
+    def test_regex_blocked_urL(self) -> None:
+        # All subdomains are blocked.
+        self.assertTrue(self.url_previewer._is_url_blocked("http://foo.example.com"))
+        # Requires a non-empty subdomain.
+        self.assertFalse(self.url_previewer._is_url_blocked("http://.example.com"))
+
+        # The TLD is not blocked.
+        self.assertFalse(self.url_previewer._is_url_blocked("https://example.com"))