summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--changelog.d/12333.bugfix1
-rw-r--r--synapse/rest/media/v1/preview_url_resource.py9
-rw-r--r--tests/rest/media/v1/test_url_preview.py43
3 files changed, 49 insertions, 4 deletions
diff --git a/changelog.d/12333.bugfix b/changelog.d/12333.bugfix
new file mode 100644
index 0000000000..2c073a77d5
--- /dev/null
+++ b/changelog.d/12333.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug affecting URL previews that would generate a 500 response instead of a 403 if the previewed URL includes a port that isn't allowed by the relevant blacklist.
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index d47af8ead6..50383bdbd1 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -200,12 +200,17 @@ class PreviewUrlResource(DirectServeJsonResource):
                     match = False
                     continue
 
+                # Some attributes might not be parsed as strings by urlsplit (such as the
+                # port, which is parsed as an int). Because we use match functions that
+                # expect strings, we want to make sure that's what we give them.
+                value_str = str(value)
+
                 if pattern.startswith("^"):
-                    if not re.match(pattern, getattr(url_tuple, attrib)):
+                    if not re.match(pattern, value_str):
                         match = False
                         continue
                 else:
-                    if not fnmatch.fnmatch(getattr(url_tuple, attrib), pattern):
+                    if not fnmatch.fnmatch(value_str, pattern):
                         match = False
                         continue
             if match:
diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py
index 5148c39874..3b24d0ace6 100644
--- a/tests/rest/media/v1/test_url_preview.py
+++ b/tests/rest/media/v1/test_url_preview.py
@@ -17,7 +17,7 @@ import json
 import os
 import re
 from typing import Any, Dict, Optional, Sequence, Tuple, Type
-from urllib.parse import urlencode
+from urllib.parse import quote, urlencode
 
 from twisted.internet._resolver import HostResolution
 from twisted.internet.address import IPv4Address, IPv6Address
@@ -69,7 +69,6 @@ class URLPreviewTests(unittest.HomeserverTestCase):
             "2001:800::/21",
         )
         config["url_preview_ip_range_whitelist"] = ("1.1.1.1",)
-        config["url_preview_url_blacklist"] = []
         config["url_preview_accept_language"] = [
             "en-UK",
             "en-US;q=0.9",
@@ -1123,3 +1122,43 @@ class URLPreviewTests(unittest.HomeserverTestCase):
                 os.path.exists(path),
                 f"{os.path.relpath(path, self.media_store_path)} was not deleted",
             )
+
+    @unittest.override_config({"url_preview_url_blacklist": [{"port": "*"}]})
+    def test_blacklist_port(self) -> None:
+        """Tests that blacklisting URLs with a port makes previewing such URLs
+        fail with a 403 error and doesn't impact other previews.
+        """
+        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+        bad_url = quote("http://matrix.org:8888/foo")
+        good_url = quote("http://matrix.org/foo")
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=" + bad_url,
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+        self.assertEqual(channel.code, 403, channel.result)
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=" + good_url,
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: text/html\r\n\r\n"
+            % (len(self.end_content),)
+            + self.end_content
+        )
+
+        self.pump()
+        self.assertEqual(channel.code, 200)