summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--changelog.d/15092.bugfix1
-rw-r--r--synapse/rest/media/v1/preview_url_resource.py33
-rw-r--r--tests/rest/media/v1/test_url_preview.py44
3 files changed, 65 insertions, 13 deletions
diff --git a/changelog.d/15092.bugfix b/changelog.d/15092.bugfix
new file mode 100644
index 0000000000..67509c5c69
--- /dev/null
+++ b/changelog.d/15092.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where a URL preview would break if the discovered oEmbed failed to download.
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index a8f6fd6b35..4a594ab9d8 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -163,6 +163,10 @@ class PreviewUrlResource(DirectServeJsonResource):
        7. Stores the result in the database cache.
     4. Returns the result.
 
+    If any additional requests (e.g. from oEmbed autodiscovery, step 5.3 or
+    image thumbnailing, step 5.4 or 6.4) fails then the URL preview as a whole
+    does not fail. As much information as possible is returned.
+
     The in-memory cache expires after 1 hour.
 
     Expired entries in the database cache (and their associated media files) are
@@ -364,16 +368,25 @@ class PreviewUrlResource(DirectServeJsonResource):
                 oembed_url = self._oembed.autodiscover_from_html(tree)
                 og_from_oembed: JsonDict = {}
                 if oembed_url:
-                    oembed_info = await self._handle_url(
-                        oembed_url, user, allow_data_urls=True
-                    )
-                    (
-                        og_from_oembed,
-                        author_name,
-                        expiration_ms,
-                    ) = await self._handle_oembed_response(
-                        url, oembed_info, expiration_ms
-                    )
+                    try:
+                        oembed_info = await self._handle_url(
+                            oembed_url, user, allow_data_urls=True
+                        )
+                    except Exception as e:
+                        # Fetching the oEmbed info failed, don't block the entire URL preview.
+                        logger.warning(
+                            "oEmbed fetch failed during URL preview: %s errored with %s",
+                            oembed_url,
+                            e,
+                        )
+                    else:
+                        (
+                            og_from_oembed,
+                            author_name,
+                            expiration_ms,
+                        ) = await self._handle_oembed_response(
+                            url, oembed_info, expiration_ms
+                        )
 
                 # Parse Open Graph information from the HTML in case the oEmbed
                 # response failed or is incomplete.
diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py
index 6fcf60ce19..2acfccec61 100644
--- a/tests/rest/media/v1/test_url_preview.py
+++ b/tests/rest/media/v1/test_url_preview.py
@@ -657,7 +657,7 @@ class URLPreviewTests(unittest.HomeserverTestCase):
         """If the preview image doesn't exist, ensure some data is returned."""
         self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
 
-        end_content = (
+        result = (
             b"""<html><body><img src="http://cdn.matrix.org/foo.jpg"></body></html>"""
         )
 
@@ -678,8 +678,8 @@ class URLPreviewTests(unittest.HomeserverTestCase):
                 b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
                 b'Content-Type: text/html; charset="utf8"\r\n\r\n'
             )
-            % (len(end_content),)
-            + end_content
+            % (len(result),)
+            + result
         )
 
         self.pump()
@@ -688,6 +688,44 @@ class URLPreviewTests(unittest.HomeserverTestCase):
         # The image should not be in the result.
         self.assertNotIn("og:image", channel.json_body)
 
+    def test_oembed_failure(self) -> None:
+        """If the autodiscovered oEmbed URL fails, ensure some data is returned."""
+        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+        result = b"""
+        <title>oEmbed Autodiscovery Fail</title>
+        <link rel="alternate" type="application/json+oembed"
+            href="http://example.com/oembed?url=http%3A%2F%2Fmatrix.org&format=json"
+            title="matrixdotorg" />
+        """
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=http://matrix.org",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            (
+                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+                b'Content-Type: text/html; charset="utf8"\r\n\r\n'
+            )
+            % (len(result),)
+            + result
+        )
+
+        self.pump()
+        self.assertEqual(channel.code, 200)
+
+        # The image should not be in the result.
+        self.assertEqual(channel.json_body["og:title"], "oEmbed Autodiscovery Fail")
+
     def test_data_url(self) -> None:
         """
         Requesting to preview a data URL is not supported.