summary refs log tree commit diff
diff options
context:
space:
mode:
authorPatrick Cloke <clokep@users.noreply.github.com>2023-02-23 16:08:53 -0500
committerGitHub <noreply@github.com>2023-02-23 16:08:53 -0500
commit682151a464f688768d5bd8308e16bd4024ad2e57 (patch)
treeec5e161c14b59e268922495233e912108160c7ef
parentStop parsing the unspecced type parameter on thumbnail requests. (#15137) (diff)
downloadsynapse-682151a464f688768d5bd8308e16bd4024ad2e57.tar.xz
Do not fail completely if oEmbed autodiscovery fails. (#15092)
Previously if an autodiscovered oEmbed request failed (e.g. the
oEmbed endpoint is down or does not exist) then the entire URL
preview would fail. Instead we now return everything we can, even
if this additional request fails.
-rw-r--r--changelog.d/15092.bugfix1
-rw-r--r--synapse/rest/media/v1/preview_url_resource.py33
-rw-r--r--tests/rest/media/v1/test_url_preview.py44
3 files changed, 65 insertions, 13 deletions
diff --git a/changelog.d/15092.bugfix b/changelog.d/15092.bugfix
new file mode 100644
index 0000000000..67509c5c69
--- /dev/null
+++ b/changelog.d/15092.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where a URL preview would break if the discovered oEmbed failed to download.
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index a8f6fd6b35..4a594ab9d8 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -163,6 +163,10 @@ class PreviewUrlResource(DirectServeJsonResource):
        7. Stores the result in the database cache.
     4. Returns the result.
 
+    If any additional requests (e.g. from oEmbed autodiscovery, step 5.3 or
+    image thumbnailing, step 5.4 or 6.4) fails then the URL preview as a whole
+    does not fail. As much information as possible is returned.
+
     The in-memory cache expires after 1 hour.
 
     Expired entries in the database cache (and their associated media files) are
@@ -364,16 +368,25 @@ class PreviewUrlResource(DirectServeJsonResource):
                 oembed_url = self._oembed.autodiscover_from_html(tree)
                 og_from_oembed: JsonDict = {}
                 if oembed_url:
-                    oembed_info = await self._handle_url(
-                        oembed_url, user, allow_data_urls=True
-                    )
-                    (
-                        og_from_oembed,
-                        author_name,
-                        expiration_ms,
-                    ) = await self._handle_oembed_response(
-                        url, oembed_info, expiration_ms
-                    )
+                    try:
+                        oembed_info = await self._handle_url(
+                            oembed_url, user, allow_data_urls=True
+                        )
+                    except Exception as e:
+                        # Fetching the oEmbed info failed, don't block the entire URL preview.
+                        logger.warning(
+                            "oEmbed fetch failed during URL preview: %s errored with %s",
+                            oembed_url,
+                            e,
+                        )
+                    else:
+                        (
+                            og_from_oembed,
+                            author_name,
+                            expiration_ms,
+                        ) = await self._handle_oembed_response(
+                            url, oembed_info, expiration_ms
+                        )
 
                 # Parse Open Graph information from the HTML in case the oEmbed
                 # response failed or is incomplete.
diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py
index 6fcf60ce19..2acfccec61 100644
--- a/tests/rest/media/v1/test_url_preview.py
+++ b/tests/rest/media/v1/test_url_preview.py
@@ -657,7 +657,7 @@ class URLPreviewTests(unittest.HomeserverTestCase):
         """If the preview image doesn't exist, ensure some data is returned."""
         self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
 
-        end_content = (
+        result = (
             b"""<html><body><img src="http://cdn.matrix.org/foo.jpg"></body></html>"""
         )
 
@@ -678,8 +678,8 @@ class URLPreviewTests(unittest.HomeserverTestCase):
                 b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
                 b'Content-Type: text/html; charset="utf8"\r\n\r\n'
             )
-            % (len(end_content),)
-            + end_content
+            % (len(result),)
+            + result
         )
 
         self.pump()
@@ -688,6 +688,44 @@ class URLPreviewTests(unittest.HomeserverTestCase):
         # The image should not be in the result.
         self.assertNotIn("og:image", channel.json_body)
 
+    def test_oembed_failure(self) -> None:
+        """If the autodiscovered oEmbed URL fails, ensure some data is returned."""
+        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+        result = b"""
+        <title>oEmbed Autodiscovery Fail</title>
+        <link rel="alternate" type="application/json+oembed"
+            href="http://example.com/oembed?url=http%3A%2F%2Fmatrix.org&format=json"
+            title="matrixdotorg" />
+        """
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=http://matrix.org",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            (
+                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+                b'Content-Type: text/html; charset="utf8"\r\n\r\n'
+            )
+            % (len(result),)
+            + result
+        )
+
+        self.pump()
+        self.assertEqual(channel.code, 200)
+
+        # The image should not be in the result.
+        self.assertEqual(channel.json_body["og:title"], "oEmbed Autodiscovery Fail")
+
     def test_data_url(self) -> None:
         """
         Requesting to preview a data URL is not supported.