summary refs log tree commit diff
diff options
context:
space:
mode:
authorPatrick Cloke <clokep@users.noreply.github.com>2022-06-06 07:46:04 -0400
committerGitHub <noreply@github.com>2022-06-06 07:46:04 -0400
commit148fe58a247d61ffb76c566ba397285480d93f74 (patch)
treea9aa3adf457d35815930d026396b6837efe367b2
parentImplement MSC3816, consider the root event for thread participation. (#12766) (diff)
downloadsynapse-148fe58a247d61ffb76c566ba397285480d93f74.tar.xz
Do not break URL previews if an image is unreachable. (#12950)
Avoid breaking a URL preview completely if the chosen image 404s
or is unreachable for some other reason (e.g. DNS).
-rw-r--r--changelog.d/12950.bugfix1
-rw-r--r--synapse/rest/media/v1/preview_url_resource.py23
-rw-r--r--tests/rest/media/v1/test_url_preview.py35
3 files changed, 53 insertions, 6 deletions
diff --git a/changelog.d/12950.bugfix b/changelog.d/12950.bugfix
new file mode 100644
index 0000000000..e835d9aa72
--- /dev/null
+++ b/changelog.d/12950.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where a URL preview would break if the image failed to download.
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index 2b2db63bf7..54a849eac9 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -586,12 +586,16 @@ class PreviewUrlResource(DirectServeJsonResource):
             og: The Open Graph dictionary. This is modified with image information.
         """
         # If there's no image or it is blank, there's nothing to do.
-        if "og:image" not in og or not og["og:image"]:
+        if "og:image" not in og:
+            return
+
+        # Remove the raw image URL, this will be replaced with an MXC URL, if successful.
+        image_url = og.pop("og:image")
+        if not image_url:
             return
 
         # The image URL from the HTML might be relative to the previewed page,
         # convert it to an URL which can be requested directly.
-        image_url = og["og:image"]
         url_parts = urlparse(image_url)
         if url_parts.scheme != "data":
             image_url = urljoin(media_info.uri, image_url)
@@ -599,7 +603,16 @@ class PreviewUrlResource(DirectServeJsonResource):
         # FIXME: it might be cleaner to use the same flow as the main /preview_url
         # request itself and benefit from the same caching etc.  But for now we
         # just rely on the caching on the master request to speed things up.
-        image_info = await self._handle_url(image_url, user, allow_data_urls=True)
+        try:
+            image_info = await self._handle_url(image_url, user, allow_data_urls=True)
+        except Exception as e:
+            # Pre-caching the image failed, don't block the entire URL preview.
+            logger.warning(
+                "Pre-caching image failed during URL preview: %s errored with %s",
+                image_url,
+                e,
+            )
+            return
 
         if _is_media(image_info.media_type):
             # TODO: make sure we don't choke on white-on-transparent images
@@ -611,13 +624,11 @@ class PreviewUrlResource(DirectServeJsonResource):
                 og["og:image:width"] = dims["width"]
                 og["og:image:height"] = dims["height"]
             else:
-                logger.warning("Couldn't get dims for %s", og["og:image"])
+                logger.warning("Couldn't get dims for %s", image_url)
 
             og["og:image"] = f"mxc://{self.server_name}/{image_info.filesystem_id}"
             og["og:image:type"] = image_info.media_type
             og["matrix:image:size"] = image_info.media_length
-        else:
-            del og["og:image"]
 
     async def _handle_oembed_response(
         self, url: str, media_info: MediaInfo, expiration_ms: int
diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py
index 3b24d0ace6..2c321f8d04 100644
--- a/tests/rest/media/v1/test_url_preview.py
+++ b/tests/rest/media/v1/test_url_preview.py
@@ -656,6 +656,41 @@ class URLPreviewTests(unittest.HomeserverTestCase):
             server.data,
         )
 
+    def test_nonexistent_image(self) -> None:
+        """If the preview image doesn't exist, ensure some data is returned."""
+        self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]
+
+        end_content = (
+            b"""<html><body><img src="http://cdn.matrix.org/foo.jpg"></body></html>"""
+        )
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=http://matrix.org",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            (
+                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+                b'Content-Type: text/html; charset="utf8"\r\n\r\n'
+            )
+            % (len(end_content),)
+            + end_content
+        )
+
+        self.pump()
+        self.assertEqual(channel.code, 200)
+
+        # The image should not be in the result.
+        self.assertNotIn("og:image", channel.json_body)
+
     def test_data_url(self) -> None:
         """
         Requesting to preview a data URL is not supported.