summary refs log tree commit diff
path: root/tests
diff options
context:
space:
mode:
authorPatrick Cloke <clokep@users.noreply.github.com>2021-10-08 14:14:42 -0400
committerGitHub <noreply@github.com>2021-10-08 14:14:42 -0400
commit1b112840d2c6dafa131eba4f0285409bb7345661 (patch)
treecead0015dbaceb5a188b3b95e8a6cfe2d82960dc /tests
parentRevert accidental push to develop. (diff)
downloadsynapse-1b112840d2c6dafa131eba4f0285409bb7345661.tar.xz
Autodiscover oEmbed endpoint from returned HTML (#10822)
Searches the returned HTML for an oEmbed endpoint using the
autodiscovery mechanism (`<link rel=...>`), and will request it
to generate the preview.
Diffstat (limited to 'tests')
-rw-r--r--tests/rest/media/v1/test_url_preview.py100
-rw-r--r--tests/test_preview.py40
2 files changed, 123 insertions, 17 deletions
diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py
index ce43de780b..8698135a76 100644
--- a/tests/rest/media/v1/test_url_preview.py
+++ b/tests/rest/media/v1/test_url_preview.py
@@ -725,9 +725,107 @@ class URLPreviewTests(unittest.HomeserverTestCase):
             },
         )
 
+    def test_oembed_autodiscovery(self):
+        """
+        Autodiscovery works by finding the link in the HTML response and then requesting an oEmbed URL.
+        1. Request a preview of a URL which is not known to the oEmbed code.
+        2. It returns HTML including a link to an oEmbed preview.
+        3. The oEmbed preview is requested and returns a URL for an image.
+        4. The image is requested for thumbnailing.
+        """
+        # This is a little cheesy in that we use the www subdomain (which isn't the
+        # list of oEmbed patterns) to get "raw" HTML response.
+        self.lookups["www.twitter.com"] = [(IPv4Address, "10.1.2.3")]
+        self.lookups["publish.twitter.com"] = [(IPv4Address, "10.1.2.3")]
+        self.lookups["cdn.twitter.com"] = [(IPv4Address, "10.1.2.3")]
+
+        result = b"""
+        <link rel="alternate" type="application/json+oembed"
+            href="http://publish.twitter.com/oembed?url=http%3A%2F%2Fcdn.twitter.com%2Fmatrixdotorg%2Fstatus%2F12345&format=json"
+            title="matrixdotorg" />
+        """
+
+        channel = self.make_request(
+            "GET",
+            "preview_url?url=http://www.twitter.com/matrixdotorg/status/12345",
+            shorthand=False,
+            await_result=False,
+        )
+        self.pump()
+
+        client = self.reactor.tcpClients[0][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            (
+                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+                b'Content-Type: text/html; charset="utf8"\r\n\r\n'
+            )
+            % (len(result),)
+            + result
+        )
+
+        self.pump()
+
+        # The oEmbed response.
+        result2 = {
+            "version": "1.0",
+            "type": "photo",
+            "url": "http://cdn.twitter.com/matrixdotorg",
+        }
+        oembed_content = json.dumps(result2).encode("utf-8")
+
+        # Ensure a second request is made to the oEmbed URL.
+        client = self.reactor.tcpClients[1][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            (
+                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+                b'Content-Type: application/json; charset="utf8"\r\n\r\n'
+            )
+            % (len(oembed_content),)
+            + oembed_content
+        )
+
+        self.pump()
+
+        # Ensure the URL is what was requested.
+        self.assertIn(b"/oembed?", server.data)
+
+        # Ensure a third request is made to the photo URL.
+        client = self.reactor.tcpClients[2][2].buildProtocol(None)
+        server = AccumulatingProtocol()
+        server.makeConnection(FakeTransport(client, self.reactor))
+        client.makeConnection(FakeTransport(server, self.reactor))
+        client.dataReceived(
+            (
+                b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+                b"Content-Type: image/png\r\n\r\n"
+            )
+            % (len(SMALL_PNG),)
+            + SMALL_PNG
+        )
+
+        self.pump()
+
+        # Ensure the URL is what was requested.
+        self.assertIn(b"/matrixdotorg", server.data)
+
+        self.assertEqual(channel.code, 200)
+        body = channel.json_body
+        self.assertEqual(
+            body["og:url"], "http://www.twitter.com/matrixdotorg/status/12345"
+        )
+        self.assertTrue(body["og:image"].startswith("mxc://"))
+        self.assertEqual(body["og:image:height"], 1)
+        self.assertEqual(body["og:image:width"], 1)
+        self.assertEqual(body["og:image:type"], "image/png")
+
     def _download_image(self):
         """Downloads an image into the URL cache.
-
         Returns:
             A (host, media_id) tuple representing the MXC URI of the image.
         """
diff --git a/tests/test_preview.py b/tests/test_preview.py
index 48e792b55b..09e017b4d9 100644
--- a/tests/test_preview.py
+++ b/tests/test_preview.py
@@ -13,7 +13,8 @@
 # limitations under the License.
 
 from synapse.rest.media.v1.preview_url_resource import (
-    decode_and_calc_og,
+    _calc_og,
+    decode_body,
     get_html_media_encoding,
     summarize_paragraphs,
 )
@@ -158,7 +159,8 @@ class CalcOgTestCase(unittest.TestCase):
         </html>
         """
 
-        og = decode_and_calc_og(html, "http://example.com/test.html")
+        tree = decode_body(html)
+        og = _calc_og(tree, "http://example.com/test.html")
 
         self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
 
@@ -173,7 +175,8 @@ class CalcOgTestCase(unittest.TestCase):
         </html>
         """
 
-        og = decode_and_calc_og(html, "http://example.com/test.html")
+        tree = decode_body(html)
+        og = _calc_og(tree, "http://example.com/test.html")
 
         self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
 
@@ -191,7 +194,8 @@ class CalcOgTestCase(unittest.TestCase):
         </html>
         """
 
-        og = decode_and_calc_og(html, "http://example.com/test.html")
+        tree = decode_body(html)
+        og = _calc_og(tree, "http://example.com/test.html")
 
         self.assertEqual(
             og,
@@ -212,7 +216,8 @@ class CalcOgTestCase(unittest.TestCase):
         </html>
         """
 
-        og = decode_and_calc_og(html, "http://example.com/test.html")
+        tree = decode_body(html)
+        og = _calc_og(tree, "http://example.com/test.html")
 
         self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
 
@@ -225,7 +230,8 @@ class CalcOgTestCase(unittest.TestCase):
         </html>
         """
 
-        og = decode_and_calc_og(html, "http://example.com/test.html")
+        tree = decode_body(html)
+        og = _calc_og(tree, "http://example.com/test.html")
 
         self.assertEqual(og, {"og:title": None, "og:description": "Some text."})
 
@@ -239,7 +245,8 @@ class CalcOgTestCase(unittest.TestCase):
         </html>
         """
 
-        og = decode_and_calc_og(html, "http://example.com/test.html")
+        tree = decode_body(html)
+        og = _calc_og(tree, "http://example.com/test.html")
 
         self.assertEqual(og, {"og:title": "Title", "og:description": "Some text."})
 
@@ -253,21 +260,22 @@ class CalcOgTestCase(unittest.TestCase):
         </html>
         """
 
-        og = decode_and_calc_og(html, "http://example.com/test.html")
+        tree = decode_body(html)
+        og = _calc_og(tree, "http://example.com/test.html")
 
         self.assertEqual(og, {"og:title": None, "og:description": "Some text."})
 
     def test_empty(self):
         """Test a body with no data in it."""
         html = b""
-        og = decode_and_calc_og(html, "http://example.com/test.html")
-        self.assertEqual(og, {})
+        tree = decode_body(html)
+        self.assertIsNone(tree)
 
     def test_no_tree(self):
         """A valid body with no tree in it."""
         html = b"\x00"
-        og = decode_and_calc_og(html, "http://example.com/test.html")
-        self.assertEqual(og, {})
+        tree = decode_body(html)
+        self.assertIsNone(tree)
 
     def test_invalid_encoding(self):
         """An invalid character encoding should be ignored and treated as UTF-8, if possible."""
@@ -279,9 +287,8 @@ class CalcOgTestCase(unittest.TestCase):
         </body>
         </html>
         """
-        og = decode_and_calc_og(
-            html, "http://example.com/test.html", "invalid-encoding"
-        )
+        tree = decode_body(html, "invalid-encoding")
+        og = _calc_og(tree, "http://example.com/test.html")
         self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
 
     def test_invalid_encoding2(self):
@@ -295,7 +302,8 @@ class CalcOgTestCase(unittest.TestCase):
         </body>
         </html>
         """
-        og = decode_and_calc_og(html, "http://example.com/test.html")
+        tree = decode_body(html)
+        og = _calc_og(tree, "http://example.com/test.html")
         self.assertEqual(og, {"og:title": "ÿÿ Foo", "og:description": "Some text."})