diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py
index ce43de780b..8698135a76 100644
--- a/tests/rest/media/v1/test_url_preview.py
+++ b/tests/rest/media/v1/test_url_preview.py
@@ -725,9 +725,107 @@ class URLPreviewTests(unittest.HomeserverTestCase):
},
)
+ def test_oembed_autodiscovery(self):
+ """
+ Autodiscovery works by finding the link in the HTML response and then requesting an oEmbed URL.
+ 1. Request a preview of a URL which is not known to the oEmbed code.
+ 2. It returns HTML including a link to an oEmbed preview.
+ 3. The oEmbed preview is requested and returns a URL for an image.
+ 4. The image is requested for thumbnailing.
+ """
+ # This is a little cheesy in that we use the www subdomain (which isn't the
+ # list of oEmbed patterns) to get "raw" HTML response.
+ self.lookups["www.twitter.com"] = [(IPv4Address, "10.1.2.3")]
+ self.lookups["publish.twitter.com"] = [(IPv4Address, "10.1.2.3")]
+ self.lookups["cdn.twitter.com"] = [(IPv4Address, "10.1.2.3")]
+
+ result = b"""
+ <link rel="alternate" type="application/json+oembed"
+ href="http://publish.twitter.com/oembed?url=http%3A%2F%2Fcdn.twitter.com%2Fmatrixdotorg%2Fstatus%2F12345&format=json"
+ title="matrixdotorg" />
+ """
+
+ channel = self.make_request(
+ "GET",
+ "preview_url?url=http://www.twitter.com/matrixdotorg/status/12345",
+ shorthand=False,
+ await_result=False,
+ )
+ self.pump()
+
+ client = self.reactor.tcpClients[0][2].buildProtocol(None)
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+ client.dataReceived(
+ (
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+ b'Content-Type: text/html; charset="utf8"\r\n\r\n'
+ )
+ % (len(result),)
+ + result
+ )
+
+ self.pump()
+
+ # The oEmbed response.
+ result2 = {
+ "version": "1.0",
+ "type": "photo",
+ "url": "http://cdn.twitter.com/matrixdotorg",
+ }
+ oembed_content = json.dumps(result2).encode("utf-8")
+
+ # Ensure a second request is made to the oEmbed URL.
+ client = self.reactor.tcpClients[1][2].buildProtocol(None)
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+ client.dataReceived(
+ (
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+ b'Content-Type: application/json; charset="utf8"\r\n\r\n'
+ )
+ % (len(oembed_content),)
+ + oembed_content
+ )
+
+ self.pump()
+
+ # Ensure the URL is what was requested.
+ self.assertIn(b"/oembed?", server.data)
+
+ # Ensure a third request is made to the photo URL.
+ client = self.reactor.tcpClients[2][2].buildProtocol(None)
+ server = AccumulatingProtocol()
+ server.makeConnection(FakeTransport(client, self.reactor))
+ client.makeConnection(FakeTransport(server, self.reactor))
+ client.dataReceived(
+ (
+ b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
+ b"Content-Type: image/png\r\n\r\n"
+ )
+ % (len(SMALL_PNG),)
+ + SMALL_PNG
+ )
+
+ self.pump()
+
+ # Ensure the URL is what was requested.
+ self.assertIn(b"/matrixdotorg", server.data)
+
+ self.assertEqual(channel.code, 200)
+ body = channel.json_body
+ self.assertEqual(
+ body["og:url"], "http://www.twitter.com/matrixdotorg/status/12345"
+ )
+ self.assertTrue(body["og:image"].startswith("mxc://"))
+ self.assertEqual(body["og:image:height"], 1)
+ self.assertEqual(body["og:image:width"], 1)
+ self.assertEqual(body["og:image:type"], "image/png")
+
def _download_image(self):
"""Downloads an image into the URL cache.
-
Returns:
A (host, media_id) tuple representing the MXC URI of the image.
"""
diff --git a/tests/test_preview.py b/tests/test_preview.py
index 48e792b55b..09e017b4d9 100644
--- a/tests/test_preview.py
+++ b/tests/test_preview.py
@@ -13,7 +13,8 @@
# limitations under the License.
from synapse.rest.media.v1.preview_url_resource import (
- decode_and_calc_og,
+ _calc_og,
+ decode_body,
get_html_media_encoding,
summarize_paragraphs,
)
@@ -158,7 +159,8 @@ class CalcOgTestCase(unittest.TestCase):
</html>
"""
- og = decode_and_calc_og(html, "http://example.com/test.html")
+ tree = decode_body(html)
+ og = _calc_og(tree, "http://example.com/test.html")
self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
@@ -173,7 +175,8 @@ class CalcOgTestCase(unittest.TestCase):
</html>
"""
- og = decode_and_calc_og(html, "http://example.com/test.html")
+ tree = decode_body(html)
+ og = _calc_og(tree, "http://example.com/test.html")
self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
@@ -191,7 +194,8 @@ class CalcOgTestCase(unittest.TestCase):
</html>
"""
- og = decode_and_calc_og(html, "http://example.com/test.html")
+ tree = decode_body(html)
+ og = _calc_og(tree, "http://example.com/test.html")
self.assertEqual(
og,
@@ -212,7 +216,8 @@ class CalcOgTestCase(unittest.TestCase):
</html>
"""
- og = decode_and_calc_og(html, "http://example.com/test.html")
+ tree = decode_body(html)
+ og = _calc_og(tree, "http://example.com/test.html")
self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
@@ -225,7 +230,8 @@ class CalcOgTestCase(unittest.TestCase):
</html>
"""
- og = decode_and_calc_og(html, "http://example.com/test.html")
+ tree = decode_body(html)
+ og = _calc_og(tree, "http://example.com/test.html")
self.assertEqual(og, {"og:title": None, "og:description": "Some text."})
@@ -239,7 +245,8 @@ class CalcOgTestCase(unittest.TestCase):
</html>
"""
- og = decode_and_calc_og(html, "http://example.com/test.html")
+ tree = decode_body(html)
+ og = _calc_og(tree, "http://example.com/test.html")
self.assertEqual(og, {"og:title": "Title", "og:description": "Some text."})
@@ -253,21 +260,22 @@ class CalcOgTestCase(unittest.TestCase):
</html>
"""
- og = decode_and_calc_og(html, "http://example.com/test.html")
+ tree = decode_body(html)
+ og = _calc_og(tree, "http://example.com/test.html")
self.assertEqual(og, {"og:title": None, "og:description": "Some text."})
def test_empty(self):
"""Test a body with no data in it."""
html = b""
- og = decode_and_calc_og(html, "http://example.com/test.html")
- self.assertEqual(og, {})
+ tree = decode_body(html)
+ self.assertIsNone(tree)
def test_no_tree(self):
"""A valid body with no tree in it."""
html = b"\x00"
- og = decode_and_calc_og(html, "http://example.com/test.html")
- self.assertEqual(og, {})
+ tree = decode_body(html)
+ self.assertIsNone(tree)
def test_invalid_encoding(self):
"""An invalid character encoding should be ignored and treated as UTF-8, if possible."""
@@ -279,9 +287,8 @@ class CalcOgTestCase(unittest.TestCase):
</body>
</html>
"""
- og = decode_and_calc_og(
- html, "http://example.com/test.html", "invalid-encoding"
- )
+ tree = decode_body(html, "invalid-encoding")
+ og = _calc_og(tree, "http://example.com/test.html")
self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
def test_invalid_encoding2(self):
@@ -295,7 +302,8 @@ class CalcOgTestCase(unittest.TestCase):
</body>
</html>
"""
- og = decode_and_calc_og(html, "http://example.com/test.html")
+ tree = decode_body(html)
+ og = _calc_og(tree, "http://example.com/test.html")
self.assertEqual(og, {"og:title": "ÿÿ Foo", "og:description": "Some text."})
|