From df758e155dac18602c34f63df56907de081a7220 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Thu, 15 Nov 2018 11:05:08 -0600 Subject: Use tags to discover the per-page encoding of html previews (#4183) --- tests/rest/media/v1/test_url_preview.py | 77 +++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) (limited to 'tests') diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py index 29579cf091..86c813200a 100644 --- a/tests/rest/media/v1/test_url_preview.py +++ b/tests/rest/media/v1/test_url_preview.py @@ -162,3 +162,80 @@ class URLPreviewTests(unittest.HomeserverTestCase): self.assertEqual( channel.json_body, {"og:title": "~matrix~", "og:description": "hi"} ) + + def test_non_ascii_preview_httpequiv(self): + + request, channel = self.make_request( + "GET", "url_preview?url=matrix.org", shorthand=False + ) + request.render(self.preview_url) + self.pump() + + # We've made one fetch + self.assertEqual(len(self.fetches), 1) + + end_content = ( + b'' + b'' + b'' + b'' + b'' + ) + + self.fetches[0][0].callback( + ( + end_content, + ( + len(end_content), + { + b"Content-Length": [b"%d" % (len(end_content))], + # This charset=utf-8 should be ignored, because the + # document has a meta tag overriding it. + b"Content-Type": [b'text/html; charset="utf8"'], + }, + "https://example.com", + 200, + ), + ) + ) + + self.pump() + self.assertEqual(channel.code, 200) + self.assertEqual(channel.json_body["og:title"], u"\u0434\u043a\u0430") + + def test_non_ascii_preview_content_type(self): + + request, channel = self.make_request( + "GET", "url_preview?url=matrix.org", shorthand=False + ) + request.render(self.preview_url) + self.pump() + + # We've made one fetch + self.assertEqual(len(self.fetches), 1) + + end_content = ( + b'' + b'' + b'' + b'' + ) + + self.fetches[0][0].callback( + ( + end_content, + ( + len(end_content), + { + b"Content-Length": [b"%d" % (len(end_content))], + b"Content-Type": [b'text/html; charset="windows-1251"'], + }, + "https://example.com", + 200, + ), + ) + ) + + self.pump() + self.assertEqual(channel.code, 200) + self.assertEqual(channel.json_body["og:title"], u"\u0434\u043a\u0430") -- cgit 1.4.1