diff options
author | Patrick Cloke <clokep@users.noreply.github.com> | 2021-12-13 12:55:07 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-12-13 17:55:07 +0000 |
commit | eb39da6782b57c939450839097f32a14cba3ebfc (patch) | |
tree | 8dbd3921d86cb87b7d97ebe7f0b61e374b5868ca /tests | |
parent | Type hint the constructors of the data store classes (#11555) (diff) | |
download | synapse-eb39da6782b57c939450839097f32a14cba3ebfc.tar.xz |
Move HTML parsing to a separate file for URL previews. (#11566)
* Splits the logic for parsing HTML from the resource handling code.
* Fixes a circular import in the oEmbed code (which uses the HTML parsing code).
* Renames some of the HTML parsing methods to:
  * Make it clear which methods are "internal" to the module.
  * Clarify what the methods do.
Diffstat (limited to 'tests')
-rw-r--r-- | tests/rest/media/v1/test_url_preview.py | 1 | ||||
-rw-r--r-- | tests/test_preview.py | 46 |
2 files changed, 24 insertions, 23 deletions
diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py index 8698135a76..16e904f15b 100644 --- a/tests/rest/media/v1/test_url_preview.py +++ b/tests/rest/media/v1/test_url_preview.py @@ -1,4 +1,5 @@ # Copyright 2018 New Vector Ltd +# Copyright 2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/test_preview.py b/tests/test_preview.py index 40b89fb2ef..46e02f483f 100644 --- a/tests/test_preview.py +++ b/tests/test_preview.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from synapse.rest.media.v1.preview_url_resource import ( - _calc_og, +from synapse.rest.media.v1.preview_html import ( + _get_html_media_encodings, decode_body, - get_html_media_encodings, + parse_html_to_open_graph, summarize_paragraphs, ) @@ -160,7 +160,7 @@ class CalcOgTestCase(unittest.TestCase): """ tree = decode_body(html, "http://example.com/test.html") - og = _calc_og(tree, "http://example.com/test.html") + og = parse_html_to_open_graph(tree, "http://example.com/test.html") self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."}) @@ -176,7 +176,7 @@ class CalcOgTestCase(unittest.TestCase): """ tree = decode_body(html, "http://example.com/test.html") - og = _calc_og(tree, "http://example.com/test.html") + og = parse_html_to_open_graph(tree, "http://example.com/test.html") self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."}) @@ -195,7 +195,7 @@ class CalcOgTestCase(unittest.TestCase): """ tree = decode_body(html, "http://example.com/test.html") - og = _calc_og(tree, "http://example.com/test.html") + og = parse_html_to_open_graph(tree, "http://example.com/test.html") self.assertEqual( og, @@ -217,7 +217,7 @@ class CalcOgTestCase(unittest.TestCase): """ tree = decode_body(html, 
"http://example.com/test.html") - og = _calc_og(tree, "http://example.com/test.html") + og = parse_html_to_open_graph(tree, "http://example.com/test.html") self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."}) @@ -231,7 +231,7 @@ class CalcOgTestCase(unittest.TestCase): """ tree = decode_body(html, "http://example.com/test.html") - og = _calc_og(tree, "http://example.com/test.html") + og = parse_html_to_open_graph(tree, "http://example.com/test.html") self.assertEqual(og, {"og:title": None, "og:description": "Some text."}) @@ -246,7 +246,7 @@ class CalcOgTestCase(unittest.TestCase): """ tree = decode_body(html, "http://example.com/test.html") - og = _calc_og(tree, "http://example.com/test.html") + og = parse_html_to_open_graph(tree, "http://example.com/test.html") self.assertEqual(og, {"og:title": "Title", "og:description": "Some text."}) @@ -261,7 +261,7 @@ class CalcOgTestCase(unittest.TestCase): """ tree = decode_body(html, "http://example.com/test.html") - og = _calc_og(tree, "http://example.com/test.html") + og = parse_html_to_open_graph(tree, "http://example.com/test.html") self.assertEqual(og, {"og:title": None, "og:description": "Some text."}) @@ -289,7 +289,7 @@ class CalcOgTestCase(unittest.TestCase): <head><title>Foo</title></head><body>Some text.</body></html> """.strip() tree = decode_body(html, "http://example.com/test.html") - og = _calc_og(tree, "http://example.com/test.html") + og = parse_html_to_open_graph(tree, "http://example.com/test.html") self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."}) def test_invalid_encoding(self): @@ -303,7 +303,7 @@ class CalcOgTestCase(unittest.TestCase): </html> """ tree = decode_body(html, "http://example.com/test.html", "invalid-encoding") - og = _calc_og(tree, "http://example.com/test.html") + og = parse_html_to_open_graph(tree, "http://example.com/test.html") self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."}) def test_invalid_encoding2(self): @@ 
-318,7 +318,7 @@ class CalcOgTestCase(unittest.TestCase): </html> """ tree = decode_body(html, "http://example.com/test.html") - og = _calc_og(tree, "http://example.com/test.html") + og = parse_html_to_open_graph(tree, "http://example.com/test.html") self.assertEqual(og, {"og:title": "ÿÿ Foo", "og:description": "Some text."}) def test_windows_1252(self): @@ -332,14 +332,14 @@ class CalcOgTestCase(unittest.TestCase): </html> """ tree = decode_body(html, "http://example.com/test.html") - og = _calc_og(tree, "http://example.com/test.html") + og = parse_html_to_open_graph(tree, "http://example.com/test.html") self.assertEqual(og, {"og:title": "ó", "og:description": "Some text."}) class MediaEncodingTestCase(unittest.TestCase): def test_meta_charset(self): """A character encoding is found via the meta tag.""" - encodings = get_html_media_encodings( + encodings = _get_html_media_encodings( b""" <html> <head><meta charset="ascii"> @@ -351,7 +351,7 @@ class MediaEncodingTestCase(unittest.TestCase): self.assertEqual(list(encodings), ["ascii", "utf-8", "cp1252"]) # A less well-formed version. 
- encodings = get_html_media_encodings( + encodings = _get_html_media_encodings( b""" <html> <head>< meta charset = ascii> @@ -364,7 +364,7 @@ class MediaEncodingTestCase(unittest.TestCase): def test_meta_charset_underscores(self): """A character encoding contains underscore.""" - encodings = get_html_media_encodings( + encodings = _get_html_media_encodings( b""" <html> <head><meta charset="Shift_JIS"> @@ -377,7 +377,7 @@ class MediaEncodingTestCase(unittest.TestCase): def test_xml_encoding(self): """A character encoding is found via the meta tag.""" - encodings = get_html_media_encodings( + encodings = _get_html_media_encodings( b""" <?xml version="1.0" encoding="ascii"?> <html> @@ -389,7 +389,7 @@ class MediaEncodingTestCase(unittest.TestCase): def test_meta_xml_encoding(self): """Meta tags take precedence over XML encoding.""" - encodings = get_html_media_encodings( + encodings = _get_html_media_encodings( b""" <?xml version="1.0" encoding="ascii"?> <html> @@ -413,17 +413,17 @@ class MediaEncodingTestCase(unittest.TestCase): 'text/html; charset=ascii";', ) for header in headers: - encodings = get_html_media_encodings(b"", header) + encodings = _get_html_media_encodings(b"", header) self.assertEqual(list(encodings), ["ascii", "utf-8", "cp1252"]) def test_fallback(self): """A character encoding cannot be found in the body or header.""" - encodings = get_html_media_encodings(b"", "text/html") + encodings = _get_html_media_encodings(b"", "text/html") self.assertEqual(list(encodings), ["utf-8", "cp1252"]) def test_duplicates(self): """Ensure each encoding is only attempted once.""" - encodings = get_html_media_encodings( + encodings = _get_html_media_encodings( b""" <?xml version="1.0" encoding="utf8"?> <html> @@ -437,7 +437,7 @@ class MediaEncodingTestCase(unittest.TestCase): def test_unknown_invalid(self): """A character encoding should be ignored if it is unknown or invalid.""" - encodings = get_html_media_encodings( + encodings = _get_html_media_encodings( b""" 
<html> <head><meta charset="invalid"> |