summary refs log tree commit diff
path: root/tests/test_preview.py
diff options
context:
space:
mode:
authorPatrick Cloke <clokep@users.noreply.github.com>2021-01-26 07:32:17 -0500
committerGitHub <noreply@github.com>2021-01-26 07:32:17 -0500
commit4937fe3d6be94222b02760866496781f8cc88751 (patch)
tree40dd8def98be0b515054d67776a1452d2cc41a21 /tests/test_preview.py
parentUpdate isort to v5.7.0 (#9222) (diff)
downloadsynapse-4937fe3d6be94222b02760866496781f8cc88751.tar.xz
Try to recover from unknown encodings when previewing media. (#9164)
Treat unknown encodings (according to lxml) as UTF-8
when generating a preview for HTML documents. This
isn't fully accurate, but will hopefully give a reasonable
title and summary.
Diffstat (limited to 'tests/test_preview.py')
-rw-r--r--tests/test_preview.py29
1 files changed, 29 insertions, 0 deletions
diff --git a/tests/test_preview.py b/tests/test_preview.py
index c19facc1cb..0c6cbbd921 100644
--- a/tests/test_preview.py
+++ b/tests/test_preview.py
@@ -261,3 +261,32 @@ class PreviewUrlTestCase(unittest.TestCase):
         html = ""
         og = decode_and_calc_og(html, "http://example.com/test.html")
         self.assertEqual(og, {})
+
+    def test_invalid_encoding(self):
+        """An invalid character encoding should be ignored and treated as UTF-8, if possible."""
+        html = """
+        <html>
+        <head><title>Foo</title></head>
+        <body>
+        Some text.
+        </body>
+        </html>
+        """
+        og = decode_and_calc_og(
+            html, "http://example.com/test.html", "invalid-encoding"
+        )
+        self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."})
+
+    def test_invalid_encoding2(self):
+        """A body which doesn't match the sent character encoding."""
+        # Note that this contains an invalid UTF-8 sequence in the title.
+        html = b"""
+        <html>
+        <head><title>\xff\xff Foo</title></head>
+        <body>
+        Some text.
+        </body>
+        </html>
+        """
+        og = decode_and_calc_og(html, "http://example.com/test.html")
+        self.assertEqual(og, {"og:title": "ÿÿ Foo", "og:description": "Some text."})