summary refs log tree commit diff
path: root/tests
diff options
context:
space:
mode:
author: reivilibre <oliverw@matrix.org>, 2022-06-28 14:29:08 +0100
committer: GitHub <noreply@github.com>, 2022-06-28 14:29:08 +0100
commit: fa1308061802ac7b7d20e954ba7372c5ac292333 (patch)
tree: d58c5ead13e8c45418c375fbbf0e31a7d818e866 /tests
parent: Fix incorrect link in changelog. (diff)
download: synapse-fa1308061802ac7b7d20e954ba7372c5ac292333.tar.xz
Merge pull request from GHSA-22p3-qrh9-cx32
* Make _iterate_over_text easier to read by using simple data structures

* Prefer a set of tags to ignore

In my tests, it's 4x faster to check for containment in a set of this size

* Add a stack size limit to _iterate_over_text

* Continue accepting the case where there is no body element

* Use an early return instead for None

Co-authored-by: Richard van der Hoff <richard@matrix.org>
Diffstat (limited to 'tests')
-rw-r--r-- tests/rest/media/v1/test_html_preview.py | 17
1 files changed, 17 insertions, 0 deletions
diff --git a/tests/rest/media/v1/test_html_preview.py b/tests/rest/media/v1/test_html_preview.py
index ea9e5889bf..61357622bd 100644
--- a/tests/rest/media/v1/test_html_preview.py
+++ b/tests/rest/media/v1/test_html_preview.py
@@ -370,6 +370,23 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase):
         og = parse_html_to_open_graph(tree)
         self.assertEqual(og, {"og:title": "ó", "og:description": "Some text."})
 
+    def test_nested_nodes(self) -> None:
+        """Nested nodes: text must come out in document order (not reversed),
+        and the text inside the <svg> element must be left out."""
+        html = b"""
+        <a href="somewhere">Welcome <b>the bold <u>and underlined text <svg>
+        with a cheeky SVG</svg></u> and <strong>some</strong> tail text</b></a>
+        """
+        tree = decode_body(html, "http://example.com/test.html")
+        og = parse_html_to_open_graph(tree)
+        self.assertEqual(
+            og,
+            {
+                "og:title": None,
+                "og:description": "Welcome\n\nthe bold\n\nand underlined text\n\nand\n\nsome\n\ntail text",
+            },
+        )
+
 
 class MediaEncodingTestCase(unittest.TestCase):
     def test_meta_charset(self) -> None: