summary refs log tree commit diff
path: root/tests/test_utils/html_parsers.py
diff options
context:
space:
mode:
author: Jason Robinson <jasonr@matrix.org> 2021-01-23 21:41:35 +0200
committer: Jason Robinson <jasonr@matrix.org> 2021-01-23 21:41:35 +0200
commit: 8965b6cfec8a1de847efe3d1be4b9babf4622e2e (patch)
tree: 4551f104ee2ce840689aa5ecffa939938482ffd5 /tests/test_utils/html_parsers.py
parent: Add depth and received_ts to forward_extremities admin API response (diff)
parent: Return a 404 if no valid thumbnail is found. (#9163) (diff)
download: synapse-8965b6cfec8a1de847efe3d1be4b9babf4622e2e.tar.xz
Merge branch 'develop' into jaywink/admin-forward-extremities
Diffstat (limited to 'tests/test_utils/html_parsers.py')
-rw-r--r-- tests/test_utils/html_parsers.py 53
1 files changed, 53 insertions, 0 deletions
diff --git a/tests/test_utils/html_parsers.py b/tests/test_utils/html_parsers.py
new file mode 100644
index 0000000000..ad563eb3f0
--- /dev/null
+++ b/tests/test_utils/html_parsers.py
@@ -0,0 +1,53 @@
+# -*- coding: utf-8 -*-
+# Copyright 2021 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from html.parser import HTMLParser
+from typing import Dict, Iterable, List, Optional, Tuple
+
+
+class TestHtmlParser(HTMLParser):
+    """A generic HTML page parser which extracts useful things from the HTML.
+
+    After ``feed()``, ``links`` holds every non-empty ``<a href>``, ``hiddens`` maps
+    hidden ``<input>`` names to values and ``radios`` maps names to value lists.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.links = []  # type: List[str]
+        self.hiddens = {}  # type: Dict[str, Optional[str]]
+        self.radios = {}  # type: Dict[str, List[Optional[str]]]
+
+    def handle_starttag(
+        self, tag: str, attrs: Iterable[Tuple[str, Optional[str]]]
+    ) -> None:
+        """Record links and hidden/radio inputs; absent attributes read as None."""
+        attr_dict = dict(attrs)
+        if tag == "a":
+            href = attr_dict.get("href")  # e.g. <a name="top"> carries no href
+            if href:
+                self.links.append(href)
+        elif tag == "input":
+            input_name, input_type = attr_dict.get("name"), attr_dict.get("type")
+            if input_type == "radio":
+                assert input_name, "radio <input> without a name"
+                self.radios.setdefault(input_name, []).append(attr_dict.get("value"))
+            elif input_type == "hidden":
+                assert input_name, "hidden <input> without a name"
+                self.hiddens[input_name] = attr_dict.get("value")
+
+    def error(self, message):
+        """Report a parse problem as an AssertionError so the test fails."""
+        raise AssertionError(message)