From fa1308061802ac7b7d20e954ba7372c5ac292333 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Tue, 28 Jun 2022 14:29:08 +0100 Subject: Merge pull request from GHSA-22p3-qrh9-cx32 * Make _iterate_over_text easier to read by using simple data structures * Prefer a set of tags to ignore In my tests, it's 4x faster to check for containment in a set of this size * Add a stack size limit to _iterate_over_text * Continue accepting the case where there is no body element * Use an early return instead for None Co-authored-by: Richard van der Hoff --- synapse/rest/media/v1/preview_html.py | 63 ++++++++++++++++++++------------ tests/rest/media/v1/test_html_preview.py | 17 +++++++++ 2 files changed, 56 insertions(+), 24 deletions(-) diff --git a/synapse/rest/media/v1/preview_html.py b/synapse/rest/media/v1/preview_html.py index ed8f21a483..5f334f4634 100644 --- a/synapse/rest/media/v1/preview_html.py +++ b/synapse/rest/media/v1/preview_html.py @@ -12,10 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. import codecs -import itertools import logging import re -from typing import TYPE_CHECKING, Dict, Generator, Iterable, Optional, Set, Union +from typing import TYPE_CHECKING, Dict, Generator, Iterable, List, Optional, Set, Union if TYPE_CHECKING: from lxml import etree @@ -276,7 +275,7 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]: from lxml import etree - TAGS_TO_REMOVE = ( + TAGS_TO_REMOVE = { "header", "nav", "aside", @@ -291,31 +290,42 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]: "img", "picture", etree.Comment, - ) + } # Split all the text nodes into paragraphs (by splitting on new # lines) text_nodes = ( re.sub(r"\s+", "\n", el).strip() - for el in _iterate_over_text(tree.find("body"), *TAGS_TO_REMOVE) + for el in _iterate_over_text(tree.find("body"), TAGS_TO_REMOVE) ) return summarize_paragraphs(text_nodes) def _iterate_over_text( - tree: "etree.Element", *tags_to_ignore: Union[str, "etree.Comment"] + tree: Optional["etree.Element"], + tags_to_ignore: Set[Union[str, "etree.Comment"]], + stack_limit: int = 1024, ) -> Generator[str, None, None]: """Iterate over the tree returning text nodes in a depth first fashion, skipping text nodes inside certain tags. + + Args: + tree: The parent element to iterate. Can be None if there isn't one. + tags_to_ignore: Set of tags to ignore + stack_limit: Maximum stack size limit for depth-first traversal. + Nodes will be dropped if this limit is hit, which may truncate the + textual result. + Intended to limit the maximum working memory when generating a preview. """ - # This is basically a stack that we extend using itertools.chain. - # This will either consist of an element to iterate over *or* a string + + if tree is None: + return + + # This is a stack whose items are elements to iterate over *or* strings # to be returned. - elements = iter([tree]) - while True: - el = next(elements, None) - if el is None: - return + elements: List[Union[str, "etree.Element"]] = [tree] + while elements: + el = elements.pop() if isinstance(el, str): yield el @@ -329,17 +339,22 @@ def _iterate_over_text( if el.text: yield el.text - # We add to the stack all the elements children, interspersed with - # each child's tail text (if it exists). The tail text of a node - # is text that comes *after* the node, so we always include it even - # if we ignore the child node. - elements = itertools.chain( - itertools.chain.from_iterable( # Basically a flatmap - [child, child.tail] if child.tail else [child] - for child in el.iterchildren() - ), - elements, - ) + # We add to the stack all the element's children, interspersed with + # each child's tail text (if it exists). + # + # We iterate in reverse order so that earlier pieces of text appear + # closer to the top of the stack. + for child in el.iterchildren(reversed=True): + if len(elements) > stack_limit: + # We've hit our limit for working memory + break + + if child.tail: + # The tail text of a node is text that comes *after* the node, + # so we always include it even if we ignore the child node. + elements.append(child.tail) + + elements.append(child) def summarize_paragraphs( diff --git a/tests/rest/media/v1/test_html_preview.py b/tests/rest/media/v1/test_html_preview.py index ea9e5889bf..61357622bd 100644 --- a/tests/rest/media/v1/test_html_preview.py +++ b/tests/rest/media/v1/test_html_preview.py @@ -370,6 +370,23 @@ class OpenGraphFromHtmlTestCase(unittest.TestCase): og = parse_html_to_open_graph(tree) self.assertEqual(og, {"og:title": "รณ", "og:description": "Some text."}) + def test_nested_nodes(self) -> None: + """A body with some nested nodes. Tests that we iterate over children + in the right order (and don't reverse the order of the text).""" + html = b""" + Welcome the bold and underlined text + with a cheeky SVG and some tail text + """ + tree = decode_body(html, "http://example.com/test.html") + og = parse_html_to_open_graph(tree) + self.assertEqual( + og, + { + "og:title": None, + "og:description": "Welcome\n\nthe bold\n\nand underlined text\n\nand\n\nsome\n\ntail text", + }, + ) + class MediaEncodingTestCase(unittest.TestCase): def test_meta_charset(self) -> None: -- cgit 1.4.1 From ea10cdbea703f94a84a484377485de8dc14a963a Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 28 Jun 2022 14:33:56 +0100 Subject: 1.61.1 --- CHANGES.md | 21 +++++++++++++++++++++ debian/changelog | 6 ++++++ pyproject.toml | 2 +- 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index bd9b34dd7a..b97f014142 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,24 @@ +Synapse 1.61.1 (2022-06-28) +=========================== + +This patch release fixes a security issue regarding URL previews, affecting all prior versions of Synapse. Server administrators are encouraged to update Synapse as soon as possible. We are not aware of these vulnerabilities being exploited in the wild. + +Server administrators who are unable to update Synapse may use the workarounds described in the linked GitHub Security Advisory below. + +## Security advisory + +The following issue is fixed in 1.61.1. + +* [GHSA-22p3-qrh9-cx32](https://github.com/matrix-org/synapse/security/advisories/GHSA-22p3-qrh9-cx32) / [CVE-2022-31052](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-31052) + + Synapse instances with the [`url_preview_enabled`](https://matrix-org.github.io/synapse/v1.61/usage/configuration/config_documentation.html#media-store) homeserver config option set to `true` are affected. URL previews of some web pages can lead to unbounded recursion, causing the request to either fail, or in some cases crash the running Synapse process. + + Requesting URL previews requires authentication. Nevertheless, it is possible to exploit this maliciously, either by malicious users on the homeserver, or by remote users sending URLs that a local user's client may automatically request a URL preview for. + + Homeservers with the `url_preview_enabled` configuration option set to `false` (the default) are unaffected. Instances with the `enable_media_repo` configuration option set to `false` are also unaffected, as this also disables URL preview functionality. + + Fixed by fa1308061802ac7b7d20e954ba7372c5ac292333. + Synapse 1.61.0 (2022-06-14) =========================== diff --git a/debian/changelog b/debian/changelog index 753a03065a..2ca565a157 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (1.61.1) stable; urgency=medium + + * New Synapse release 1.61.1. + + -- Synapse Packaging team Tue, 28 Jun 2022 14:33:46 +0100 + matrix-synapse-py3 (1.61.0) stable; urgency=medium * New Synapse release 1.61.0. diff --git a/pyproject.toml b/pyproject.toml index 8b21bdc837..7d33c08f73 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,7 @@ skip_gitignore = true [tool.poetry] name = "matrix-synapse" -version = "1.61.0" +version = "1.61.1" description = "Homeserver for the Matrix decentralised comms protocol" authors = ["Matrix.org Team and Contributors "] license = "Apache-2.0" -- cgit 1.4.1 From 09d89ddc1f875bb1ea835a7614980787d4ebd043 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 28 Jun 2022 14:41:06 +0100 Subject: Linkify GHSA commit --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index b97f014142..0db01d4096 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -17,7 +17,7 @@ The following issue is fixed in 1.61.1. Homeservers with the `url_preview_enabled` configuration option set to `false` (the default) are unaffected. Instances with the `enable_media_repo` configuration option set to `false` are also unaffected, as this also disables URL preview functionality. - Fixed by fa1308061802ac7b7d20e954ba7372c5ac292333. + Fixed by [fa1308061802ac7b7d20e954ba7372c5ac292333](https://github.com/matrix-org/synapse/commit/fa1308061802ac7b7d20e954ba7372c5ac292333). Synapse 1.61.0 (2022-06-14) =========================== -- cgit 1.4.1