From 03318a766cac9f8b053db2214d9c332a977d226c Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 11 May 2021 10:47:23 +0100 Subject: Merge pull request from GHSA-x345-32rc-8h85 * tests for push rule pattern matching * tests for acl pattern matching * factor out common `re.escape` * Factor out common re.compile * Factor out common anchoring code * add word_boundary support to `glob_to_regex` * Use `glob_to_regex` in push rule evaluator NB that this drops support for character classes. I don't think anyone ever used them. * Improve efficiency of globs with multiple wildcards The idea here is that we compress multiple `*` globs into a single `.*`. We also need to consider `?`, since `*?*` is as hard to implement efficiently as `**`. * add assertion on regex pattern * Fix mypy * Simplify glob_to_regex * Inline the glob_to_regex helper function Signed-off-by: Dan Callahan * Moar comments Signed-off-by: Dan Callahan Co-authored-by: Dan Callahan --- synapse/push/push_rule_evaluator.py | 55 ++----------------------------------- 1 file changed, 3 insertions(+), 52 deletions(-) (limited to 'synapse/push') diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py index 49ecb38522..98b90a4f51 100644 --- a/synapse/push/push_rule_evaluator.py +++ b/synapse/push/push_rule_evaluator.py @@ -19,6 +19,7 @@ from typing import Any, Dict, List, Optional, Pattern, Tuple, Union from synapse.events import EventBase from synapse.types import UserID +from synapse.util import glob_to_regex, re_word_boundary from synapse.util.caches.lrucache import LruCache logger = logging.getLogger(__name__) @@ -183,7 +184,7 @@ class PushRuleEvaluatorForEvent: r = regex_cache.get((display_name, False, True), None) if not r: r1 = re.escape(display_name) - r1 = _re_word_boundary(r1) + r1 = re_word_boundary(r1) r = re.compile(r1, flags=re.IGNORECASE) regex_cache[(display_name, False, True)] = r @@ -212,7 +213,7 @@ def _glob_matches(glob: str, value: str, word_boundary: bool = False) -> bool: try: r = regex_cache.get((glob, True, word_boundary), None) if not r: - r = _glob_to_re(glob, word_boundary) + r = glob_to_regex(glob, word_boundary) regex_cache[(glob, True, word_boundary)] = r return bool(r.search(value)) except re.error: @@ -220,56 +221,6 @@ def _glob_matches(glob: str, value: str, word_boundary: bool = False) -> bool: return False -def _glob_to_re(glob: str, word_boundary: bool) -> Pattern: - """Generates regex for a given glob. - - Args: - glob - word_boundary: Whether to match against word boundaries or entire string. - """ - if IS_GLOB.search(glob): - r = re.escape(glob) - - r = r.replace(r"\*", ".*?") - r = r.replace(r"\?", ".") - - # handle [abc], [a-z] and [!a-z] style ranges. - r = GLOB_REGEX.sub( - lambda x: ( - "[%s%s]" % (x.group(1) and "^" or "", x.group(2).replace(r"\\\-", "-")) - ), - r, - ) - if word_boundary: - r = _re_word_boundary(r) - - return re.compile(r, flags=re.IGNORECASE) - else: - r = "^" + r + "$" - - return re.compile(r, flags=re.IGNORECASE) - elif word_boundary: - r = re.escape(glob) - r = _re_word_boundary(r) - - return re.compile(r, flags=re.IGNORECASE) - else: - r = "^" + re.escape(glob) + "$" - return re.compile(r, flags=re.IGNORECASE) - - -def _re_word_boundary(r: str) -> str: - """ - Adds word boundary characters to the start and end of an - expression to require that the match occur as a whole word, - but do so respecting the fact that strings starting or ending - with non-word characters will change word boundaries. - """ - # we can't use \b as it chokes on unicode. however \W seems to be okay - # as shorthand for [^0-9A-Za-z_]. - return r"(^|\W)%s(\W|$)" % (r,) - - def _flatten_dict( d: Union[EventBase, dict], prefix: Optional[List[str]] = None, -- cgit 1.5.1