diff options
author | Richard van der Hoff <1389908+richvdh@users.noreply.github.com> | 2021-05-11 10:47:23 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-05-11 11:47:23 +0200 |
commit | 03318a766cac9f8b053db2214d9c332a977d226c (patch) | |
tree | 3444111943cbfda45609535c83e53f9adede3a90 /synapse/push | |
parent | Unpin attrs dep after new version has been released (#9946) (diff) | |
download | synapse-03318a766cac9f8b053db2214d9c332a977d226c.tar.xz |
Merge pull request from GHSA-x345-32rc-8h85
* tests for push rule pattern matching * tests for acl pattern matching * factor out common `re.escape` * Factor out common re.compile * Factor out common anchoring code * add word_boundary support to `glob_to_regex` * Use `glob_to_regex` in push rule evaluator NB that this drops support for character classes. I don't think anyone ever used them. * Improve efficiency of globs with multiple wildcards The idea here is that we compress multiple `*` globs into a single `.*`. We also need to consider `?`, since `*?*` is as hard to implement efficiently as `**`. * add assertion on regex pattern * Fix mypy * Simplify glob_to_regex * Inline the glob_to_regex helper function Signed-off-by: Dan Callahan <danc@element.io> * Moar comments Signed-off-by: Dan Callahan <danc@element.io> Co-authored-by: Dan Callahan <danc@element.io>
Diffstat (limited to 'synapse/push')
-rw-r--r-- | synapse/push/push_rule_evaluator.py | 55 |
1 files changed, 3 insertions, 52 deletions
diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py index 49ecb38522..98b90a4f51 100644 --- a/synapse/push/push_rule_evaluator.py +++ b/synapse/push/push_rule_evaluator.py @@ -19,6 +19,7 @@ from typing import Any, Dict, List, Optional, Pattern, Tuple, Union from synapse.events import EventBase from synapse.types import UserID +from synapse.util import glob_to_regex, re_word_boundary from synapse.util.caches.lrucache import LruCache logger = logging.getLogger(__name__) @@ -183,7 +184,7 @@ class PushRuleEvaluatorForEvent: r = regex_cache.get((display_name, False, True), None) if not r: r1 = re.escape(display_name) - r1 = _re_word_boundary(r1) + r1 = re_word_boundary(r1) r = re.compile(r1, flags=re.IGNORECASE) regex_cache[(display_name, False, True)] = r @@ -212,7 +213,7 @@ def _glob_matches(glob: str, value: str, word_boundary: bool = False) -> bool: try: r = regex_cache.get((glob, True, word_boundary), None) if not r: - r = _glob_to_re(glob, word_boundary) + r = glob_to_regex(glob, word_boundary) regex_cache[(glob, True, word_boundary)] = r return bool(r.search(value)) except re.error: @@ -220,56 +221,6 @@ def _glob_matches(glob: str, value: str, word_boundary: bool = False) -> bool: return False -def _glob_to_re(glob: str, word_boundary: bool) -> Pattern: - """Generates regex for a given glob. - - Args: - glob - word_boundary: Whether to match against word boundaries or entire string. - """ - if IS_GLOB.search(glob): - r = re.escape(glob) - - r = r.replace(r"\*", ".*?") - r = r.replace(r"\?", ".") - - # handle [abc], [a-z] and [!a-z] style ranges. - r = GLOB_REGEX.sub( - lambda x: ( - "[%s%s]" % (x.group(1) and "^" or "", x.group(2).replace(r"\\\-", "-")) - ), - r, - ) - if word_boundary: - r = _re_word_boundary(r) - - return re.compile(r, flags=re.IGNORECASE) - else: - r = "^" + r + "$" - - return re.compile(r, flags=re.IGNORECASE) - elif word_boundary: - r = re.escape(glob) - r = _re_word_boundary(r) - - return re.compile(r, flags=re.IGNORECASE) - else: - r = "^" + re.escape(glob) + "$" - return re.compile(r, flags=re.IGNORECASE) - - -def _re_word_boundary(r: str) -> str: - """ - Adds word boundary characters to the start and end of an - expression to require that the match occur as a whole word, - but do so respecting the fact that strings starting or ending - with non-word characters will change word boundaries. - """ - # we can't use \b as it chokes on unicode. however \W seems to be okay - # as shorthand for [^0-9A-Za-z_]. - return r"(^|\W)%s(\W|$)" % (r,) - - def _flatten_dict( d: Union[EventBase, dict], prefix: Optional[List[str]] = None, |