diff options
author | Sean Quah <8349537+squahtx@users.noreply.github.com> | 2021-12-07 13:51:11 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-12-07 13:51:11 +0000 |
commit | 088d748f2cb51f03f3bcacc0fb3af1e0f9607737 (patch) | |
tree | be04437420017f1954791a925a4bac1c92205c33 /synapse | |
parent | Refactors in `_generate_sync_entry_for_rooms` (#11515) (diff) | |
download | synapse-088d748f2cb51f03f3bcacc0fb3af1e0f9607737.tar.xz |
Revert "Move `glob_to_regex` and `re_word_boundary` to `matrix-python-common` (#11505)" (#11527)
This reverts commit a77c36989785c0d5565ab9a1169f4f88e512ce8a.
Diffstat (limited to 'synapse')
-rw-r--r-- | synapse/config/room_directory.py | 3 | ||||
-rw-r--r-- | synapse/config/tls.py | 3 | ||||
-rw-r--r-- | synapse/federation/federation_server.py | 3 | ||||
-rw-r--r-- | synapse/push/push_rule_evaluator.py | 7 | ||||
-rw-r--r-- | synapse/python_dependencies.py | 1 | ||||
-rw-r--r-- | synapse/util/__init__.py | 59 |
6 files changed, 64 insertions, 12 deletions
diff --git a/synapse/config/room_directory.py b/synapse/config/room_directory.py index 3c5e0f7ce7..57316c59b6 100644 --- a/synapse/config/room_directory.py +++ b/synapse/config/room_directory.py @@ -15,9 +15,8 @@ from typing import List -from matrix_common.regex import glob_to_regex - from synapse.types import JsonDict +from synapse.util import glob_to_regex from ._base import Config, ConfigError diff --git a/synapse/config/tls.py b/synapse/config/tls.py index 3e235b57a7..4ca111618f 100644 --- a/synapse/config/tls.py +++ b/synapse/config/tls.py @@ -16,12 +16,11 @@ import logging import os from typing import List, Optional, Pattern -from matrix_common.regex import glob_to_regex - from OpenSSL import SSL, crypto from twisted.internet._sslverify import Certificate, trustRootFromCertificates from synapse.config._base import Config, ConfigError +from synapse.util import glob_to_regex logger = logging.getLogger(__name__) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 4697a62c18..8e37e76206 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -28,7 +28,6 @@ from typing import ( Union, ) -from matrix_common.regex import glob_to_regex from prometheus_client import Counter, Gauge, Histogram from twisted.internet import defer @@ -67,7 +66,7 @@ from synapse.replication.http.federation import ( ) from synapse.storage.databases.main.lock import Lock from synapse.types import JsonDict, get_domain_from_id -from synapse.util import json_decoder, unwrapFirstError +from synapse.util import glob_to_regex, json_decoder, unwrapFirstError from synapse.util.async_helpers import Linearizer, concurrently_execute from synapse.util.caches.response_cache import ResponseCache from synapse.util.stringutils import parse_server_name diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py index 659a53805d..7f68092ec5 100644 --- a/synapse/push/push_rule_evaluator.py 
+++ b/synapse/push/push_rule_evaluator.py @@ -17,10 +17,9 @@ import logging import re from typing import Any, Dict, List, Optional, Pattern, Tuple, Union -from matrix_common.regex import glob_to_regex, to_word_pattern - from synapse.events import EventBase from synapse.types import JsonDict, UserID +from synapse.util import glob_to_regex, re_word_boundary from synapse.util.caches.lrucache import LruCache logger = logging.getLogger(__name__) @@ -185,7 +184,7 @@ class PushRuleEvaluatorForEvent: r = regex_cache.get((display_name, False, True), None) if not r: r1 = re.escape(display_name) - r1 = to_word_pattern(r1) + r1 = re_word_boundary(r1) r = re.compile(r1, flags=re.IGNORECASE) regex_cache[(display_name, False, True)] = r @@ -214,7 +213,7 @@ def _glob_matches(glob: str, value: str, word_boundary: bool = False) -> bool: try: r = regex_cache.get((glob, True, word_boundary), None) if not r: - r = glob_to_regex(glob, word_boundary=word_boundary) + r = glob_to_regex(glob, word_boundary) regex_cache[(glob, True, word_boundary)] = r return bool(r.search(value)) except re.error: diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 386debd7db..7d26954244 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -87,7 +87,6 @@ REQUIREMENTS = [ # with the latest security patches. 
"cryptography>=3.4.7", "ijson>=3.1", - "matrix-common==1.0.0", ] CONDITIONAL_REQUIREMENTS = { diff --git a/synapse/util/__init__.py b/synapse/util/__init__.py index f157132210..95f23e27b6 100644 --- a/synapse/util/__init__.py +++ b/synapse/util/__init__.py @@ -14,8 +14,9 @@ import json import logging +import re import typing -from typing import Any, Callable, Dict, Generator, Optional +from typing import Any, Callable, Dict, Generator, Optional, Pattern import attr from frozendict import frozendict @@ -34,6 +35,9 @@ if typing.TYPE_CHECKING: logger = logging.getLogger(__name__) +_WILDCARD_RUN = re.compile(r"([\?\*]+)") + + def _reject_invalid_json(val: Any) -> None: """Do not allow Infinity, -Infinity, or NaN values in JSON.""" raise ValueError("Invalid JSON value: '%s'" % val) @@ -181,3 +185,56 @@ def log_failure( if not consumeErrors: return failure return None + + +def glob_to_regex(glob: str, word_boundary: bool = False) -> Pattern: + """Converts a glob to a compiled regex object. + + Args: + glob: pattern to match + word_boundary: If True, the pattern will be allowed to match at word boundaries + anywhere in the string. Otherwise, the pattern is anchored at the start and + end of the string. + + Returns: + compiled regex pattern + """ + + # Patterns with wildcards must be simplified to avoid performance cliffs + # - The glob `?**?**?` is equivalent to the glob `???*` + # - The glob `???*` is equivalent to the regex `.{3,}` + chunks = [] + for chunk in _WILDCARD_RUN.split(glob): + # No wildcards? re.escape() + if not _WILDCARD_RUN.match(chunk): + chunks.append(re.escape(chunk)) + continue + + # Wildcards? Simplify. 
+ qmarks = chunk.count("?") + if "*" in chunk: + chunks.append(".{%d,}" % qmarks) + else: + chunks.append(".{%d}" % qmarks) + + res = "".join(chunks) + + if word_boundary: + res = re_word_boundary(res) + else: + # \A anchors at start of string, \Z at end of string + res = r"\A" + res + r"\Z" + + return re.compile(res, re.IGNORECASE) + + +def re_word_boundary(r: str) -> str: + """ + Adds word boundary characters to the start and end of an + expression to require that the match occur as a whole word, + but do so respecting the fact that strings starting or ending + with non-word characters will change word boundaries. + """ + # we can't use \b as it chokes on unicode. however \W seems to be okay + # as shorthand for [^0-9A-Za-z_]. + return r"(^|\W)%s(\W|$)" % (r,) |