diff options
author | David Baker <dbkr@users.noreply.github.com> | 2017-10-05 12:27:59 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-10-05 12:27:59 +0100 |
commit | 44f8e383f36edf7ca31980a08449f2f11d107661 (patch) | |
tree | 11ba51e0f9f9464e9522ed0d6dff943383efd57d /synapse | |
parent | Merge pull request #2495 from matrix-org/dbkr/spam_check_room_creation (diff) | |
parent | Use better method for word boundary searching (diff) | |
download | synapse-44f8e383f36edf7ca31980a08449f2f11d107661.tar.xz |
Merge pull request #2500 from matrix-org/dbkr/fix_word_boundary_mentions
Fix notif kws that start/end with non-word chars
Diffstat (limited to 'synapse')
-rw-r--r-- | synapse/push/push_rule_evaluator.py | 16 |
1 files changed, 14 insertions, 2 deletions
```diff
diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py
index 172c27c137..65f9a63fd8 100644
--- a/synapse/push/push_rule_evaluator.py
+++ b/synapse/push/push_rule_evaluator.py
@@ -183,7 +183,7 @@ def _glob_to_re(glob, word_boundary):
             r,
         )
         if word_boundary:
-            r = r"\b%s\b" % (r,)
+            r = _re_word_boundary(r)
 
             return re.compile(r, flags=re.IGNORECASE)
         else:
@@ -192,7 +192,7 @@ def _glob_to_re(glob, word_boundary):
             return re.compile(r, flags=re.IGNORECASE)
     elif word_boundary:
         r = re.escape(glob)
-        r = r"\b%s\b" % (r,)
+        r = _re_word_boundary(r)
 
         return re.compile(r, flags=re.IGNORECASE)
     else:
@@ -200,6 +200,18 @@ def _glob_to_re(glob, word_boundary):
         return re.compile(r, flags=re.IGNORECASE)
 
 
+def _re_word_boundary(r):
+    """
+    Adds word boundary characters to the start and end of an
+    expression to require that the match occur as a whole word,
+    but do so respecting the fact that strings starting or ending
+    with non-word characters will change word boundaries.
+    """
+    # we can't use \b as it chokes on unicode. however \W seems to be okay
+    # as shorthand for [^0-9A-Za-z_].
+    return r"(^|\W)%s(\W|$)" % (r,)
+
+
 def _flatten_dict(d, prefix=[], result=None):
     if result is None:
         result = {}
```