summary refs log tree commit diff
path: root/synapse
diff options
context:
space:
mode:
authorDavid Baker <dbkr@users.noreply.github.com>2017-10-05 12:27:59 +0100
committerGitHub <noreply@github.com>2017-10-05 12:27:59 +0100
commit44f8e383f36edf7ca31980a08449f2f11d107661 (patch)
tree11ba51e0f9f9464e9522ed0d6dff943383efd57d /synapse
parentMerge pull request #2495 from matrix-org/dbkr/spam_check_room_creation (diff)
parentUse better method for word boundary searching (diff)
downloadsynapse-44f8e383f36edf7ca31980a08449f2f11d107661.tar.xz
Merge pull request #2500 from matrix-org/dbkr/fix_word_boundary_mentions
Fix notif kws that start/end with non-word chars
Diffstat (limited to 'synapse')
-rw-r--r--synapse/push/push_rule_evaluator.py16
1 files changed, 14 insertions, 2 deletions
diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py
index 172c27c137..65f9a63fd8 100644
--- a/synapse/push/push_rule_evaluator.py
+++ b/synapse/push/push_rule_evaluator.py
@@ -183,7 +183,7 @@ def _glob_to_re(glob, word_boundary):
             r,
         )
         if word_boundary:
-            r = r"\b%s\b" % (r,)
+            r = _re_word_boundary(r)
 
             return re.compile(r, flags=re.IGNORECASE)
         else:
@@ -192,7 +192,7 @@ def _glob_to_re(glob, word_boundary):
             return re.compile(r, flags=re.IGNORECASE)
     elif word_boundary:
         r = re.escape(glob)
-        r = r"\b%s\b" % (r,)
+        r = _re_word_boundary(r)
 
         return re.compile(r, flags=re.IGNORECASE)
     else:
@@ -200,6 +200,18 @@ def _glob_to_re(glob, word_boundary):
         return re.compile(r, flags=re.IGNORECASE)
 
 
+def _re_word_boundary(r):
+    """
+    Adds word boundary characters to the start and end of an
+    expression to require that the match occur as a whole word,
+    but do so respecting the fact that strings starting or ending
+    with non-word characters will change word boundaries.
+    """
+    # we can't use \b as it chokes on unicode. however \W seems to be okay
+    # as shorthand for [^0-9A-Za-z_].
+    return r"(^|\W)%s(\W|$)" % (r,)
+
+
 def _flatten_dict(d, prefix=[], result=None):
     if result is None:
         result = {}