Merge pull request #502 from matrix-org/erikj/push_notif_perf

Unread notification performance.
author: Erik Johnston <erik@matrix.org> 2016-01-19 11:27:27 +0000
committer: Erik Johnston <erik@matrix.org> 2016-01-19 11:27:27 +0000
commit: 47e7963e50704a1a981b8c009f8403085be721a6 (patch)
tree: 1a8b4930d67c314c21d40e8e48436a0b246476f1 /synapse/push/push_rule_evaluator.py
parent: Merge pull request #503 from matrix-org/daniel/nonghosts (diff)
parent: Take a deepcopy of push rules before mutating them (diff)
download: synapse-47e7963e50704a1a981b8c009f8403085be721a6.tar.xz
1 files changed, 166 insertions, 94 deletions
diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py
index b0283743a2..379652c513 100644
--- a/synapse/push/push_rule_evaluator.py
+++ b/synapse/push/push_rule_evaluator.py
@@ -15,17 +15,22 @@
 
 from twisted.internet import defer
 
-from synapse.types import UserID
-
 import baserules
 
 import logging
 import simplejson as json
 import re
 
+from synapse.types import UserID
+
 logger = logging.getLogger(__name__)
 
 
+GLOB_REGEX = re.compile(r'\\\[(\\\!|)(.*)\\\]')
+IS_GLOB = re.compile(r'[\?\*\[\]]')
+INEQUALITY_EXPR = re.compile("^([=<>]*)([0-9]*)$")
+
+
 @defer.inlineCallbacks
 def evaluator_for_user_id_and_profile_tag(user_id, profile_tag, room_id, store):
     rawrules = yield store.get_push_rules_for_user(user_id)
@@ -42,9 +47,34 @@ def evaluator_for_user_id_and_profile_tag(user_id, profile_tag, room_id, store):
     ))
 
 
+def _room_member_count(ev, condition, room_member_count):
+    if 'is' not in condition:
+        return False
+    m = INEQUALITY_EXPR.match(condition['is'])
+    if not m:
+        return False
+    ineq = m.group(1)
+    rhs = m.group(2)
+    if not rhs.isdigit():
+        return False
+    rhs = int(rhs)
+
+    if ineq == '' or ineq == '==':
+        return room_member_count == rhs
+    elif ineq == '<':
+        return room_member_count < rhs
+    elif ineq == '>':
+        return room_member_count > rhs
+    elif ineq == '>=':
+        return room_member_count >= rhs
+    elif ineq == '<=':
+        return room_member_count <= rhs
+    else:
+        return False
+
+
 class PushRuleEvaluator:
     DEFAULT_ACTIONS = []
-    INEQUALITY_EXPR = re.compile("^([=<>]*)([0-9]*)$")
 
     def __init__(self, user_id, profile_tag, raw_rules, enabled_map, room_id,
                  our_member_event, store):
@@ -61,8 +91,7 @@ class PushRuleEvaluator:
             rule['actions'] = json.loads(raw_rule['actions'])
             rules.append(rule)
 
-        user = UserID.from_string(self.user_id)
-        self.rules = baserules.list_with_base_rules(rules, user)
+        self.rules = baserules.list_with_base_rules(rules)
 
         self.enabled_map = enabled_map
 
@@ -98,28 +127,19 @@ class PushRuleEvaluator:
         room_members = yield self.store.get_users_in_room(room_id)
         room_member_count = len(room_members)
 
+        evaluator = PushRuleEvaluatorForEvent(ev, room_member_count)
+
         for r in self.rules:
-            if r['rule_id'] in self.enabled_map:
-                r['enabled'] = self.enabled_map[r['rule_id']]
-            elif 'enabled' not in r:
-                r['enabled'] = True
-            if not r['enabled']:
+            enabled = self.enabled_map.get(r['rule_id'], None)
+            if enabled is not None and not enabled:
+                continue
+
+            if not r.get("enabled", True):
                 continue
-            matches = True
 
             conditions = r['conditions']
             actions = r['actions']
 
-            for c in conditions:
-                matches &= self._event_fulfills_condition(
-                    ev, c, display_name=my_display_name,
-                    room_member_count=room_member_count,
-                    profile_tag=self.profile_tag
-                )
-            logger.debug(
-                "Rule %s %s",
-                r['rule_id'], "matches" if matches else "doesn't match"
-            )
             # ignore rules with no actions (we have an explict 'dont_notify')
             if len(actions) == 0:
                 logger.warn(
@@ -127,8 +147,22 @@ class PushRuleEvaluator:
                     r['rule_id'], self.user_id
                 )
                 continue
+
+            matches = True
+            for c in conditions:
+                matches = evaluator.matches(
+                    c, self.user_id, my_display_name, self.profile_tag
+                )
+                if not matches:
+                    break
+
+            logger.debug(
+                "Rule %s %s",
+                r['rule_id'], "matches" if matches else "doesn't match"
+            )
+
             if matches:
-                logger.info(
+                logger.debug(
                     "%s matches for user %s, event %s",
                     r['rule_id'], self.user_id, ev['event_id']
                 )
@@ -139,94 +173,132 @@ class PushRuleEvaluator:
 
                 defer.returnValue(actions)
 
-        logger.info(
+        logger.debug(
             "No rules match for user %s, event %s",
             self.user_id, ev['event_id']
         )
         defer.returnValue(PushRuleEvaluator.DEFAULT_ACTIONS)
 
-    @staticmethod
-    def _glob_to_regexp(glob):
-        r = re.escape(glob)
-        r = re.sub(r'\\\*', r'.*?', r)
-        r = re.sub(r'\\\?', r'.', r)
 
-        # handle [abc], [a-z] and [!a-z] style ranges.
-        r = re.sub(r'\\\[(\\\!|)(.*)\\\]',
-                   lambda x: ('[%s%s]' % (x.group(1) and '^' or '',
-                                          re.sub(r'\\\-', '-', x.group(2)))), r)
-        return r
+class PushRuleEvaluatorForEvent(object):
+    def __init__(self, event, room_member_count):
+        self._event = event
+        self._room_member_count = room_member_count
 
-    @staticmethod
-    def _event_fulfills_condition(ev, condition,
-                                  display_name, room_member_count, profile_tag):
-        if condition['kind'] == 'event_match':
-            if 'pattern' not in condition:
-                logger.warn("event_match condition with no pattern")
-                return False
-            # XXX: optimisation: cache our pattern regexps
-            if condition['key'] == 'content.body':
-                r = r'\b%s\b' % PushRuleEvaluator._glob_to_regexp(condition['pattern'])
-            else:
-                r = r'^%s$' % PushRuleEvaluator._glob_to_regexp(condition['pattern'])
-            val = _value_for_dotted_key(condition['key'], ev)
-            if val is None:
-                return False
-            return re.search(r, val, flags=re.IGNORECASE) is not None
+        # Maps strings of e.g. 'content.body' -> event["content"]["body"]
+        self._value_cache = _flatten_dict(event)
 
+    def matches(self, condition, user_id, display_name, profile_tag):
+        if condition['kind'] == 'event_match':
+            return self._event_match(condition, user_id)
         elif condition['kind'] == 'device':
             if 'profile_tag' not in condition:
                 return True
             return condition['profile_tag'] == profile_tag
-
         elif condition['kind'] == 'contains_display_name':
-            # This is special because display names can be different
-            # between rooms and so you can't really hard code it in a rule.
-            # Optimisation: we should cache these names and update them from
-            # the event stream.
-            if 'content' not in ev or 'body' not in ev['content']:
-                return False
-            if not display_name:
-                return False
-            return re.search(
-                r"\b%s\b" % re.escape(display_name), ev['content']['body'],
-                flags=re.IGNORECASE
-            ) is not None
-
+            return self._contains_display_name(display_name)
         elif condition['kind'] == 'room_member_count':
-            if 'is' not in condition:
-                return False
-            m = PushRuleEvaluator.INEQUALITY_EXPR.match(condition['is'])
-            if not m:
-                return False
-            ineq = m.group(1)
-            rhs = m.group(2)
-            if not rhs.isdigit():
+            return _room_member_count(
+                self._event, condition, self._room_member_count
+            )
+        else:
+            return True
+
+    def _event_match(self, condition, user_id):
+        pattern = condition.get('pattern', None)
+
+        if not pattern:
+            pattern_type = condition.get('pattern_type', None)
+            if pattern_type == "user_id":
+                pattern = user_id
+            elif pattern_type == "user_localpart":
+                pattern = UserID.from_string(user_id).localpart
+
+        if not pattern:
+            logger.warn("event_match condition with no pattern")
+            return False
+
+        # XXX: optimisation: cache our pattern regexps
+        if condition['key'] == 'content.body':
+            body = self._event["content"].get("body", None)
+            if not body:
                 return False
-            rhs = int(rhs)
-
-            if ineq == '' or ineq == '==':
-                return room_member_count == rhs
-            elif ineq == '<':
-                return room_member_count < rhs
-            elif ineq == '>':
-                return room_member_count > rhs
-            elif ineq == '>=':
-                return room_member_count >= rhs
-            elif ineq == '<=':
-                return room_member_count <= rhs
-            else:
+
+            return _glob_matches(pattern, body, word_boundary=True)
+        else:
+            haystack = self._get_value(condition['key'])
+            if haystack is None:
                 return False
+
+            return _glob_matches(pattern, haystack)
+
+    def _contains_display_name(self, display_name):
+        if not display_name:
+            return False
+
+        body = self._event["content"].get("body", None)
+        if not body:
+            return False
+
+        return _glob_matches(display_name, body, word_boundary=True)
+
+    def _get_value(self, dotted_key):
+        return self._value_cache.get(dotted_key, None)
+
+
+def _glob_matches(glob, value, word_boundary=False):
+    """Tests if value matches glob.
+
+    Args:
+        glob (string)
+        value (string): String to test against glob.
+        word_boundary (bool): Whether to match against word boundaries or entire
+            string. Defaults to False.
+
+    Returns:
+        bool
+    """
+    if IS_GLOB.search(glob):
+        r = re.escape(glob)
+
+        r = r.replace(r'\*', '.*?')
+        r = r.replace(r'\?', '.')
+
+        # handle [abc], [a-z] and [!a-z] style ranges.
+        r = GLOB_REGEX.sub(
+            lambda x: (
+                '[%s%s]' % (
+                    x.group(1) and '^' or '',
+                    x.group(2).replace(r'\\\-', '-')
+                )
+            ),
+            r,
+        )
+        if word_boundary:
+            r = r"\b%s\b" % (r,)
+            r = re.compile(r, flags=re.IGNORECASE)
+
+            return r.search(value)
         else:
-            return True
+            r = r + "$"
+            r = re.compile(r, flags=re.IGNORECASE)
+
+            return r.match(value)
+    elif word_boundary:
+        r = re.escape(glob)
+        r = r"\b%s\b" % (r,)
+        r = re.compile(r, flags=re.IGNORECASE)
+
+        return r.search(value)
+    else:
+        return value.lower() == glob.lower()
+
 
+def _flatten_dict(d, prefix=[], result={}):
+    for key, value in d.items():
+        if isinstance(value, basestring):
+            result[".".join(prefix + [key])] = value.lower()
+        elif hasattr(value, "items"):
+            _flatten_dict(value, prefix=(prefix+[key]), result=result)
 
-def _value_for_dotted_key(dotted_key, event):
-    parts = dotted_key.split(".")
-    val = event
-    while len(parts) > 0:
-        if parts[0] not in val:
-            return None
-        val = val[parts[0]]
-        parts = parts[1:]
-    return val
+    return result
author	Erik Johnston <erik@matrix.org>	2016-01-19 11:27:27 +0000
committer	Erik Johnston <erik@matrix.org>	2016-01-19 11:27:27 +0000
commit	47e7963e50704a1a981b8c009f8403085be721a6 (patch)
tree	1a8b4930d67c314c21d40e8e48436a0b246476f1 /synapse/push/push_rule_evaluator.py
parent	Merge pull request #503 from matrix-org/daniel/nonghosts (diff)
parent	Take a deepcopy of push rules before mutating them (diff)
download	synapse-47e7963e50704a1a981b8c009f8403085be721a6.tar.xz