Merge branch 'master' of git+ssh://github.com/matrix-org/synapse

author: Matthew Hodgson <matthew@matrix.org> 2016-02-10 16:27:15 +0000
committer: Matthew Hodgson <matthew@matrix.org> 2016-02-10 16:27:15 +0000
commit: 76346870571ce885d07b180cf88c11a33a051cf8 (patch)
tree: 832664ad8351517a43bd91230eeacf257b2c0bec /synapse/push/push_rule_evaluator.py
parent: try to bump syweb to 0.6.8 (diff)
parent: Merge branch 'release-v0.13.0' of github.com:matrix-org/synapse (diff)
download: synapse-76346870571ce885d07b180cf88c11a33a051cf8.tar.xz
1 files changed, 204 insertions, 106 deletions
diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py
index 92c7fd048f..2a2b4437dc 100644
--- a/synapse/push/push_rule_evaluator.py
+++ b/synapse/push/push_rule_evaluator.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# Copyright 2015 OpenMarket Ltd
+# Copyright 2015, 2016 OpenMarket Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,40 +15,71 @@
 
 from twisted.internet import defer
 
-from synapse.types import UserID
-
 import baserules
 
 import logging
 import simplejson as json
 import re
 
+from synapse.types import UserID
+from synapse.util.caches.lrucache import LruCache
+
 logger = logging.getLogger(__name__)
 
 
+GLOB_REGEX = re.compile(r'\\\[(\\\!|)(.*)\\\]')
+IS_GLOB = re.compile(r'[\?\*\[\]]')
+INEQUALITY_EXPR = re.compile("^([=<>]*)([0-9]*)$")
+
+
 @defer.inlineCallbacks
-def evaluator_for_user_name_and_profile_tag(user_name, profile_tag, room_id, store):
-    rawrules = yield store.get_push_rules_for_user(user_name)
-    enabled_map = yield store.get_push_rules_enabled_for_user(user_name)
+def evaluator_for_user_id_and_profile_tag(user_id, profile_tag, room_id, store):
+    rawrules = yield store.get_push_rules_for_user(user_id)
+    enabled_map = yield store.get_push_rules_enabled_for_user(user_id)
     our_member_event = yield store.get_current_state(
         room_id=room_id,
         event_type='m.room.member',
-        state_key=user_name,
+        state_key=user_id,
     )
 
     defer.returnValue(PushRuleEvaluator(
-        user_name, profile_tag, rawrules, enabled_map,
+        user_id, profile_tag, rawrules, enabled_map,
         room_id, our_member_event, store
     ))
 
 
+def _room_member_count(ev, condition, room_member_count):
+    if 'is' not in condition:
+        return False
+    m = INEQUALITY_EXPR.match(condition['is'])
+    if not m:
+        return False
+    ineq = m.group(1)
+    rhs = m.group(2)
+    if not rhs.isdigit():
+        return False
+    rhs = int(rhs)
+
+    if ineq == '' or ineq == '==':
+        return room_member_count == rhs
+    elif ineq == '<':
+        return room_member_count < rhs
+    elif ineq == '>':
+        return room_member_count > rhs
+    elif ineq == '>=':
+        return room_member_count >= rhs
+    elif ineq == '<=':
+        return room_member_count <= rhs
+    else:
+        return False
+
+
 class PushRuleEvaluator:
-    DEFAULT_ACTIONS = ['dont_notify']
-    INEQUALITY_EXPR = re.compile("^([=<>]*)([0-9]*)$")
+    DEFAULT_ACTIONS = []
 
-    def __init__(self, user_name, profile_tag, raw_rules, enabled_map, room_id,
+    def __init__(self, user_id, profile_tag, raw_rules, enabled_map, room_id,
                  our_member_event, store):
-        self.user_name = user_name
+        self.user_id = user_id
         self.profile_tag = profile_tag
         self.room_id = room_id
         self.our_member_event = our_member_event
@@ -61,8 +92,7 @@ class PushRuleEvaluator:
             rule['actions'] = json.loads(raw_rule['actions'])
             rules.append(rule)
 
-        user = UserID.from_string(self.user_name)
-        self.rules = baserules.list_with_base_rules(rules, user)
+        self.rules = baserules.list_with_base_rules(rules)
 
         self.enabled_map = enabled_map
 
@@ -83,9 +113,9 @@ class PushRuleEvaluator:
         has configured both globally and per-room when we have the ability
         to do such things.
         """
-        if ev['user_id'] == self.user_name:
+        if ev['user_id'] == self.user_id:
             # let's assume you probably know about messages you sent yourself
-            defer.returnValue(['dont_notify'])
+            defer.returnValue([])
 
         room_id = ev['room_id']
 
@@ -98,127 +128,195 @@ class PushRuleEvaluator:
         room_members = yield self.store.get_users_in_room(room_id)
         room_member_count = len(room_members)
 
+        evaluator = PushRuleEvaluatorForEvent(ev, room_member_count)
+
         for r in self.rules:
-            if r['rule_id'] in self.enabled_map:
-                r['enabled'] = self.enabled_map[r['rule_id']]
-            elif 'enabled' not in r:
-                r['enabled'] = True
-            if not r['enabled']:
+            enabled = self.enabled_map.get(r['rule_id'], None)
+            if enabled is not None and not enabled:
+                continue
+
+            if not r.get("enabled", True):
                 continue
-            matches = True
 
             conditions = r['conditions']
             actions = r['actions']
 
-            for c in conditions:
-                matches &= self._event_fulfills_condition(
-                    ev, c, display_name=my_display_name,
-                    room_member_count=room_member_count
-                )
-            logger.debug(
-                "Rule %s %s",
-                r['rule_id'], "matches" if matches else "doesn't match"
-            )
             # ignore rules with no actions (we have an explict 'dont_notify')
             if len(actions) == 0:
                 logger.warn(
                     "Ignoring rule id %s with no actions for user %s",
-                    r['rule_id'], self.user_name
+                    r['rule_id'], self.user_id
                 )
                 continue
+
+            matches = True
+            for c in conditions:
+                matches = evaluator.matches(
+                    c, self.user_id, my_display_name, self.profile_tag
+                )
+                if not matches:
+                    break
+
+            logger.debug(
+                "Rule %s %s",
+                r['rule_id'], "matches" if matches else "doesn't match"
+            )
+
             if matches:
-                logger.info(
+                logger.debug(
                     "%s matches for user %s, event %s",
-                    r['rule_id'], self.user_name, ev['event_id']
+                    r['rule_id'], self.user_id, ev['event_id']
                 )
+
+                # filter out dont_notify: we treat an empty actions list as
+                # dont_notify, and an empty list takes up no row in our database
+                actions = [x for x in actions if x != 'dont_notify']
+
                 defer.returnValue(actions)
 
-        logger.info(
+        logger.debug(
             "No rules match for user %s, event %s",
-            self.user_name, ev['event_id']
+            self.user_id, ev['event_id']
         )
         defer.returnValue(PushRuleEvaluator.DEFAULT_ACTIONS)
 
-    @staticmethod
-    def _glob_to_regexp(glob):
-        r = re.escape(glob)
-        r = re.sub(r'\\\*', r'.*?', r)
-        r = re.sub(r'\\\?', r'.', r)
-
-        # handle [abc], [a-z] and [!a-z] style ranges.
-        r = re.sub(r'\\\[(\\\!|)(.*)\\\]',
-                   lambda x: ('[%s%s]' % (x.group(1) and '^' or '',
-                                          re.sub(r'\\\-', '-', x.group(2)))), r)
-        return r
 
-    def _event_fulfills_condition(self, ev, condition, display_name, room_member_count):
-        if condition['kind'] == 'event_match':
-            if 'pattern' not in condition:
-                logger.warn("event_match condition with no pattern")
-                return False
-            # XXX: optimisation: cache our pattern regexps
-            if condition['key'] == 'content.body':
-                r = r'\b%s\b' % self._glob_to_regexp(condition['pattern'])
-            else:
-                r = r'^%s$' % self._glob_to_regexp(condition['pattern'])
-            val = _value_for_dotted_key(condition['key'], ev)
-            if val is None:
-                return False
-            return re.search(r, val, flags=re.IGNORECASE) is not None
+class PushRuleEvaluatorForEvent(object):
+    def __init__(self, event, room_member_count):
+        self._event = event
+        self._room_member_count = room_member_count
 
+        # Maps strings of e.g. 'content.body' -> event["content"]["body"]
+        self._value_cache = _flatten_dict(event)
+
+    def matches(self, condition, user_id, display_name, profile_tag):
+        if condition['kind'] == 'event_match':
+            return self._event_match(condition, user_id)
         elif condition['kind'] == 'device':
             if 'profile_tag' not in condition:
                 return True
-            return condition['profile_tag'] == self.profile_tag
-
+            return condition['profile_tag'] == profile_tag
         elif condition['kind'] == 'contains_display_name':
-            # This is special because display names can be different
-            # between rooms and so you can't really hard code it in a rule.
-            # Optimisation: we should cache these names and update them from
-            # the event stream.
-            if 'content' not in ev or 'body' not in ev['content']:
-                return False
-            if not display_name:
-                return False
-            return re.search(
-                r"\b%s\b" % re.escape(display_name), ev['content']['body'],
-                flags=re.IGNORECASE
-            ) is not None
-
+            return self._contains_display_name(display_name)
         elif condition['kind'] == 'room_member_count':
-            if 'is' not in condition:
-                return False
-            m = PushRuleEvaluator.INEQUALITY_EXPR.match(condition['is'])
-            if not m:
+            return _room_member_count(
+                self._event, condition, self._room_member_count
+            )
+        else:
+            return True
+
+    def _event_match(self, condition, user_id):
+        pattern = condition.get('pattern', None)
+
+        if not pattern:
+            pattern_type = condition.get('pattern_type', None)
+            if pattern_type == "user_id":
+                pattern = user_id
+            elif pattern_type == "user_localpart":
+                pattern = UserID.from_string(user_id).localpart
+
+        if not pattern:
+            logger.warn("event_match condition with no pattern")
+            return False
+
+        # Compiled pattern regexps are cached by _compile_regex (see _glob_matches)
+        if condition['key'] == 'content.body':
+            body = self._event["content"].get("body", None)
+            if not body:
                 return False
-            ineq = m.group(1)
-            rhs = m.group(2)
-            if not rhs.isdigit():
+
+            return _glob_matches(pattern, body, word_boundary=True)
+        else:
+            haystack = self._get_value(condition['key'])
+            if haystack is None:
                 return False
-            rhs = int(rhs)
-
-            if ineq == '' or ineq == '==':
-                return room_member_count == rhs
-            elif ineq == '<':
-                return room_member_count < rhs
-            elif ineq == '>':
-                return room_member_count > rhs
-            elif ineq == '>=':
-                return room_member_count >= rhs
-            elif ineq == '<=':
-                return room_member_count <= rhs
+
+            return _glob_matches(pattern, haystack)
+
+    def _contains_display_name(self, display_name):
+        if not display_name:
+            return False
+
+        body = self._event["content"].get("body", None)
+        if not body:
+            return False
+
+        return _glob_matches(display_name, body, word_boundary=True)
+
+    def _get_value(self, dotted_key):
+        return self._value_cache.get(dotted_key, None)
+
+
+def _glob_matches(glob, value, word_boundary=False):
+    """Tests if value matches glob.
+
+    Args:
+        glob (string)
+        value (string): String to test against glob.
+        word_boundary (bool): Whether to match against word boundaries or entire
+            string. Defaults to False.
+
+    Returns:
+        bool
+    """
+    try:
+        if IS_GLOB.search(glob):
+            r = re.escape(glob)
+
+            r = r.replace(r'\*', '.*?')
+            r = r.replace(r'\?', '.')
+
+            # handle [abc], [a-z] and [!a-z] style ranges.
+            r = GLOB_REGEX.sub(
+                lambda x: (
+                    '[%s%s]' % (
+                        x.group(1) and '^' or '',
+                        x.group(2).replace(r'\\\-', '-')
+                    )
+                ),
+                r,
+            )
+            if word_boundary:
+                r = r"\b%s\b" % (r,)
+                r = _compile_regex(r)
+
+                return r.search(value)
             else:
-                return False
+                r = r + "$"
+                r = _compile_regex(r)
+
+                return r.match(value)
+        elif word_boundary:
+            r = re.escape(glob)
+            r = r"\b%s\b" % (r,)
+            r = _compile_regex(r)
+
+            return r.search(value)
         else:
-            return True
+            return value.lower() == glob.lower()
+    except re.error:
+        logger.warn("Failed to parse glob to regex: %r", glob)
+        return False
+
+
+def _flatten_dict(d, prefix=[], result={}):
+    for key, value in d.items():
+        if isinstance(value, basestring):
+            result[".".join(prefix + [key])] = value.lower()
+        elif hasattr(value, "items"):
+            _flatten_dict(value, prefix=(prefix + [key]), result=result)
+
+    return result
 
 
-def _value_for_dotted_key(dotted_key, event):
-    parts = dotted_key.split(".")
-    val = event
-    while len(parts) > 0:
-        if parts[0] not in val:
-            return None
-        val = val[parts[0]]
-        parts = parts[1:]
-    return val
+regex_cache = LruCache(5000)
+
+
+def _compile_regex(regex_str):
+    r = regex_cache.get(regex_str, None)
+    if r:
+        return r
+
+    r = re.compile(regex_str, flags=re.IGNORECASE)
+    regex_cache[regex_str] = r
+    return r
author	Matthew Hodgson <matthew@matrix.org>	2016-02-10 16:27:15 +0000
committer	Matthew Hodgson <matthew@matrix.org>	2016-02-10 16:27:15 +0000
commit	76346870571ce885d07b180cf88c11a33a051cf8 (patch)
tree	832664ad8351517a43bd91230eeacf257b2c0bec /synapse/push/push_rule_evaluator.py
parent	try to bump syweb to 0.6.8 (diff)
parent	Merge branch 'release-v0.13.0' of github.com:matrix-org/synapse (diff)
download	synapse-76346870571ce885d07b180cf88c11a33a051cf8.tar.xz