summary refs log tree commit diff
path: root/rust/src/push
diff options
context:
space:
mode:
Diffstat (limited to 'rust/src/push')
-rw-r--r--rust/src/push/base_rules.rs1
-rw-r--r--rust/src/push/evaluator.rs374
-rw-r--r--rust/src/push/mod.rs28
-rw-r--r--rust/src/push/utils.rs215
4 files changed, 610 insertions, 8 deletions
diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs
index 7c62bc4849..bb59676bde 100644
--- a/rust/src/push/base_rules.rs
+++ b/rust/src/push/base_rules.rs
@@ -262,6 +262,7 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[
         priority_class: 1,
         conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::RelationMatch {
             rel_type: Cow::Borrowed("m.thread"),
+            event_type_pattern: None,
             sender: None,
             sender_type: Some(Cow::Borrowed("user_id")),
         })]),
diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs
new file mode 100644
index 0000000000..efe88ec76e
--- /dev/null
+++ b/rust/src/push/evaluator.rs
@@ -0,0 +1,374 @@
+// Copyright 2022 The Matrix.org Foundation C.I.C.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::{
+    borrow::Cow,
+    collections::{BTreeMap, BTreeSet},
+};
+
+use anyhow::{Context, Error};
+use lazy_static::lazy_static;
+use log::warn;
+use pyo3::prelude::*;
+use regex::Regex;
+
+use super::{
+    utils::{get_glob_matcher, get_localpart_from_id, GlobMatchType},
+    Action, Condition, EventMatchCondition, FilteredPushRules, KnownCondition,
+};
+
+lazy_static! {
+    /// Used to parse the `is` clause in the room member count condition.
+    static ref INEQUALITY_EXPR: Regex = Regex::new(r"^([=<>]*)([0-9]+)$").expect("valid regex");
+}
+
+/// Allows running a set of push rules against a particular event.
+#[pyclass]
+pub struct PushRuleEvaluator {
+    /// A mapping of "flattened" keys to string values in the event, e.g.
+    /// includes things like "type" and "content.msgtype".
+    flattened_keys: BTreeMap<String, String>,
+
+    /// The "content.body", if any.
+    body: String,
+
+    /// The number of users in the room.
+    room_member_count: u64,
+
+    /// The `notifications` section of the current power levels in the room.
+    notification_power_levels: BTreeMap<String, i64>,
+
+    /// The relations related to the event as a mapping from relation type to
+    /// set of sender/event type 2-tuples.
+    relations: BTreeMap<String, BTreeSet<(String, String)>>,
+
+    /// Is running "relation" conditions enabled?
+    relation_match_enabled: bool,
+
+    /// The power level of the sender of the event, or None if event is an
+    /// outlier.
+    sender_power_level: Option<i64>,
+}
+
+#[pymethods]
+impl PushRuleEvaluator {
+    /// Create a new `PushRuleEvaluator`. See struct docstring for details.
+    #[new]
+    pub fn py_new(
+        flattened_keys: BTreeMap<String, String>,
+        room_member_count: u64,
+        sender_power_level: Option<i64>,
+        notification_power_levels: BTreeMap<String, i64>,
+        relations: BTreeMap<String, BTreeSet<(String, String)>>,
+        relation_match_enabled: bool,
+    ) -> Result<Self, Error> {
+        let body = flattened_keys
+            .get("content.body")
+            .cloned()
+            .unwrap_or_default();
+
+        Ok(PushRuleEvaluator {
+            flattened_keys,
+            body,
+            room_member_count,
+            notification_power_levels,
+            relations,
+            relation_match_enabled,
+            sender_power_level,
+        })
+    }
+
+    /// Run the evaluator with the given push rules, for the given user ID and
+    /// display name of the user.
+    ///
+    /// Passing in None will skip evaluating rules matching user ID and display
+    /// name.
+    ///
+    /// Returns the set of actions, if any, that match (filtering out any
+    /// `dont_notify` actions).
+    pub fn run(
+        &self,
+        push_rules: &FilteredPushRules,
+        user_id: Option<&str>,
+        display_name: Option<&str>,
+    ) -> Vec<Action> {
+        'outer: for (push_rule, enabled) in push_rules.iter() {
+            if !enabled {
+                continue;
+            }
+
+            for condition in push_rule.conditions.iter() {
+                match self.match_condition(condition, user_id, display_name) {
+                    Ok(true) => {}
+                    Ok(false) => continue 'outer,
+                    Err(err) => {
+                        warn!("Condition match failed {err}");
+                        continue 'outer;
+                    }
+                }
+            }
+
+            let actions = push_rule
+                .actions
+                .iter()
+                // Filter out "dont_notify" actions, as we don't store them.
+                .filter(|a| **a != Action::DontNotify)
+                .cloned()
+                .collect();
+
+            return actions;
+        }
+
+        Vec::new()
+    }
+
+    /// Check if the given condition matches.
+    fn matches(
+        &self,
+        condition: Condition,
+        user_id: Option<&str>,
+        display_name: Option<&str>,
+    ) -> bool {
+        match self.match_condition(&condition, user_id, display_name) {
+            Ok(true) => true,
+            Ok(false) => false,
+            Err(err) => {
+                warn!("Condition match failed {err}");
+                false
+            }
+        }
+    }
+}
+
+impl PushRuleEvaluator {
+    /// Match a given `Condition` for a push rule.
+    pub fn match_condition(
+        &self,
+        condition: &Condition,
+        user_id: Option<&str>,
+        display_name: Option<&str>,
+    ) -> Result<bool, Error> {
+        let known_condition = match condition {
+            Condition::Known(known) => known,
+            Condition::Unknown(_) => {
+                return Ok(false);
+            }
+        };
+
+        let result = match known_condition {
+            KnownCondition::EventMatch(event_match) => {
+                self.match_event_match(event_match, user_id)?
+            }
+            KnownCondition::ContainsDisplayName => {
+                if let Some(dn) = display_name {
+                    if !dn.is_empty() {
+                        get_glob_matcher(dn, GlobMatchType::Word)?.is_match(&self.body)?
+                    } else {
+                        // We specifically ignore empty display names, as otherwise
+                        // they would always match.
+                        false
+                    }
+                } else {
+                    false
+                }
+            }
+            KnownCondition::RoomMemberCount { is } => {
+                if let Some(is) = is {
+                    self.match_member_count(is)?
+                } else {
+                    false
+                }
+            }
+            KnownCondition::SenderNotificationPermission { key } => {
+                if let Some(sender_power_level) = &self.sender_power_level {
+                    let required_level = self
+                        .notification_power_levels
+                        .get(key.as_ref())
+                        .copied()
+                        .unwrap_or(50);
+
+                    *sender_power_level >= required_level
+                } else {
+                    false
+                }
+            }
+            KnownCondition::RelationMatch {
+                rel_type,
+                event_type_pattern,
+                sender,
+                sender_type,
+            } => {
+                self.match_relations(rel_type, sender, sender_type, user_id, event_type_pattern)?
+            }
+        };
+
+        Ok(result)
+    }
+
+    /// Evaluates a relation condition.
+    fn match_relations(
+        &self,
+        rel_type: &str,
+        sender: &Option<Cow<str>>,
+        sender_type: &Option<Cow<str>>,
+        user_id: Option<&str>,
+        event_type_pattern: &Option<Cow<str>>,
+    ) -> Result<bool, Error> {
+        // First check if relation matching is enabled...
+        if !self.relation_match_enabled {
+            return Ok(false);
+        }
+
+        // ... and if there are any relations to match against.
+        let relations = if let Some(relations) = self.relations.get(rel_type) {
+            relations
+        } else {
+            return Ok(false);
+        };
+
+        // Extract the sender pattern from the condition
+        let sender_pattern = if let Some(sender) = sender {
+            Some(sender.as_ref())
+        } else if let Some(sender_type) = sender_type {
+            if sender_type == "user_id" {
+                if let Some(user_id) = user_id {
+                    Some(user_id)
+                } else {
+                    return Ok(false);
+                }
+            } else {
+                warn!("Unrecognized sender_type: {sender_type}");
+                return Ok(false);
+            }
+        } else {
+            None
+        };
+
+        let mut sender_compiled_pattern = if let Some(pattern) = sender_pattern {
+            Some(get_glob_matcher(pattern, GlobMatchType::Whole)?)
+        } else {
+            None
+        };
+
+        let mut type_compiled_pattern = if let Some(pattern) = event_type_pattern {
+            Some(get_glob_matcher(pattern, GlobMatchType::Whole)?)
+        } else {
+            None
+        };
+
+        for (relation_sender, event_type) in relations {
+            if let Some(pattern) = &mut sender_compiled_pattern {
+                if !pattern.is_match(relation_sender)? {
+                    continue;
+                }
+            }
+
+            if let Some(pattern) = &mut type_compiled_pattern {
+                if !pattern.is_match(event_type)? {
+                    continue;
+                }
+            }
+
+            return Ok(true);
+        }
+
+        Ok(false)
+    }
+
+    /// Evaluates a `event_match` condition.
+    fn match_event_match(
+        &self,
+        event_match: &EventMatchCondition,
+        user_id: Option<&str>,
+    ) -> Result<bool, Error> {
+        let pattern = if let Some(pattern) = &event_match.pattern {
+            pattern
+        } else if let Some(pattern_type) = &event_match.pattern_type {
+            // The `pattern_type` can either be "user_id" or "user_localpart",
+            // either way if we don't have a `user_id` then the condition can't
+            // match.
+            let user_id = if let Some(user_id) = user_id {
+                user_id
+            } else {
+                return Ok(false);
+            };
+
+            match &**pattern_type {
+                "user_id" => user_id,
+                "user_localpart" => get_localpart_from_id(user_id)?,
+                _ => return Ok(false),
+            }
+        } else {
+            return Ok(false);
+        };
+
+        let haystack = if let Some(haystack) = self.flattened_keys.get(&*event_match.key) {
+            haystack
+        } else {
+            return Ok(false);
+        };
+
+        // For the content.body we match against "words", but for everything
+        // else we match against the entire value.
+        let match_type = if event_match.key == "content.body" {
+            GlobMatchType::Word
+        } else {
+            GlobMatchType::Whole
+        };
+
+        let mut compiled_pattern = get_glob_matcher(pattern, match_type)?;
+        compiled_pattern.is_match(haystack)
+    }
+
+    /// Match the member count against an 'is' condition
+    /// The `is` condition can be things like '>2', '==3' or even just '4'.
+    fn match_member_count(&self, is: &str) -> Result<bool, Error> {
+        let captures = INEQUALITY_EXPR.captures(is).context("bad 'is' clause")?;
+        let ineq = captures.get(1).map_or("==", |m| m.as_str());
+        let rhs: u64 = captures
+            .get(2)
+            .context("missing number")?
+            .as_str()
+            .parse()?;
+
+        let matches = match ineq {
+            "" | "==" => self.room_member_count == rhs,
+            "<" => self.room_member_count < rhs,
+            ">" => self.room_member_count > rhs,
+            ">=" => self.room_member_count >= rhs,
+            "<=" => self.room_member_count <= rhs,
+            _ => false,
+        };
+
+        Ok(matches)
+    }
+}
+
+#[test]
+fn push_rule_evaluator() {
+    let mut flattened_keys = BTreeMap::new();
+    flattened_keys.insert("content.body".to_string(), "foo bar bob hello".to_string());
+    let evaluator = PushRuleEvaluator::py_new(
+        flattened_keys,
+        10,
+        Some(0),
+        BTreeMap::new(),
+        BTreeMap::new(),
+        true,
+    )
+    .unwrap();
+
+    let result = evaluator.run(&FilteredPushRules::default(), None, Some("bob"));
+    assert_eq!(result.len(), 3);
+}
diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs
index de6764e7c5..30fffc31ad 100644
--- a/rust/src/push/mod.rs
+++ b/rust/src/push/mod.rs
@@ -42,7 +42,6 @@
 //!
 //! The set of "base rules" are the list of rules that every user has by default. A
 //! user can modify their copy of the push rules in one of three ways:
-//!
 //!     1. Adding a new push rule of a certain kind
 //!     2. Changing the actions of a base rule
 //!     3. Enabling/disabling a base rule.
@@ -58,12 +57,16 @@ use std::collections::{BTreeMap, HashMap, HashSet};
 use anyhow::{Context, Error};
 use log::warn;
 use pyo3::prelude::*;
-use pythonize::pythonize;
+use pythonize::{depythonize, pythonize};
 use serde::de::Error as _;
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
 
+use self::evaluator::PushRuleEvaluator;
+
 mod base_rules;
+pub mod evaluator;
+pub mod utils;
 
 /// Called when registering modules with python.
 pub fn register_module(py: Python<'_>, m: &PyModule) -> PyResult<()> {
@@ -71,6 +74,7 @@ pub fn register_module(py: Python<'_>, m: &PyModule) -> PyResult<()> {
     child_module.add_class::<PushRule>()?;
     child_module.add_class::<PushRules>()?;
     child_module.add_class::<FilteredPushRules>()?;
+    child_module.add_class::<PushRuleEvaluator>()?;
     child_module.add_function(wrap_pyfunction!(get_base_rule_ids, m)?)?;
 
     m.add_submodule(child_module)?;
@@ -274,6 +278,8 @@ pub enum KnownCondition {
     #[serde(rename = "org.matrix.msc3772.relation_match")]
     RelationMatch {
         rel_type: Cow<'static, str>,
+        #[serde(skip_serializing_if = "Option::is_none", rename = "type")]
+        event_type_pattern: Option<Cow<'static, str>>,
         #[serde(skip_serializing_if = "Option::is_none")]
         sender: Option<Cow<'static, str>>,
         #[serde(skip_serializing_if = "Option::is_none")]
@@ -287,20 +293,26 @@ impl IntoPy<PyObject> for Condition {
     }
 }
 
+impl<'source> FromPyObject<'source> for Condition {
+    fn extract(ob: &'source PyAny) -> PyResult<Self> {
+        Ok(depythonize(ob)?)
+    }
+}
+
 /// The body of a [`Condition::EventMatch`]
 #[derive(Serialize, Deserialize, Debug, Clone)]
 pub struct EventMatchCondition {
-    key: Cow<'static, str>,
+    pub key: Cow<'static, str>,
     #[serde(skip_serializing_if = "Option::is_none")]
-    pattern: Option<Cow<'static, str>>,
+    pub pattern: Option<Cow<'static, str>>,
     #[serde(skip_serializing_if = "Option::is_none")]
-    pattern_type: Option<Cow<'static, str>>,
+    pub pattern_type: Option<Cow<'static, str>>,
 }
 
 /// The collection of push rules for a user.
 #[derive(Debug, Clone, Default)]
 #[pyclass(frozen)]
-struct PushRules {
+pub struct PushRules {
     /// Custom push rules that override a base rule.
     overridden_base_rules: HashMap<Cow<'static, str>, PushRule>,
 
@@ -319,7 +331,7 @@ struct PushRules {
 #[pymethods]
 impl PushRules {
     #[new]
-    fn new(rules: Vec<PushRule>) -> PushRules {
+    pub fn new(rules: Vec<PushRule>) -> PushRules {
         let mut push_rules: PushRules = Default::default();
 
         for rule in rules {
@@ -396,7 +408,7 @@ pub struct FilteredPushRules {
 #[pymethods]
 impl FilteredPushRules {
     #[new]
-    fn py_new(
+    pub fn py_new(
         push_rules: PushRules,
         enabled_map: BTreeMap<String, bool>,
         msc3786_enabled: bool,
diff --git a/rust/src/push/utils.rs b/rust/src/push/utils.rs
new file mode 100644
index 0000000000..8759340473
--- /dev/null
+++ b/rust/src/push/utils.rs
@@ -0,0 +1,215 @@
+// Copyright 2022 The Matrix.org Foundation C.I.C.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use anyhow::bail;
+use anyhow::Context;
+use anyhow::Error;
+use lazy_static::lazy_static;
+use regex;
+use regex::Regex;
+use regex::RegexBuilder;
+
+lazy_static! {
+    /// Matches runs of non-wildcard characters followed by wildcard characters.
+    static ref WILDCARD_RUN: Regex = Regex::new(r"([^\?\*]*)([\?\*]*)").expect("valid regex");
+}
+
+/// Extract the localpart from a Matrix style ID
+pub(crate) fn get_localpart_from_id(id: &str) -> Result<&str, Error> {
+    let (localpart, _) = id
+        .split_once(':')
+        .with_context(|| format!("ID does not contain colon: {id}"))?;
+
+    // We need to strip off the first character, which is the ID type.
+    if localpart.is_empty() {
+        bail!("Invalid ID {id}");
+    }
+
+    Ok(&localpart[1..])
+}
+
+/// Used by `glob_to_regex` to specify what to match the regex against.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum GlobMatchType {
+    /// The generated regex will match against the entire input.
+    Whole,
+    /// The generated regex will match against words.
+    Word,
+}
+
+/// Convert a "glob" style expression to a regex, anchoring either to the entire
+/// input or to individual words.
+pub fn glob_to_regex(glob: &str, match_type: GlobMatchType) -> Result<Regex, Error> {
+    let mut chunks = Vec::new();
+
+    // Patterns with wildcards must be simplified to avoid performance cliffs
+    // - The glob `?**?**?` is equivalent to the glob `???*`
+    // - The glob `???*` is equivalent to the regex `.{3,}`
+    for captures in WILDCARD_RUN.captures_iter(glob) {
+        if let Some(chunk) = captures.get(1) {
+            chunks.push(regex::escape(chunk.as_str()));
+        }
+
+        if let Some(wildcards) = captures.get(2) {
+            if wildcards.as_str() == "" {
+                continue;
+            }
+
+            let question_marks = wildcards.as_str().chars().filter(|c| *c == '?').count();
+
+            if wildcards.as_str().contains('*') {
+                chunks.push(format!(".{{{question_marks},}}"));
+            } else {
+                chunks.push(format!(".{{{question_marks}}}"));
+            }
+        }
+    }
+
+    let joined = chunks.join("");
+
+    let regex_str = match match_type {
+        GlobMatchType::Whole => format!(r"\A{joined}\z"),
+
+        // `^|\W` and `\W|$` handle the case where `pattern` starts or ends with a non-word
+        // character.
+        GlobMatchType::Word => format!(r"(?:^|\b|\W){joined}(?:\b|\W|$)"),
+    };
+
+    Ok(RegexBuilder::new(&regex_str)
+        .case_insensitive(true)
+        .build()?)
+}
+
+/// Compiles the glob into a `Matcher`.
+pub fn get_glob_matcher(glob: &str, match_type: GlobMatchType) -> Result<Matcher, Error> {
+    // There are a number of shortcuts we can make if the glob doesn't contain a
+    // wild card.
+    let matcher = if glob.contains(['*', '?']) {
+        let regex = glob_to_regex(glob, match_type)?;
+        Matcher::Regex(regex)
+    } else if match_type == GlobMatchType::Whole {
+        // If there aren't any wildcards and we're matching the whole thing,
+        // then we simply can do a case-insensitive string match.
+        Matcher::Whole(glob.to_lowercase())
+    } else {
+        // Otherwise, if we're matching against words then can first check
+        // if the haystack contains the glob at all.
+        Matcher::Word {
+            word: glob.to_lowercase(),
+            regex: None,
+        }
+    };
+
+    Ok(matcher)
+}
+
+/// Matches against a glob
+pub enum Matcher {
+    /// Plain regex matching.
+    Regex(Regex),
+
+    /// Case-insensitive equality.
+    Whole(String),
+
+    /// Word matching. `regex` is a cache of calling [`glob_to_regex`] on word.
+    Word { word: String, regex: Option<Regex> },
+}
+
+impl Matcher {
+    /// Checks if the glob matches the given haystack.
+    pub fn is_match(&mut self, haystack: &str) -> Result<bool, Error> {
+        // We want to to do case-insensitive matching, so we convert to
+        // lowercase first.
+        let haystack = haystack.to_lowercase();
+
+        match self {
+            Matcher::Regex(regex) => Ok(regex.is_match(&haystack)),
+            Matcher::Whole(whole) => Ok(whole == &haystack),
+            Matcher::Word { word, regex } => {
+                // If we're looking for a literal word, then we first check if
+                // the haystack contains the word as a substring.
+                if !haystack.contains(&*word) {
+                    return Ok(false);
+                }
+
+                // If it does contain the word as a substring, then we need to
+                // check if it is an actual word by testing it against the regex.
+                let regex = if let Some(regex) = regex {
+                    regex
+                } else {
+                    let compiled_regex = glob_to_regex(word, GlobMatchType::Word)?;
+                    regex.insert(compiled_regex)
+                };
+
+                Ok(regex.is_match(&haystack))
+            }
+        }
+    }
+}
+
+#[test]
+fn test_get_domain_from_id() {
+    get_localpart_from_id("").unwrap_err();
+    get_localpart_from_id(":").unwrap_err();
+    get_localpart_from_id(":asd").unwrap_err();
+    get_localpart_from_id("::as::asad").unwrap_err();
+
+    assert_eq!(get_localpart_from_id("@test:foo").unwrap(), "test");
+    assert_eq!(get_localpart_from_id("@:").unwrap(), "");
+    assert_eq!(get_localpart_from_id("@test:foo:907").unwrap(), "test");
+}
+
+#[test]
+fn tset_glob() -> Result<(), Error> {
+    assert_eq!(
+        glob_to_regex("simple", GlobMatchType::Whole)?.as_str(),
+        r"\Asimple\z"
+    );
+    assert_eq!(
+        glob_to_regex("simple*", GlobMatchType::Whole)?.as_str(),
+        r"\Asimple.{0,}\z"
+    );
+    assert_eq!(
+        glob_to_regex("simple?", GlobMatchType::Whole)?.as_str(),
+        r"\Asimple.{1}\z"
+    );
+    assert_eq!(
+        glob_to_regex("simple?*?*", GlobMatchType::Whole)?.as_str(),
+        r"\Asimple.{2,}\z"
+    );
+    assert_eq!(
+        glob_to_regex("simple???", GlobMatchType::Whole)?.as_str(),
+        r"\Asimple.{3}\z"
+    );
+
+    assert_eq!(
+        glob_to_regex("escape.", GlobMatchType::Whole)?.as_str(),
+        r"\Aescape\.\z"
+    );
+
+    assert!(glob_to_regex("simple", GlobMatchType::Whole)?.is_match("simple"));
+    assert!(!glob_to_regex("simple", GlobMatchType::Whole)?.is_match("simples"));
+    assert!(glob_to_regex("simple*", GlobMatchType::Whole)?.is_match("simples"));
+    assert!(glob_to_regex("simple?", GlobMatchType::Whole)?.is_match("simples"));
+    assert!(glob_to_regex("simple*", GlobMatchType::Whole)?.is_match("simple"));
+
+    assert!(glob_to_regex("simple", GlobMatchType::Word)?.is_match("some simple."));
+    assert!(glob_to_regex("simple", GlobMatchType::Word)?.is_match("simple"));
+    assert!(!glob_to_regex("simple", GlobMatchType::Word)?.is_match("simples"));
+
+    assert!(glob_to_regex("@user:foo", GlobMatchType::Word)?.is_match("Some @user:foo test"));
+    assert!(glob_to_regex("@user:foo", GlobMatchType::Word)?.is_match("@user:foo"));
+
+    Ok(())
+}