From 3da645032722fbf09c1e5efbc51d8c5c78d8a2cd Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Mon, 28 Nov 2022 16:29:53 -0700 Subject: Initial support for MSC3931: Room version push rule feature flags (#14520) * Add support for MSC3931: Room Version Supports push rule condition * Create experimental flag for future work, and use it to gate MSC3931 * Changelog entry --- synapse/push/bulk_push_rule_evaluator.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'synapse/push') diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 75b7e126ca..9cc3da6d91 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -338,6 +338,10 @@ class BulkPushRuleEvaluator: for user_id, level in notification_levels.items(): notification_levels[user_id] = int(level) + room_version_features = event.room_version.msc3931_push_features + if not room_version_features: + room_version_features = [] + evaluator = PushRuleEvaluator( _flatten_dict(event), room_member_count, @@ -345,6 +349,8 @@ class BulkPushRuleEvaluator: notification_levels, related_events, self._related_event_match_enabled, + room_version_features, + self.hs.config.experimental.msc1767_enabled, # MSC3931 flag ) users = rules_by_user.keys() -- cgit 1.5.1 From 9ccc09fe9e332a71b8cf5bf42b16f6acf5a6887d Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Mon, 28 Nov 2022 18:02:41 -0700 Subject: Support MSC1767's `content.body` behaviour; Add base rules from MSC3933 (#14524) * Support MSC1767's `content.body` behaviour in push rules * Add the base rules from MSC3933 * Changelog entry * Flip condition around for finding `m.markup` * Remove forgotten import --- changelog.d/14524.feature | 1 + rust/src/push/base_rules.rs | 270 ++++++++++++++++++++++++++++ rust/src/push/evaluator.rs | 2 +- rust/src/push/mod.rs | 7 + stubs/synapse/synapse_rust/push.pyi | 6 +- synapse/push/bulk_push_rule_evaluator.py | 29 ++- synapse/storage/databases/main/push_rule.py | 5 +- 7 files changed, 316 insertions(+), 4 deletions(-) create mode 100644 changelog.d/14524.feature (limited to 'synapse/push') diff --git a/changelog.d/14524.feature b/changelog.d/14524.feature new file mode 100644 index 0000000000..210acaa8ee --- /dev/null +++ b/changelog.d/14524.feature @@ -0,0 +1 @@ +Add unstable support for an Extensible Events room version (`org.matrix.msc1767.10`) via [MSC1767](https://github.com/matrix-org/matrix-spec-proposals/pull/1767), [MSC3931](https://github.com/matrix-org/matrix-spec-proposals/pull/3931), [MSC3932](https://github.com/matrix-org/matrix-spec-proposals/pull/3932), and [MSC3933](https://github.com/matrix-org/matrix-spec-proposals/pull/3933). \ No newline at end of file diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs index 49802fa4eb..35129691ca 100644 --- a/rust/src/push/base_rules.rs +++ b/rust/src/push/base_rules.rs @@ -274,6 +274,156 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[ default: true, default_enabled: true, }, + PushRule { + rule_id: Cow::Borrowed( + "global/underride/.org.matrix.msc3933.rule.extensible.encrypted_room_one_to_one", + ), + priority_class: 1, + conditions: Cow::Borrowed(&[ + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("type"), + // MSC3933: Type changed from template rule - see MSC. 
+ pattern: Some(Cow::Borrowed("org.matrix.msc1767.encrypted")), + pattern_type: None, + })), + Condition::Known(KnownCondition::RoomMemberCount { + is: Some(Cow::Borrowed("2")), + }), + // MSC3933: Add condition on top of template rule - see MSC. + Condition::Known(KnownCondition::RoomVersionSupports { + // RoomVersionFeatures::ExtensibleEvents.as_str(), ideally + feature: Cow::Borrowed("org.matrix.msc3932.extensible_events"), + }), + ]), + actions: Cow::Borrowed(&[Action::Notify, SOUND_ACTION, HIGHLIGHT_FALSE_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed( + "global/underride/.org.matrix.msc3933.rule.extensible.message.room_one_to_one", + ), + priority_class: 1, + conditions: Cow::Borrowed(&[ + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("type"), + // MSC3933: Type changed from template rule - see MSC. + pattern: Some(Cow::Borrowed("org.matrix.msc1767.message")), + pattern_type: None, + })), + Condition::Known(KnownCondition::RoomMemberCount { + is: Some(Cow::Borrowed("2")), + }), + // MSC3933: Add condition on top of template rule - see MSC. + Condition::Known(KnownCondition::RoomVersionSupports { + // RoomVersionFeatures::ExtensibleEvents.as_str(), ideally + feature: Cow::Borrowed("org.matrix.msc3932.extensible_events"), + }), + ]), + actions: Cow::Borrowed(&[Action::Notify, SOUND_ACTION, HIGHLIGHT_FALSE_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed( + "global/underride/.org.matrix.msc3933.rule.extensible.file.room_one_to_one", + ), + priority_class: 1, + conditions: Cow::Borrowed(&[ + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("type"), + // MSC3933: Type changed from template rule - see MSC. + pattern: Some(Cow::Borrowed("org.matrix.msc1767.file")), + pattern_type: None, + })), + Condition::Known(KnownCondition::RoomMemberCount { + is: Some(Cow::Borrowed("2")), + }), + // MSC3933: Add condition on top of template rule - see MSC. + Condition::Known(KnownCondition::RoomVersionSupports { + // RoomVersionFeatures::ExtensibleEvents.as_str(), ideally + feature: Cow::Borrowed("org.matrix.msc3932.extensible_events"), + }), + ]), + actions: Cow::Borrowed(&[Action::Notify, SOUND_ACTION, HIGHLIGHT_FALSE_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed( + "global/underride/.org.matrix.msc3933.rule.extensible.image.room_one_to_one", + ), + priority_class: 1, + conditions: Cow::Borrowed(&[ + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("type"), + // MSC3933: Type changed from template rule - see MSC. + pattern: Some(Cow::Borrowed("org.matrix.msc1767.image")), + pattern_type: None, + })), + Condition::Known(KnownCondition::RoomMemberCount { + is: Some(Cow::Borrowed("2")), + }), + // MSC3933: Add condition on top of template rule - see MSC. 
+ Condition::Known(KnownCondition::RoomVersionSupports { + // RoomVersionFeatures::ExtensibleEvents.as_str(), ideally + feature: Cow::Borrowed("org.matrix.msc3932.extensible_events"), + }), + ]), + actions: Cow::Borrowed(&[Action::Notify, SOUND_ACTION, HIGHLIGHT_FALSE_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed( + "global/underride/.org.matrix.msc3933.rule.extensible.video.room_one_to_one", + ), + priority_class: 1, + conditions: Cow::Borrowed(&[ + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("type"), + // MSC3933: Type changed from template rule - see MSC. + pattern: Some(Cow::Borrowed("org.matrix.msc1767.video")), + pattern_type: None, + })), + Condition::Known(KnownCondition::RoomMemberCount { + is: Some(Cow::Borrowed("2")), + }), + // MSC3933: Add condition on top of template rule - see MSC. + Condition::Known(KnownCondition::RoomVersionSupports { + // RoomVersionFeatures::ExtensibleEvents.as_str(), ideally + feature: Cow::Borrowed("org.matrix.msc3932.extensible_events"), + }), + ]), + actions: Cow::Borrowed(&[Action::Notify, SOUND_ACTION, HIGHLIGHT_FALSE_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed( + "global/underride/.org.matrix.msc3933.rule.extensible.audio.room_one_to_one", + ), + priority_class: 1, + conditions: Cow::Borrowed(&[ + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("type"), + // MSC3933: Type changed from template rule - see MSC. + pattern: Some(Cow::Borrowed("org.matrix.msc1767.audio")), + pattern_type: None, + })), + Condition::Known(KnownCondition::RoomMemberCount { + is: Some(Cow::Borrowed("2")), + }), + // MSC3933: Add condition on top of template rule - see MSC. + Condition::Known(KnownCondition::RoomVersionSupports { + // RoomVersionFeatures::ExtensibleEvents.as_str(), ideally + feature: Cow::Borrowed("org.matrix.msc3932.extensible_events"), + }), + ]), + actions: Cow::Borrowed(&[Action::Notify, SOUND_ACTION, HIGHLIGHT_FALSE_ACTION]), + default: true, + default_enabled: true, + }, PushRule { rule_id: Cow::Borrowed("global/underride/.m.rule.message"), priority_class: 1, @@ -302,6 +452,126 @@ pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[ default: true, default_enabled: true, }, + PushRule { + rule_id: Cow::Borrowed("global/underride/.org.matrix.msc1767.rule.extensible.encrypted"), + priority_class: 1, + conditions: Cow::Borrowed(&[ + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("type"), + // MSC3933: Type changed from template rule - see MSC. + pattern: Some(Cow::Borrowed("m.encrypted")), + pattern_type: None, + })), + // MSC3933: Add condition on top of template rule - see MSC. + Condition::Known(KnownCondition::RoomVersionSupports { + // RoomVersionFeatures::ExtensibleEvents.as_str(), ideally + feature: Cow::Borrowed("org.matrix.msc3932.extensible_events"), + }), + ]), + actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/underride/.org.matrix.msc1767.rule.extensible.message"), + priority_class: 1, + conditions: Cow::Borrowed(&[ + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("type"), + // MSC3933: Type changed from template rule - see MSC. + pattern: Some(Cow::Borrowed("m.message")), + pattern_type: None, + })), + // MSC3933: Add condition on top of template rule - see MSC. 
+ Condition::Known(KnownCondition::RoomVersionSupports { + // RoomVersionFeatures::ExtensibleEvents.as_str(), ideally + feature: Cow::Borrowed("org.matrix.msc3932.extensible_events"), + }), + ]), + actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/underride/.org.matrix.msc1767.rule.extensible.file"), + priority_class: 1, + conditions: Cow::Borrowed(&[ + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("type"), + // MSC3933: Type changed from template rule - see MSC. + pattern: Some(Cow::Borrowed("m.file")), + pattern_type: None, + })), + // MSC3933: Add condition on top of template rule - see MSC. + Condition::Known(KnownCondition::RoomVersionSupports { + // RoomVersionFeatures::ExtensibleEvents.as_str(), ideally + feature: Cow::Borrowed("org.matrix.msc3932.extensible_events"), + }), + ]), + actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/underride/.org.matrix.msc1767.rule.extensible.image"), + priority_class: 1, + conditions: Cow::Borrowed(&[ + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("type"), + // MSC3933: Type changed from template rule - see MSC. + pattern: Some(Cow::Borrowed("m.image")), + pattern_type: None, + })), + // MSC3933: Add condition on top of template rule - see MSC. + Condition::Known(KnownCondition::RoomVersionSupports { + // RoomVersionFeatures::ExtensibleEvents.as_str(), ideally + feature: Cow::Borrowed("org.matrix.msc3932.extensible_events"), + }), + ]), + actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/underride/.org.matrix.msc1767.rule.extensible.video"), + priority_class: 1, + conditions: Cow::Borrowed(&[ + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("type"), + // MSC3933: Type changed from template rule - see MSC. + pattern: Some(Cow::Borrowed("m.video")), + pattern_type: None, + })), + // MSC3933: Add condition on top of template rule - see MSC. + Condition::Known(KnownCondition::RoomVersionSupports { + // RoomVersionFeatures::ExtensibleEvents.as_str(), ideally + feature: Cow::Borrowed("org.matrix.msc3932.extensible_events"), + }), + ]), + actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/underride/.org.matrix.msc1767.rule.extensible.audio"), + priority_class: 1, + conditions: Cow::Borrowed(&[ + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("type"), + // MSC3933: Type changed from template rule - see MSC. + pattern: Some(Cow::Borrowed("m.audio")), + pattern_type: None, + })), + // MSC3933: Add condition on top of template rule - see MSC. 
+ Condition::Known(KnownCondition::RoomVersionSupports { + // RoomVersionFeatures::ExtensibleEvents.as_str(), ideally + feature: Cow::Borrowed("org.matrix.msc3932.extensible_events"), + }), + ]), + actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]), + default: true, + default_enabled: true, + }, PushRule { rule_id: Cow::Borrowed("global/underride/.im.vector.jitsi"), priority_class: 1, diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs index b4c3039aba..1cd54f7e2c 100644 --- a/rust/src/push/evaluator.rs +++ b/rust/src/push/evaluator.rs @@ -483,7 +483,7 @@ fn test_requires_room_version_supports_condition() { }; let rules = PushRules::new(vec![custom_rule]); result = evaluator.run( - &FilteredPushRules::py_new(rules, BTreeMap::new(), true), + &FilteredPushRules::py_new(rules, BTreeMap::new(), true, true), None, None, ); diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs index eef39f6472..2e9d3e38a1 100644 --- a/rust/src/push/mod.rs +++ b/rust/src/push/mod.rs @@ -412,6 +412,7 @@ pub struct FilteredPushRules { push_rules: PushRules, enabled_map: BTreeMap, msc3664_enabled: bool, + msc1767_enabled: bool, } #[pymethods] @@ -421,11 +422,13 @@ impl FilteredPushRules { push_rules: PushRules, enabled_map: BTreeMap, msc3664_enabled: bool, + msc1767_enabled: bool, ) -> Self { Self { push_rules, enabled_map, msc3664_enabled, + msc1767_enabled, } } @@ -450,6 +453,10 @@ impl FilteredPushRules { return false; } + if !self.msc1767_enabled && rule.rule_id.contains("org.matrix.msc1767") { + return false; + } + true }) .map(|r| { diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi index cbeb49663c..a6a586a0b5 100644 --- a/stubs/synapse/synapse_rust/push.pyi +++ b/stubs/synapse/synapse_rust/push.pyi @@ -26,7 +26,11 @@ class PushRules: class FilteredPushRules: def __init__( - self, push_rules: PushRules, enabled_map: Dict[str, bool], msc3664_enabled: bool + self, + push_rules: PushRules, + enabled_map: Dict[str, bool], + msc3664_enabled: bool, + msc1767_enabled: bool, ): ... def rules(self) -> Collection[Tuple[PushRule, bool]]: ... 
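For context on the `RoomVersionSupports` condition that the rules above rely on: per MSC3931 it matches only when the room version explicitly advertises the named feature, so the new base rules stay inert everywhere else. A minimal Python sketch of that check (illustrative only; the real evaluator is the Rust code in `rust/src/push/evaluator.rs`):

```python
from typing import Any, Mapping, Sequence


def room_version_supports_matches(
    condition: Mapping[str, Any], room_version_features: Sequence[str]
) -> bool:
    """Sketch of MSC3931 `room_version_supports` condition matching.

    A missing or non-string `feature` never matches, which keeps
    feature-gated rules inert in room versions that predate the flag.
    """
    feature = condition.get("feature")
    return isinstance(feature, str) and feature in room_version_features


# The MSC3933 rules above fire only when the room version declares the
# unstable extensible-events feature:
condition = {
    "kind": "org.matrix.msc3931.room_version_supports",
    "feature": "org.matrix.msc3932.extensible_events",
}
assert room_version_supports_matches(condition, ["org.matrix.msc3932.extensible_events"])
assert not room_version_supports_matches(condition, [])
```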
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 9cc3da6d91..d6b377860f 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -29,6 +29,7 @@ from typing import ( from prometheus_client import Counter from synapse.api.constants import MAIN_TIMELINE, EventTypes, Membership, RelationTypes +from synapse.api.room_versions import PushRuleRoomFlag, RoomVersion from synapse.event_auth import auth_types_for_event, get_user_power_level from synapse.events import EventBase, relation_from_event from synapse.events.snapshot import EventContext @@ -343,7 +344,7 @@ class BulkPushRuleEvaluator: room_version_features = [] evaluator = PushRuleEvaluator( - _flatten_dict(event), + _flatten_dict(event, room_version=event.room_version), room_member_count, sender_power_level, notification_levels, @@ -426,6 +427,7 @@ StateGroup = Union[object, int] def _flatten_dict( d: Union[EventBase, Mapping[str, Any]], + room_version: Optional[RoomVersion] = None, prefix: Optional[List[str]] = None, result: Optional[Dict[str, str]] = None, ) -> Dict[str, str]: @@ -437,6 +439,31 @@ def _flatten_dict( if isinstance(value, str): result[".".join(prefix + [key])] = value.lower() elif isinstance(value, Mapping): + # do not set `room_version` due to recursion considerations below _flatten_dict(value, prefix=(prefix + [key]), result=result) + # `room_version` should only ever be set when looking at the top level of an event + if ( + room_version is not None + and PushRuleRoomFlag.EXTENSIBLE_EVENTS in room_version.msc3931_push_features + and isinstance(d, EventBase) + ): + # Room supports extensible events: replace `content.body` with the plain text + # representation from `m.markup`, as per MSC1767. + markup = d.get("content").get("m.markup") + if room_version.identifier.startswith("org.matrix.msc1767."): + markup = d.get("content").get("org.matrix.msc1767.markup") + if markup is not None and isinstance(markup, list): + text = "" + for rep in markup: + if not isinstance(rep, dict): + # invalid markup - skip all processing + break + if rep.get("mimetype", "text/plain") == "text/plain": + rep_text = rep.get("body") + if rep_text is not None and isinstance(rep_text, str): + text = rep_text.lower() + break + result["content.body"] = text + return result diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py index 12ad44dbb3..d4c64c46ad 100644 --- a/synapse/storage/databases/main/push_rule.py +++ b/synapse/storage/databases/main/push_rule.py @@ -84,7 +84,10 @@ def _load_rules( push_rules = PushRules(ruleslist) filtered_rules = FilteredPushRules( - push_rules, enabled_map, msc3664_enabled=experimental_config.msc3664_enabled + push_rules, + enabled_map, + msc3664_enabled=experimental_config.msc3664_enabled, + msc1767_enabled=experimental_config.msc1767_enabled, ) return filtered_rules -- cgit 1.5.1 From e8bce8999f21d30affc459755e304a1f4732165c Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Wed, 30 Nov 2022 13:45:06 +0000 Subject: Aggregate unread notif count query for badge count calculation (#14255) Fetch the unread notification counts used by the badge counts in push notifications for all rooms at once (instead of fetching them per room). 
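In sketch form, the change replaces a per-room fan-out with one aggregate lookup; the caller then filters out rooms the user has left, since summary rows are not deleted on leave (a simplified stand-in, not the real `DataStore`):

```python
import asyncio
from typing import Dict, Set


class FakeStore:
    """Illustrative stand-in for the DataStore used below."""

    async def get_unread_counts_by_room_for_user(self, user_id: str) -> Dict[str, int]:
        # One aggregate query for all rooms, instead of one query per room.
        return {"!a:hs": 2, "!b:hs": 0, "!left:hs": 5}


async def badge_count(store: FakeStore, user_id: str, joins: Set[str]) -> int:
    badge = 0
    room_to_count = await store.get_unread_counts_by_room_for_user(user_id)
    for room_id, notify_count in room_to_count.items():
        # The result may include rooms the user has since left; only
        # currently-joined rooms count towards the badge.
        if room_id in joins and notify_count:
            badge += notify_count
    return badge


print(asyncio.run(badge_count(FakeStore(), "@user:hs", {"!a:hs", "!b:hs"})))  # 2
```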
--- changelog.d/14255.misc | 1 + synapse/push/push_tools.py | 28 ++-- .../storage/databases/main/event_push_actions.py | 149 +++++++++++++++++++++ tests/storage/test_event_push_actions.py | 47 +++++-- 4 files changed, 198 insertions(+), 27 deletions(-) create mode 100644 changelog.d/14255.misc (limited to 'synapse/push') diff --git a/changelog.d/14255.misc b/changelog.d/14255.misc new file mode 100644 index 0000000000..39924659c7 --- /dev/null +++ b/changelog.d/14255.misc @@ -0,0 +1 @@ +Optimise push badge count calculations. Contributed by Nick @ Beeper (@fizzadar). diff --git a/synapse/push/push_tools.py b/synapse/push/push_tools.py index edeba27a45..7ee07e4bee 100644 --- a/synapse/push/push_tools.py +++ b/synapse/push/push_tools.py @@ -17,7 +17,6 @@ from synapse.events import EventBase from synapse.push.presentable_names import calculate_room_name, name_from_member_event from synapse.storage.controllers import StorageControllers from synapse.storage.databases.main import DataStore -from synapse.util.async_helpers import concurrently_execute async def get_badge_count(store: DataStore, user_id: str, group_by_room: bool) -> int: @@ -26,23 +25,12 @@ async def get_badge_count(store: DataStore, user_id: str, group_by_room: bool) - badge = len(invites) - room_notifs = [] - - async def get_room_unread_count(room_id: str) -> None: - room_notifs.append( - await store.get_unread_event_push_actions_by_room_for_user( - room_id, - user_id, - ) - ) - - await concurrently_execute(get_room_unread_count, joins, 10) - - for notifs in room_notifs: - # Combine the counts from all the threads. - notify_count = notifs.main_timeline.notify_count + sum( - n.notify_count for n in notifs.threads.values() - ) + room_to_count = await store.get_unread_counts_by_room_for_user(user_id) + for room_id, notify_count in room_to_count.items(): + # room_to_count may include rooms which the user has left, + # ignore those. + if room_id not in joins: + continue if notify_count == 0: continue @@ -51,8 +39,10 @@ async def get_badge_count(store: DataStore, user_id: str, group_by_room: bool) - # return one badge count per conversation badge += 1 else: - # increment the badge count by the number of unread messages in the room + # Increase badge by number of notifications in room + # NOTE: this includes threaded and unthreaded notifications. badge += notify_count + return badge diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index b283ab0f9c..7ebe34f773 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -74,6 +74,7 @@ receipt. """ import logging +from collections import defaultdict from typing import ( TYPE_CHECKING, Collection, @@ -95,6 +96,7 @@ from synapse.storage.database import ( DatabasePool, LoggingDatabaseConnection, LoggingTransaction, + PostgresEngine, ) from synapse.storage.databases.main.receipts import ReceiptsWorkerStore from synapse.storage.databases.main.stream import StreamWorkerStore @@ -463,6 +465,153 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas return result + async def get_unread_counts_by_room_for_user(self, user_id: str) -> Dict[str, int]: + """Get the notification count by room for a user. Only considers notifications, + not highlight or unread counts, and threads are currently aggregated under their room. 
+ + This function is intentionally not cached because it is called to calculate the + unread badge for push notifications and thus the result is expected to change. + + Note that this function assumes the user is a member of the room. Because + summary rows are not removed when a user leaves a room, the caller must + filter out those results from the result. + + Returns: + A map of room ID to notification counts for the given user. + """ + return await self.db_pool.runInteraction( + "get_unread_counts_by_room_for_user", + self._get_unread_counts_by_room_for_user_txn, + user_id, + ) + + def _get_unread_counts_by_room_for_user_txn( + self, txn: LoggingTransaction, user_id: str + ) -> Dict[str, int]: + receipt_types_clause, args = make_in_list_sql_clause( + self.database_engine, + "receipt_type", + (ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE), + ) + args.extend([user_id, user_id]) + + receipts_cte = f""" + WITH all_receipts AS ( + SELECT room_id, thread_id, MAX(event_stream_ordering) AS max_receipt_stream_ordering + FROM receipts_linearized + LEFT JOIN events USING (room_id, event_id) + WHERE + {receipt_types_clause} + AND user_id = ? + GROUP BY room_id, thread_id + ) + """ + + receipts_joins = """ + LEFT JOIN ( + SELECT room_id, thread_id, + max_receipt_stream_ordering AS threaded_receipt_stream_ordering + FROM all_receipts + WHERE thread_id IS NOT NULL + ) AS threaded_receipts USING (room_id, thread_id) + LEFT JOIN ( + SELECT room_id, thread_id, + max_receipt_stream_ordering AS unthreaded_receipt_stream_ordering + FROM all_receipts + WHERE thread_id IS NULL + ) AS unthreaded_receipts USING (room_id) + """ + + # First get summary counts by room / thread for the user. We use the max receipt + # stream ordering of both threaded & unthreaded receipts to compare against the + # summary table. + # + # PostgreSQL and SQLite differ in comparing scalar numerics. + if isinstance(self.database_engine, PostgresEngine): + # GREATEST ignores NULLs. + max_clause = """GREATEST( + threaded_receipt_stream_ordering, + unthreaded_receipt_stream_ordering + )""" + else: + # MAX returns NULL if any are NULL, so COALESCE to 0 first. + max_clause = """MAX( + COALESCE(threaded_receipt_stream_ordering, 0), + COALESCE(unthreaded_receipt_stream_ordering, 0) + )""" + + sql = f""" + {receipts_cte} + SELECT eps.room_id, eps.thread_id, notif_count + FROM event_push_summary AS eps + {receipts_joins} + WHERE user_id = ? + AND notif_count != 0 + AND ( + (last_receipt_stream_ordering IS NULL AND stream_ordering > {max_clause}) + OR last_receipt_stream_ordering = {max_clause} + ) + """ + txn.execute(sql, args) + + seen_thread_ids = set() + room_to_count: Dict[str, int] = defaultdict(int) + + for room_id, thread_id, notif_count in txn: + room_to_count[room_id] += notif_count + seen_thread_ids.add(thread_id) + + # Now get any event push actions that haven't been rotated using the same OR + # join and filter by receipt and event push summary rotated up to stream ordering. + sql = f""" + {receipts_cte} + SELECT epa.room_id, epa.thread_id, COUNT(CASE WHEN epa.notif = 1 THEN 1 END) AS notif_count + FROM event_push_actions AS epa + {receipts_joins} + WHERE user_id = ? 
+ AND epa.notif = 1 + AND stream_ordering > (SELECT stream_ordering FROM event_push_summary_stream_ordering) + AND (threaded_receipt_stream_ordering IS NULL OR stream_ordering > threaded_receipt_stream_ordering) + AND (unthreaded_receipt_stream_ordering IS NULL OR stream_ordering > unthreaded_receipt_stream_ordering) + GROUP BY epa.room_id, epa.thread_id + """ + txn.execute(sql, args) + + for room_id, thread_id, notif_count in txn: + # Note: only count push actions we have valid summaries for with up to date receipt. + if thread_id not in seen_thread_ids: + continue + room_to_count[room_id] += notif_count + + thread_id_clause, thread_ids_args = make_in_list_sql_clause( + self.database_engine, "epa.thread_id", seen_thread_ids + ) + + # Finally re-check event_push_actions for any rooms not in the summary, ignoring + # the rotated up-to position. This handles the case where a read receipt has arrived + # but not been rotated meaning the summary table is out of date, so we go back to + # the push actions table. + sql = f""" + {receipts_cte} + SELECT epa.room_id, COUNT(CASE WHEN epa.notif = 1 THEN 1 END) AS notif_count + FROM event_push_actions AS epa + {receipts_joins} + WHERE user_id = ? + AND NOT {thread_id_clause} + AND epa.notif = 1 + AND (threaded_receipt_stream_ordering IS NULL OR stream_ordering > threaded_receipt_stream_ordering) + AND (unthreaded_receipt_stream_ordering IS NULL OR stream_ordering > unthreaded_receipt_stream_ordering) + GROUP BY epa.room_id + """ + + args.extend(thread_ids_args) + txn.execute(sql, args) + + for room_id, notif_count in txn: + room_to_count[room_id] += notif_count + + return room_to_count + @cached(tree=True, max_entries=5000, iterable=True) async def get_unread_event_push_actions_by_room_for_user( self, diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index ee48920f84..5fa8bd2d98 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -156,7 +156,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): last_event_id: str - def _assert_counts(noitf_count: int, highlight_count: int) -> None: + def _assert_counts(notif_count: int, highlight_count: int) -> None: counts = self.get_success( self.store.db_pool.runInteraction( "get-unread-counts", @@ -168,13 +168,22 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): self.assertEqual( counts.main_timeline, NotifCounts( - notify_count=noitf_count, + notify_count=notif_count, unread_count=0, highlight_count=highlight_count, ), ) self.assertEqual(counts.threads, {}) + aggregate_counts = self.get_success( + self.store.db_pool.runInteraction( + "get-aggregate-unread-counts", + self.store._get_unread_counts_by_room_for_user_txn, + user_id, + ) + ) + self.assertEqual(aggregate_counts[room_id], notif_count) + def _create_event(highlight: bool = False) -> str: result = self.helper.send_event( room_id, @@ -283,7 +292,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): last_event_id: str def _assert_counts( - noitf_count: int, + notif_count: int, highlight_count: int, thread_notif_count: int, thread_highlight_count: int, @@ -299,7 +308,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): self.assertEqual( counts.main_timeline, NotifCounts( - notify_count=noitf_count, + notify_count=notif_count, unread_count=0, highlight_count=highlight_count, ), @@ -318,6 +327,17 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): else: self.assertEqual(counts.threads, {}) + aggregate_counts = 
self.get_success( + self.store.db_pool.runInteraction( + "get-aggregate-unread-counts", + self.store._get_unread_counts_by_room_for_user_txn, + user_id, + ) + ) + self.assertEqual( + aggregate_counts[room_id], notif_count + thread_notif_count + ) + def _create_event( highlight: bool = False, thread_id: Optional[str] = None ) -> str: @@ -454,7 +474,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): last_event_id: str def _assert_counts( - noitf_count: int, + notif_count: int, highlight_count: int, thread_notif_count: int, thread_highlight_count: int, @@ -470,7 +490,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): self.assertEqual( counts.main_timeline, NotifCounts( - notify_count=noitf_count, + notify_count=notif_count, unread_count=0, highlight_count=highlight_count, ), @@ -489,6 +509,17 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): else: self.assertEqual(counts.threads, {}) + aggregate_counts = self.get_success( + self.store.db_pool.runInteraction( + "get-aggregate-unread-counts", + self.store._get_unread_counts_by_room_for_user_txn, + user_id, + ) + ) + self.assertEqual( + aggregate_counts[room_id], notif_count + thread_notif_count + ) + def _create_event( highlight: bool = False, thread_id: Optional[str] = None ) -> str: @@ -646,7 +677,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): ) return result["event_id"] - def _assert_counts(noitf_count: int, thread_notif_count: int) -> None: + def _assert_counts(notif_count: int, thread_notif_count: int) -> None: counts = self.get_success( self.store.db_pool.runInteraction( "get-unread-counts", @@ -658,7 +689,7 @@ class EventPushActionsStoreTestCase(HomeserverTestCase): self.assertEqual( counts.main_timeline, NotifCounts( - notify_count=noitf_count, unread_count=0, highlight_count=0 + notify_count=notif_count, unread_count=0, highlight_count=0 ), ) if thread_notif_count: -- cgit 1.5.1 From 71f3e53ad010ba8c219f1076d40915b985760ed9 Mon Sep 17 00:00:00 2001 From: Will Hunt Date: Thu, 1 Dec 2022 13:46:24 +0000 Subject: Add `push.enabled` option to disable push notification calculation (#14551) * Add initial option * changelog * Some more linting --- changelog.d/14551.feature | 1 + docs/usage/configuration/config_documentation.md | 5 +++ synapse/config/push.py | 1 + synapse/push/bulk_push_rule_evaluator.py | 3 ++ tests/push/test_bulk_push_rule_evaluator.py | 45 ++++++++++++++++++++++-- 5 files changed, 53 insertions(+), 2 deletions(-) create mode 100644 changelog.d/14551.feature (limited to 'synapse/push') diff --git a/changelog.d/14551.feature b/changelog.d/14551.feature new file mode 100644 index 0000000000..43b91d2e57 --- /dev/null +++ b/changelog.d/14551.feature @@ -0,0 +1 @@ +Add new `push.enabled` config option to allow opting out of push notification calculation. \ No newline at end of file diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 749af12aac..b9bde8f47e 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -3355,6 +3355,10 @@ Configuration settings related to push notifications This setting defines options for push notifications. This option has a number of sub-options. They are as follows: +* `enable_push`: Enables or disables push notification calculation. Note, disabling this will also + stop unread counts being calculated for rooms. 
This mode of operation is intended + for homeservers which may only have bots or appservice users connected, or are otherwise + not interested in push/unread counters. This is enabled by default. * `include_content`: Clients requesting push notifications can either have the body of the message sent in the notification poke along with other details like the sender, or just the event ID and room ID (`event_id_only`). @@ -3375,6 +3379,7 @@ This option has a number of sub-options. They are as follows: Example configuration: ```yaml push: + enable_push: true include_content: false group_unread_count_by_room: false ``` diff --git a/synapse/config/push.py b/synapse/config/push.py index 979b128eae..3b5378e6ea 100644 --- a/synapse/config/push.py +++ b/synapse/config/push.py @@ -26,6 +26,7 @@ class PushConfig(Config): def read_config(self, config: JsonDict, **kwargs: Any) -> None: push_config = config.get("push") or {} self.push_include_content = push_config.get("include_content", True) + self.enable_push = push_config.get("enabled", True) self.push_group_unread_count_by_room = push_config.get( "group_unread_count_by_room", True ) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index d6b377860f..9ed35d8461 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -106,6 +106,7 @@ class BulkPushRuleEvaluator: self.store = hs.get_datastores().main self.clock = hs.get_clock() self._event_auth_handler = hs.get_event_auth_handler() + self.should_calculate_push_rules = self.hs.config.push.enable_push self._related_event_match_enabled = self.hs.config.experimental.msc3664_enabled @@ -269,6 +270,8 @@ class BulkPushRuleEvaluator: for each event, check if the message should increment the unread count, and insert the results into the event_push_actions_staging table. """ + if not self.should_calculate_push_rules: + return # For batched events the power level events may not have been persisted yet, # so we pass in the batched events. Thus if the event cannot be found in the # database we can check in the batch. diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py index 594e7937a8..1cd453248e 100644 --- a/tests/push/test_bulk_push_rule_evaluator.py +++ b/tests/push/test_bulk_push_rule_evaluator.py @@ -6,10 +6,11 @@ from synapse.rest import admin from synapse.rest.client import login, register, room from synapse.types import create_requester -from tests import unittest +from tests.test_utils import simple_async_mock +from tests.unittest import HomeserverTestCase, override_config -class TestBulkPushRuleEvaluator(unittest.HomeserverTestCase): +class TestBulkPushRuleEvaluator(HomeserverTestCase): servlets = [ admin.register_servlets_for_client_rest_resource, @@ -72,3 +73,43 @@ class TestBulkPushRuleEvaluator(unittest.HomeserverTestCase): bulk_evaluator = BulkPushRuleEvaluator(self.hs) # should not raise self.get_success(bulk_evaluator.action_for_events_by_user([(event, context)])) + + @override_config({"push": {"enabled": False}}) + def test_action_for_event_by_user_disabled_by_config(self) -> None: + """Ensure that push rules are not calculated when disabled in the config""" + # Create a new user and room. + alice = self.register_user("alice", "pass") + token = self.login(alice, "pass") + + room_id = self.helper.create_room_as( + alice, room_version=RoomVersions.V9.identifier, tok=token + ) + + # Alter the power levels in that room to include stringy and floaty levels. 
+ # We need to suppress the validation logic or else it will reject these dodgy + # values. (Presumably this validation was not always present.) + event_creation_handler = self.hs.get_event_creation_handler() + requester = create_requester(alice) + + # Create a new message event, and try to evaluate it under the dodgy + # power level event. + event, context = self.get_success( + event_creation_handler.create_event( + requester, + { + "type": "m.room.message", + "room_id": room_id, + "content": { + "msgtype": "m.text", + "body": "helo", + }, + "sender": alice, + }, + ) + ) + + bulk_evaluator = BulkPushRuleEvaluator(self.hs) + bulk_evaluator._action_for_event_by_user = simple_async_mock() # type: ignore[assignment] + # should not raise + self.get_success(bulk_evaluator.action_for_events_by_user([(event, context)])) + bulk_evaluator._action_for_event_by_user.assert_not_called() -- cgit 1.5.1 From b5b5f6608462a988b05502a3b70b6a57ca3846d2 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Mon, 12 Dec 2022 16:19:30 +0000 Subject: Move `StateFilter` to `synapse.types` (#14668) * Move `StateFilter` to `synapse.types` * Changelog --- changelog.d/14668.misc | 1 + synapse/events/builder.py | 2 +- synapse/events/snapshot.py | 2 +- synapse/handlers/federation.py | 2 +- synapse/handlers/federation_event.py | 2 +- synapse/handlers/message.py | 2 +- synapse/handlers/pagination.py | 2 +- synapse/handlers/register.py | 2 +- synapse/handlers/room.py | 2 +- synapse/handlers/room_member.py | 2 +- synapse/handlers/search.py | 2 +- synapse/handlers/sync.py | 2 +- synapse/module_api/__init__.py | 2 +- synapse/push/bulk_push_rule_evaluator.py | 2 +- synapse/push/mailer.py | 2 +- synapse/rest/admin/rooms.py | 2 +- synapse/rest/client/room.py | 2 +- synapse/state/__init__.py | 2 +- synapse/storage/controllers/persist_events.py | 2 +- synapse/storage/controllers/state.py | 2 +- synapse/storage/databases/main/state.py | 2 +- synapse/storage/databases/state/bg_updates.py | 2 +- synapse/storage/databases/state/store.py | 2 +- synapse/storage/state.py | 567 ---------------- synapse/types.py | 928 -------------------------- synapse/types/__init__.py | 928 ++++++++++++++++++++++++++ synapse/types/state.py | 567 ++++++++++++++++ synapse/visibility.py | 2 +- tests/storage/test_state.py | 2 +- 29 files changed, 1520 insertions(+), 1519 deletions(-) create mode 100644 changelog.d/14668.misc delete mode 100644 synapse/storage/state.py delete mode 100644 synapse/types.py create mode 100644 synapse/types/__init__.py create mode 100644 synapse/types/state.py (limited to 'synapse/push') diff --git a/changelog.d/14668.misc b/changelog.d/14668.misc new file mode 100644 index 0000000000..5269d8a97d --- /dev/null +++ b/changelog.d/14668.misc @@ -0,0 +1 @@ +Move `StateFilter` to `synapse.types`. 
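Each hunk below is the same mechanical one-line change. After this commit the import comes from `synapse.types.state`, with behaviour otherwise unchanged (a minimal sketch, assuming a Synapse checkout containing this commit):

```python
# Old location, removed by this commit:
#   from synapse.storage.state import StateFilter
from synapse.types.state import StateFilter

# Usage is unchanged, e.g. a filter matching only the m.room.create event
# (this example matches the docstring of make_sql_filter_clause):
f = StateFilter.from_types([("m.room.create", "")])
clause, args = f.make_sql_filter_clause()
print(clause, args)  # (type = ? AND state_key = ?) ['m.room.create', '']
```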
diff --git a/synapse/events/builder.py b/synapse/events/builder.py index d62906043f..94dd1298e1 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -28,8 +28,8 @@ from synapse.event_auth import auth_types_for_event from synapse.events import EventBase, _EventInternalMetadata, make_event_from_dict from synapse.state import StateHandler from synapse.storage.databases.main import DataStore -from synapse.storage.state import StateFilter from synapse.types import EventID, JsonDict +from synapse.types.state import StateFilter from synapse.util import Clock from synapse.util.stringutils import random_string diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index 1c0e96bec7..6eaef8b57a 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -23,7 +23,7 @@ from synapse.types import JsonDict, StateMap if TYPE_CHECKING: from synapse.storage.controllers import StorageControllers from synapse.storage.databases.main import DataStore - from synapse.storage.state import StateFilter + from synapse.types.state import StateFilter @attr.s(slots=True, auto_attribs=True) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 3398fcaf7d..b2784d7333 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -70,8 +70,8 @@ from synapse.replication.http.federation import ( ) from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.events_worker import EventRedactBehaviour -from synapse.storage.state import StateFilter from synapse.types import JsonDict, get_domain_from_id +from synapse.types.state import StateFilter from synapse.util.async_helpers import Linearizer from synapse.util.retryutils import NotRetryingDestination from synapse.visibility import filter_events_for_server diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index f7223b03c3..d2facdab60 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -75,7 +75,6 @@ from synapse.replication.http.federation import ( from synapse.state import StateResolutionStore from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.events_worker import EventRedactBehaviour -from synapse.storage.state import StateFilter from synapse.types import ( PersistedEventPosition, RoomStreamToken, @@ -83,6 +82,7 @@ from synapse.types import ( UserID, get_domain_from_id, ) +from synapse.types.state import StateFilter from synapse.util.async_helpers import Linearizer, concurrently_execute from synapse.util.iterutils import batch_iter from synapse.util.retryutils import NotRetryingDestination diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 5cbe89f4fd..d6e90ef259 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -59,7 +59,6 @@ from synapse.replication.http.send_event import ReplicationSendEventRestServlet from synapse.replication.http.send_events import ReplicationSendEventsRestServlet from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.events_worker import EventRedactBehaviour -from synapse.storage.state import StateFilter from synapse.types import ( MutableStateMap, PersistedEventPosition, @@ -70,6 +69,7 @@ from synapse.types import ( UserID, create_requester, ) +from synapse.types.state import StateFilter from synapse.util import json_decoder, json_encoder, log_failure, 
unwrapFirstError from synapse.util.async_helpers import Linearizer, gather_results from synapse.util.caches.expiringcache import ExpiringCache diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index c572508a02..8c8ff18a1a 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -27,9 +27,9 @@ from synapse.handlers.room import ShutdownRoomResponse from synapse.logging.opentracing import trace from synapse.metrics.background_process_metrics import run_as_background_process from synapse.rest.admin._base import assert_user_is_admin -from synapse.storage.state import StateFilter from synapse.streams.config import PaginationConfig from synapse.types import JsonDict, Requester, StreamKeyType +from synapse.types.state import StateFilter from synapse.util.async_helpers import ReadWriteLock from synapse.util.stringutils import random_string from synapse.visibility import filter_events_for_client diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 6307fa9c5d..c611efb760 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -46,8 +46,8 @@ from synapse.replication.http.register import ( ReplicationRegisterServlet, ) from synapse.spam_checker_api import RegistrationBehaviour -from synapse.storage.state import StateFilter from synapse.types import RoomAlias, UserID, create_requester +from synapse.types.state import StateFilter if TYPE_CHECKING: from synapse.server import HomeServer diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 6dcfd86fdf..f81241c2b3 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -62,7 +62,6 @@ from synapse.events.utils import copy_and_fixup_power_levels_contents from synapse.handlers.relations import BundledAggregations from synapse.module_api import NOT_SPAM from synapse.rest.admin._base import assert_user_is_admin -from synapse.storage.state import StateFilter from synapse.streams import EventSource from synapse.types import ( JsonDict, @@ -77,6 +76,7 @@ from synapse.types import ( UserID, create_requester, ) +from synapse.types.state import StateFilter from synapse.util import stringutils from synapse.util.caches.response_cache import ResponseCache from synapse.util.stringutils import parse_and_validate_server_name diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 6ad2b38b8f..0c39e852a1 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -34,7 +34,6 @@ from synapse.events.snapshot import EventContext from synapse.handlers.profile import MAX_AVATAR_URL_LEN, MAX_DISPLAYNAME_LEN from synapse.logging import opentracing from synapse.module_api import NOT_SPAM -from synapse.storage.state import StateFilter from synapse.types import ( JsonDict, Requester, @@ -45,6 +44,7 @@ from synapse.types import ( create_requester, get_domain_from_id, ) +from synapse.types.state import StateFilter from synapse.util.async_helpers import Linearizer from synapse.util.distributor import user_left_room diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index bcab98c6d5..33115ce488 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -23,8 +23,8 @@ from synapse.api.constants import EventTypes, Membership from synapse.api.errors import NotFoundError, SynapseError from synapse.api.filtering import Filter from synapse.events import EventBase -from synapse.storage.state import StateFilter from synapse.types import JsonDict, StreamKeyType, UserID +from 
synapse.types.state import StateFilter from synapse.visibility import filter_events_for_client if TYPE_CHECKING: diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index dace9b606f..7d6a653747 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -49,7 +49,6 @@ from synapse.push.clientformat import format_push_rules_for_user from synapse.storage.databases.main.event_push_actions import RoomNotifCounts from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary from synapse.storage.roommember import MemberSummary -from synapse.storage.state import StateFilter from synapse.types import ( DeviceListUpdates, JsonDict, @@ -61,6 +60,7 @@ from synapse.types import ( StreamToken, UserID, ) +from synapse.types.state import StateFilter from synapse.util.async_helpers import concurrently_execute from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.caches.lrucache import LruCache diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index 96a661177a..0092a03c59 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -111,7 +111,6 @@ from synapse.storage.background_updates import ( ) from synapse.storage.database import DatabasePool, LoggingTransaction from synapse.storage.databases.main.roommember import ProfileInfo -from synapse.storage.state import StateFilter from synapse.types import ( DomainSpecificString, JsonDict, @@ -124,6 +123,7 @@ from synapse.types import ( UserProfile, create_requester, ) +from synapse.types.state import StateFilter from synapse.util import Clock from synapse.util.async_helpers import maybe_awaitable from synapse.util.caches.descriptors import CachedFunction, cached diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 9ed35d8461..36e5b327ef 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -35,8 +35,8 @@ from synapse.events import EventBase, relation_from_event from synapse.events.snapshot import EventContext from synapse.state import POWER_KEY from synapse.storage.databases.main.roommember import EventIdMembership -from synapse.storage.state import StateFilter from synapse.synapse_rust.push import FilteredPushRules, PushRuleEvaluator +from synapse.types.state import StateFilter from synapse.util.caches import register_cache from synapse.util.metrics import measure_func from synapse.visibility import filter_event_for_clients_with_state diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py index c2575ba3d9..93b255ced5 100644 --- a/synapse/push/mailer.py +++ b/synapse/push/mailer.py @@ -37,8 +37,8 @@ from synapse.push.push_types import ( TemplateVars, ) from synapse.storage.databases.main.event_push_actions import EmailPushAction -from synapse.storage.state import StateFilter from synapse.types import StateMap, UserID +from synapse.types.state import StateFilter from synapse.util.async_helpers import concurrently_execute from synapse.visibility import filter_events_for_client diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py index 747e6fda83..e957aa28ca 100644 --- a/synapse/rest/admin/rooms.py +++ b/synapse/rest/admin/rooms.py @@ -34,9 +34,9 @@ from synapse.rest.admin._base import ( assert_user_is_admin, ) from synapse.storage.databases.main.room import RoomSortOrder -from synapse.storage.state import StateFilter from synapse.streams.config import PaginationConfig from synapse.types import JsonDict, RoomID, 
UserID, create_requester +from synapse.types.state import StateFilter from synapse.util import json_decoder if TYPE_CHECKING: diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 514eb6afc8..790614d721 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -55,9 +55,9 @@ from synapse.logging.opentracing import set_tag from synapse.metrics.background_process_metrics import run_as_background_process from synapse.rest.client._base import client_patterns from synapse.rest.client.transactions import HttpTransactionCache -from synapse.storage.state import StateFilter from synapse.streams.config import PaginationConfig from synapse.types import JsonDict, StreamToken, ThirdPartyInstanceID, UserID +from synapse.types.state import StateFilter from synapse.util import json_decoder from synapse.util.cancellation import cancellable from synapse.util.stringutils import parse_and_validate_server_name, random_string diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 833ffec3de..ee5469d5a8 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -44,8 +44,8 @@ from synapse.logging.context import ContextResourceUsage from synapse.replication.http.state import ReplicationUpdateCurrentStateRestServlet from synapse.state import v1, v2 from synapse.storage.databases.main.events_worker import EventRedactBehaviour -from synapse.storage.state import StateFilter from synapse.types import StateMap +from synapse.types.state import StateFilter from synapse.util.async_helpers import Linearizer from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.metrics import Measure, measure_func diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index 33ffef521b..f1d2c71c91 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -58,13 +58,13 @@ from synapse.storage.controllers.state import StateStorageController from synapse.storage.databases import Databases from synapse.storage.databases.main.events import DeltaState from synapse.storage.databases.main.events_worker import EventRedactBehaviour -from synapse.storage.state import StateFilter from synapse.types import ( PersistedEventPosition, RoomStreamToken, StateMap, get_domain_from_id, ) +from synapse.types.state import StateFilter from synapse.util.async_helpers import ObservableDeferred, yieldable_gather_results from synapse.util.metrics import Measure diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py index 2b31ce54bb..26d79c6e62 100644 --- a/synapse/storage/controllers/state.py +++ b/synapse/storage/controllers/state.py @@ -31,12 +31,12 @@ from synapse.api.constants import EventTypes from synapse.events import EventBase from synapse.logging.opentracing import tag_args, trace from synapse.storage.roommember import ProfileInfo -from synapse.storage.state import StateFilter from synapse.storage.util.partial_state_events_tracker import ( PartialCurrentStateTracker, PartialStateEventsTracker, ) from synapse.types import MutableStateMap, StateMap +from synapse.types.state import StateFilter from synapse.util.cancellation import cancellable if TYPE_CHECKING: diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index af7bebee80..c801a93b5b 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -33,8 +33,8 @@ from synapse.storage.database 
import ( ) from synapse.storage.databases.main.events_worker import EventsWorkerStore from synapse.storage.databases.main.roommember import RoomMemberWorkerStore -from synapse.storage.state import StateFilter from synapse.types import JsonDict, JsonMapping, StateMap +from synapse.types.state import StateFilter from synapse.util.caches import intern_string from synapse.util.caches.descriptors import cached, cachedList from synapse.util.cancellation import cancellable diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py index 4a4ad0f492..d743282f13 100644 --- a/synapse/storage/databases/state/bg_updates.py +++ b/synapse/storage/databases/state/bg_updates.py @@ -22,8 +22,8 @@ from synapse.storage.database import ( LoggingTransaction, ) from synapse.storage.engines import PostgresEngine -from synapse.storage.state import StateFilter from synapse.types import MutableStateMap, StateMap +from synapse.types.state import StateFilter from synapse.util.caches import intern_string if TYPE_CHECKING: diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index f8cfcaca83..1a7232b276 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -25,10 +25,10 @@ from synapse.storage.database import ( LoggingTransaction, ) from synapse.storage.databases.state.bg_updates import StateBackgroundUpdateStore -from synapse.storage.state import StateFilter from synapse.storage.types import Cursor from synapse.storage.util.sequence import build_sequence_generator from synapse.types import MutableStateMap, StateKey, StateMap +from synapse.types.state import StateFilter from synapse.util.caches.descriptors import cached from synapse.util.caches.dictionary_cache import DictionaryCache from synapse.util.cancellation import cancellable diff --git a/synapse/storage/state.py b/synapse/storage/state.py deleted file mode 100644 index 0004d955b4..0000000000 --- a/synapse/storage/state.py +++ /dev/null @@ -1,567 +0,0 @@ -# Copyright 2014-2016 OpenMarket Ltd -# Copyright 2022 The Matrix.org Foundation C.I.C. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import logging -from typing import ( - TYPE_CHECKING, - Callable, - Collection, - Dict, - Iterable, - List, - Mapping, - Optional, - Set, - Tuple, - TypeVar, -) - -import attr -from frozendict import frozendict - -from synapse.api.constants import EventTypes -from synapse.types import MutableStateMap, StateKey, StateMap - -if TYPE_CHECKING: - from typing import FrozenSet # noqa: used within quoted type hint; flake8 sad - - -logger = logging.getLogger(__name__) - -# Used for generic functions below -T = TypeVar("T") - - -@attr.s(slots=True, frozen=True, auto_attribs=True) -class StateFilter: - """A filter used when querying for state. - - Attributes: - types: Map from type to set of state keys (or None). This specifies - which state_keys for the given type to fetch from the DB. If None - then all events with that type are fetched. 
If the set is empty - then no events with that type are fetched. - include_others: Whether to fetch events with types that do not - appear in `types`. - """ - - types: "frozendict[str, Optional[FrozenSet[str]]]" - include_others: bool = False - - def __attrs_post_init__(self) -> None: - # If `include_others` is set we canonicalise the filter by removing - # wildcards from the types dictionary - if self.include_others: - # this is needed to work around the fact that StateFilter is frozen - object.__setattr__( - self, - "types", - frozendict({k: v for k, v in self.types.items() if v is not None}), - ) - - @staticmethod - def all() -> "StateFilter": - """Returns a filter that fetches everything. - - Returns: - The state filter. - """ - return _ALL_STATE_FILTER - - @staticmethod - def none() -> "StateFilter": - """Returns a filter that fetches nothing. - - Returns: - The new state filter. - """ - return _NONE_STATE_FILTER - - @staticmethod - def from_types(types: Iterable[Tuple[str, Optional[str]]]) -> "StateFilter": - """Creates a filter that only fetches the given types - - Args: - types: A list of type and state keys to fetch. A state_key of None - fetches everything for that type - - Returns: - The new state filter. - """ - type_dict: Dict[str, Optional[Set[str]]] = {} - for typ, s in types: - if typ in type_dict: - if type_dict[typ] is None: - continue - - if s is None: - type_dict[typ] = None - continue - - type_dict.setdefault(typ, set()).add(s) # type: ignore - - return StateFilter( - types=frozendict( - (k, frozenset(v) if v is not None else None) - for k, v in type_dict.items() - ) - ) - - @staticmethod - def from_lazy_load_member_list(members: Iterable[str]) -> "StateFilter": - """Creates a filter that returns all non-member events, plus the member - events for the given users - - Args: - members: Set of user IDs - - Returns: - The new state filter - """ - return StateFilter( - types=frozendict({EventTypes.Member: frozenset(members)}), - include_others=True, - ) - - @staticmethod - def freeze( - types: Mapping[str, Optional[Collection[str]]], include_others: bool - ) -> "StateFilter": - """ - Returns a (frozen) StateFilter with the same contents as the parameters - specified here, which can be made of mutable types. - """ - types_with_frozen_values: Dict[str, Optional[FrozenSet[str]]] = {} - for state_types, state_keys in types.items(): - if state_keys is not None: - types_with_frozen_values[state_types] = frozenset(state_keys) - else: - types_with_frozen_values[state_types] = None - - return StateFilter( - frozendict(types_with_frozen_values), include_others=include_others - ) - - def return_expanded(self) -> "StateFilter": - """Creates a new StateFilter where type wild cards have been removed - (except for memberships). The returned filter is a superset of the - current one, i.e. anything that passes the current filter will pass - the returned filter. - - This helps the caching as the DictionaryCache knows if it has *all* the - state, but does not know if it has all of the keys of a particular type, - which makes wildcard lookups expensive unless we have a complete cache. - Hence, if we are doing a wildcard lookup, populate the cache fully so - that we can do an efficient lookup next time. - - Note that since we have two caches, one for membership events and one for - other events, we can be a bit more clever than simply returning - `StateFilter.all()` if `has_wildcards()` is True. - - We return a StateFilter where: - 1. the list of membership events to return is the same - 2. 
if there is a wildcard that matches non-member events we - return all non-member events - - Returns: - The new state filter. - """ - - if self.is_full(): - # If we're going to return everything then there's nothing to do - return self - - if not self.has_wildcards(): - # If there are no wild cards, there's nothing to do - return self - - if EventTypes.Member in self.types: - get_all_members = self.types[EventTypes.Member] is None - else: - get_all_members = self.include_others - - has_non_member_wildcard = self.include_others or any( - state_keys is None - for t, state_keys in self.types.items() - if t != EventTypes.Member - ) - - if not has_non_member_wildcard: - # If there are no non-member wild cards we can just return ourselves - return self - - if get_all_members: - # We want to return everything. - return StateFilter.all() - elif EventTypes.Member in self.types: - # We want to return all non-members, but only particular - # memberships - return StateFilter( - types=frozendict({EventTypes.Member: self.types[EventTypes.Member]}), - include_others=True, - ) - else: - # We want to return all non-members - return _ALL_NON_MEMBER_STATE_FILTER - - def make_sql_filter_clause(self) -> Tuple[str, List[str]]: - """Converts the filter to an SQL clause. - - For example: - - f = StateFilter.from_types([("m.room.create", "")]) - clause, args = f.make_sql_filter_clause() - clause == "(type = ? AND state_key = ?)" - args == ['m.room.create', ''] - - - Returns: - The SQL string (may be empty) and arguments. An empty SQL string is - returned when the filter matches everything (i.e. is "full"). - """ - - where_clause = "" - where_args: List[str] = [] - - if self.is_full(): - return where_clause, where_args - - if not self.include_others and not self.types: - # i.e. this is an empty filter, so we need to return a clause that - # will match nothing - return "1 = 2", [] - - # First we build up a lost of clauses for each type/state_key combo - clauses = [] - for etype, state_keys in self.types.items(): - if state_keys is None: - clauses.append("(type = ?)") - where_args.append(etype) - continue - - for state_key in state_keys: - clauses.append("(type = ? AND state_key = ?)") - where_args.extend((etype, state_key)) - - # This will match anything that appears in `self.types` - where_clause = " OR ".join(clauses) - - # If we want to include stuff that's not in the types dict then we add - # a `OR type NOT IN (...)` clause to the end. - if self.include_others: - if where_clause: - where_clause += " OR " - - where_clause += "type NOT IN (%s)" % (",".join(["?"] * len(self.types)),) - where_args.extend(self.types) - - return where_clause, where_args - - def max_entries_returned(self) -> Optional[int]: - """Returns the maximum number of entries this filter will return if - known, otherwise returns None. - - For example a simple state filter asking for `("m.room.create", "")` - will return 1, whereas the default state filter will return None. - - This is used to bail out early if the right number of entries have been - fetched. - """ - if self.has_wildcards(): - return None - - return len(self.concrete_types()) - - def filter_state(self, state_dict: StateMap[T]) -> MutableStateMap[T]: - """Returns the state filtered with by this StateFilter. - - Args: - state: The state map to filter - - Returns: - The filtered state map. - This is a copy, so it's safe to mutate. 
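As a quick aside, a minimal sketch of this filter in action, assuming a Synapse checkout where `StateFilter` now lives at `synapse.types.state` (as this patch arranges):

```python
from synapse.types.state import StateFilter

# Fetch the create event plus every m.room.member event (None = wildcard).
f = StateFilter.from_types([("m.room.create", ""), ("m.room.member", None)])

state = {
    ("m.room.create", ""): "$create",
    ("m.room.member", "@alice:example.org"): "$alice",
    ("m.room.topic", ""): "$topic",
}
# filter_state returns a mutable copy holding only the matching entries;
# the topic event is dropped because include_others defaults to False.
assert f.filter_state(state) == {
    ("m.room.create", ""): "$create",
    ("m.room.member", "@alice:example.org"): "$alice",
}

# The SQL clause has the same shape: one (type, state_key) equality pair
# per concrete key, and a bare type match for each wildcard entry.
clause, args = f.make_sql_filter_clause()
```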
- """ - if self.is_full(): - return dict(state_dict) - - filtered_state = {} - for k, v in state_dict.items(): - typ, state_key = k - if typ in self.types: - state_keys = self.types[typ] - if state_keys is None or state_key in state_keys: - filtered_state[k] = v - elif self.include_others: - filtered_state[k] = v - - return filtered_state - - def is_full(self) -> bool: - """Whether this filter fetches everything or not - - Returns: - True if the filter fetches everything. - """ - return self.include_others and not self.types - - def has_wildcards(self) -> bool: - """Whether the filter includes wildcards or is attempting to fetch - specific state. - - Returns: - True if the filter includes wildcards. - """ - - return self.include_others or any( - state_keys is None for state_keys in self.types.values() - ) - - def concrete_types(self) -> List[Tuple[str, str]]: - """Returns a list of concrete type/state_keys (i.e. not None) that - will be fetched. This will be a complete list if `has_wildcards` - returns False, but otherwise will be a subset (or even empty). - - Returns: - A list of type/state_keys tuples. - """ - return [ - (t, s) - for t, state_keys in self.types.items() - if state_keys is not None - for s in state_keys - ] - - def get_member_split(self) -> Tuple["StateFilter", "StateFilter"]: - """Return the filter split into two: one which assumes it's exclusively - matching against member state, and one which assumes it's matching - against non member state. - - This is useful due to the returned filters giving correct results for - `is_full()`, `has_wildcards()`, etc, when operating against maps that - either exclusively contain member events or only contain non-member - events. (Which is the case when dealing with the member vs non-member - state caches). - - Returns: - The member and non member filters - """ - - if EventTypes.Member in self.types: - state_keys = self.types[EventTypes.Member] - if state_keys is None: - member_filter = StateFilter.all() - else: - member_filter = StateFilter(frozendict({EventTypes.Member: state_keys})) - elif self.include_others: - member_filter = StateFilter.all() - else: - member_filter = StateFilter.none() - - non_member_filter = StateFilter( - types=frozendict( - {k: v for k, v in self.types.items() if k != EventTypes.Member} - ), - include_others=self.include_others, - ) - - return member_filter, non_member_filter - - def _decompose_into_four_parts( - self, - ) -> Tuple[Tuple[bool, Set[str]], Tuple[Set[str], Set[StateKey]]]: - """ - Decomposes this state filter into 4 constituent parts, which can be - thought of as this: - all? - minus_wildcards + plus_wildcards + plus_state_keys - - where - * all represents ALL state - * minus_wildcards represents entire state types to remove - * plus_wildcards represents entire state types to add - * plus_state_keys represents individual state keys to add - - See `recompose_from_four_parts` for the other direction of this - correspondence. - """ - is_all = self.include_others - excluded_types: Set[str] = {t for t in self.types if is_all} - wildcard_types: Set[str] = {t for t, s in self.types.items() if s is None} - concrete_keys: Set[StateKey] = set(self.concrete_types()) - - return (is_all, excluded_types), (wildcard_types, concrete_keys) - - @staticmethod - def _recompose_from_four_parts( - all_part: bool, - minus_wildcards: Set[str], - plus_wildcards: Set[str], - plus_state_keys: Set[StateKey], - ) -> "StateFilter": - """ - Recomposes a state filter from 4 parts. 
- - See `decompose_into_four_parts` (the other direction of this - correspondence) for descriptions on each of the parts. - """ - - # {state type -> set of state keys OR None for wildcard} - # (The same structure as that of a StateFilter.) - new_types: Dict[str, Optional[Set[str]]] = {} - - # if we start with all, insert the excluded statetypes as empty sets - # to prevent them from being included - if all_part: - new_types.update({state_type: set() for state_type in minus_wildcards}) - - # insert the plus wildcards - new_types.update({state_type: None for state_type in plus_wildcards}) - - # insert the specific state keys - for state_type, state_key in plus_state_keys: - if state_type in new_types: - entry = new_types[state_type] - if entry is not None: - entry.add(state_key) - elif not all_part: - # don't insert if the entire type is already included by - # include_others as this would actually shrink the state allowed - # by this filter. - new_types[state_type] = {state_key} - - return StateFilter.freeze(new_types, include_others=all_part) - - def approx_difference(self, other: "StateFilter") -> "StateFilter": - """ - Returns a state filter which represents `self - other`. - - This is useful for determining what state remains to be pulled out of the - database if we want the state included by `self` but already have the state - included by `other`. - - The returned state filter - - MUST include all state events that are included by this filter (`self`) - unless they are included by `other`; - - MUST NOT include state events not included by this filter (`self`); and - - MAY be an over-approximation: the returned state filter - MAY additionally include some state events from `other`. - - This implementation attempts to return the narrowest such state filter. - In the case that `self` contains wildcards for state types where - `other` contains specific state keys, an approximation must be made: - the returned state filter keeps the wildcard, as state filters are not - able to express 'all state keys except some given examples'. - e.g. - StateFilter(m.room.member -> None (wildcard)) - minus - StateFilter(m.room.member -> {'@wombat:example.org'}) - is approximated as - StateFilter(m.room.member -> None (wildcard)) - """ - - # We first transform self and other into an alternative representation: - # - whether or not they include all events to begin with ('all') - # - if so, which event types are excluded? ('excludes') - # - which entire event types to include ('wildcards') - # - which concrete state keys to include ('concrete state keys') - (self_all, self_excludes), ( - self_wildcards, - self_concrete_keys, - ) = self._decompose_into_four_parts() - (other_all, other_excludes), ( - other_wildcards, - other_concrete_keys, - ) = other._decompose_into_four_parts() - - # Start with an estimate of the difference based on self - new_all = self_all - # Wildcards from the other can be added to the exclusion filter - new_excludes = self_excludes | other_wildcards - # We remove wildcards that appeared as wildcards in the other - new_wildcards = self_wildcards - other_wildcards - # We filter out the concrete state keys that appear in the other - # as wildcards or concrete state keys. 
- new_concrete_keys = { - (state_type, state_key) - for (state_type, state_key) in self_concrete_keys - if state_type not in other_wildcards - } - other_concrete_keys - - if other_all: - if self_all: - # If self starts with all, then we add as wildcards any - # types which appear in the other's exclusion filter (but - # aren't in the self exclusion filter). This is as the other - # filter will return everything BUT the types in its exclusion, so - # we need to add those excluded types that also match the self - # filter as wildcard types in the new filter. - new_wildcards |= other_excludes.difference(self_excludes) - - # If other is an `include_others` then the difference isn't. - new_all = False - # (We have no need for excludes when we don't start with all, as there - # is nothing to exclude.) - new_excludes = set() - - # We also filter out all state types that aren't in the exclusion - # list of the other. - new_wildcards &= other_excludes - new_concrete_keys = { - (state_type, state_key) - for (state_type, state_key) in new_concrete_keys - if state_type in other_excludes - } - - # Transform our newly-constructed state filter from the alternative - # representation back into the normal StateFilter representation. - return StateFilter._recompose_from_four_parts( - new_all, new_excludes, new_wildcards, new_concrete_keys - ) - - def must_await_full_state(self, is_mine_id: Callable[[str], bool]) -> bool: - """Check if we need to wait for full state to complete to calculate this state - - If we have a state filter which is completely satisfied even with partial - state, then we don't need to await_full_state before we can return it. - - Args: - is_mine_id: a callable which confirms if a given state_key matches a mxid - of a local user - """ - # if we haven't requested membership events, then it depends on the value of - # 'include_others' - if EventTypes.Member not in self.types: - return self.include_others - - # if we're looking for *all* membership events, then we have to wait - member_state_keys = self.types[EventTypes.Member] - if member_state_keys is None: - return True - - # otherwise, consider whose membership we are looking for. If it's entirely - # local users, then we don't need to wait. - for state_key in member_state_keys: - if not is_mine_id(state_key): - # remote user - return True - - # local users only - return False - - -_ALL_STATE_FILTER = StateFilter(types=frozendict(), include_others=True) -_ALL_NON_MEMBER_STATE_FILTER = StateFilter( - types=frozendict({EventTypes.Member: frozenset()}), include_others=True -) -_NONE_STATE_FILTER = StateFilter(types=frozendict(), include_others=False) diff --git a/synapse/types.py b/synapse/types.py deleted file mode 100644 index f2d436ddc3..0000000000 --- a/synapse/types.py +++ /dev/null @@ -1,928 +0,0 @@ -# Copyright 2014-2016 OpenMarket Ltd -# Copyright 2019 The Matrix.org Foundation C.I.C. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import abc -import re -import string -from typing import ( - TYPE_CHECKING, - Any, - ClassVar, - Dict, - List, - Mapping, - Match, - MutableMapping, - NoReturn, - Optional, - Set, - Tuple, - Type, - TypeVar, - Union, -) - -import attr -from frozendict import frozendict -from signedjson.key import decode_verify_key_bytes -from signedjson.types import VerifyKey -from typing_extensions import Final, TypedDict -from unpaddedbase64 import decode_base64 -from zope.interface import Interface - -from twisted.internet.defer import CancelledError -from twisted.internet.interfaces import ( - IReactorCore, - IReactorPluggableNameResolver, - IReactorSSL, - IReactorTCP, - IReactorThreads, - IReactorTime, -) - -from synapse.api.errors import Codes, SynapseError -from synapse.util.cancellation import cancellable -from synapse.util.stringutils import parse_and_validate_server_name - -if TYPE_CHECKING: - from synapse.appservice.api import ApplicationService - from synapse.storage.databases.main import DataStore, PurgeEventsStore - from synapse.storage.databases.main.appservice import ApplicationServiceWorkerStore - -# Define a state map type from type/state_key to T (usually an event ID or -# event) -T = TypeVar("T") -StateKey = Tuple[str, str] -StateMap = Mapping[StateKey, T] -MutableStateMap = MutableMapping[StateKey, T] - -# JSON types. These could be made stronger, but will do for now. -# A JSON-serialisable dict. -JsonDict = Dict[str, Any] -# A JSON-serialisable mapping; roughly speaking an immutable JSONDict. -# Useful when you have a TypedDict which isn't going to be mutated and you don't want -# to cast to JsonDict everywhere. -JsonMapping = Mapping[str, Any] -# A JSON-serialisable object. -JsonSerializable = object - - -# Note that this seems to require inheriting *directly* from Interface in order -# for mypy-zope to realize it is an interface. -class ISynapseReactor( - IReactorTCP, - IReactorSSL, - IReactorPluggableNameResolver, - IReactorTime, - IReactorCore, - IReactorThreads, - Interface, -): - """The interfaces necessary for Synapse to function.""" - - -@attr.s(frozen=True, slots=True, auto_attribs=True) -class Requester: - """ - Represents the user making a request - - Attributes: - user: id of the user making the request - access_token_id: *ID* of the access token used for this - request, or None if it came via the appservice API or similar - is_guest: True if the user making this request is a guest user - shadow_banned: True if the user making this request has been shadow-banned. - device_id: device_id which was set at authentication time - app_service: the AS requesting on behalf of the user - authenticated_entity: The entity that authenticated when making the request. - This is different to the user_id when an admin user or the server is - "puppeting" the user. 
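A short sketch of how the shared aliases defined near the top of this module are meant to be used (the function and values here are illustrative, not part of the module):

```python
from synapse.types import JsonDict, MutableStateMap, StateMap

def count_members(state: StateMap[str]) -> int:
    # A StateMap maps (event type, state_key) tuples to values, here event IDs.
    return sum(1 for etype, _ in state if etype == "m.room.member")

state: MutableStateMap[str] = {("m.room.member", "@alice:example.org"): "$ev1"}
body: JsonDict = {"membership": "join"}
assert count_members(state) == 1
```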
- """ - - user: "UserID" - access_token_id: Optional[int] - is_guest: bool - shadow_banned: bool - device_id: Optional[str] - app_service: Optional["ApplicationService"] - authenticated_entity: str - - def serialize(self) -> Dict[str, Any]: - """Converts self to a type that can be serialized as JSON, and then - deserialized by `deserialize` - - Returns: - dict - """ - return { - "user_id": self.user.to_string(), - "access_token_id": self.access_token_id, - "is_guest": self.is_guest, - "shadow_banned": self.shadow_banned, - "device_id": self.device_id, - "app_server_id": self.app_service.id if self.app_service else None, - "authenticated_entity": self.authenticated_entity, - } - - @staticmethod - def deserialize( - store: "ApplicationServiceWorkerStore", input: Dict[str, Any] - ) -> "Requester": - """Converts a dict that was produced by `serialize` back into a - Requester. - - Args: - store: Used to convert AS ID to AS object - input: A dict produced by `serialize` - - Returns: - Requester - """ - appservice = None - if input["app_server_id"]: - appservice = store.get_app_service_by_id(input["app_server_id"]) - - return Requester( - user=UserID.from_string(input["user_id"]), - access_token_id=input["access_token_id"], - is_guest=input["is_guest"], - shadow_banned=input["shadow_banned"], - device_id=input["device_id"], - app_service=appservice, - authenticated_entity=input["authenticated_entity"], - ) - - -def create_requester( - user_id: Union[str, "UserID"], - access_token_id: Optional[int] = None, - is_guest: bool = False, - shadow_banned: bool = False, - device_id: Optional[str] = None, - app_service: Optional["ApplicationService"] = None, - authenticated_entity: Optional[str] = None, -) -> Requester: - """ - Create a new ``Requester`` object - - Args: - user_id: id of the user making the request - access_token_id: *ID* of the access token used for this - request, or None if it came via the appservice API or similar - is_guest: True if the user making this request is a guest user - shadow_banned: True if the user making this request is shadow-banned. - device_id: device_id which was set at authentication time - app_service: the AS requesting on behalf of the user - authenticated_entity: The entity that authenticated when making the request. - This is different to the user_id when an admin user or the server is - "puppeting" the user. - - Returns: - Requester - """ - if not isinstance(user_id, UserID): - user_id = UserID.from_string(user_id) - - if authenticated_entity is None: - authenticated_entity = user_id.to_string() - - return Requester( - user_id, - access_token_id, - is_guest, - shadow_banned, - device_id, - app_service, - authenticated_entity, - ) - - -def get_domain_from_id(string: str) -> str: - idx = string.find(":") - if idx == -1: - raise SynapseError(400, "Invalid ID: %r" % (string,)) - return string[idx + 1 :] - - -def get_localpart_from_id(string: str) -> str: - idx = string.find(":") - if idx == -1: - raise SynapseError(400, "Invalid ID: %r" % (string,)) - return string[1:idx] - - -DS = TypeVar("DS", bound="DomainSpecificString") - - -@attr.s(slots=True, frozen=True, repr=False, auto_attribs=True) -class DomainSpecificString(metaclass=abc.ABCMeta): - """Common base class among ID/name strings that have a local part and a - domain name, prefixed with a sigil. 
- - Has the fields: - - 'localpart' : The local part of the name (without the leading sigil) - 'domain' : The domain part of the name - """ - - SIGIL: ClassVar[str] = abc.abstractproperty() # type: ignore - - localpart: str - domain: str - - # Because this is a frozen class, it is deeply immutable. - def __copy__(self: DS) -> DS: - return self - - def __deepcopy__(self: DS, memo: Dict[str, object]) -> DS: - return self - - @classmethod - def from_string(cls: Type[DS], s: str) -> DS: - """Parse the string given by 's' into a structure object.""" - if len(s) < 1 or s[0:1] != cls.SIGIL: - raise SynapseError( - 400, - "Expected %s string to start with '%s'" % (cls.__name__, cls.SIGIL), - Codes.INVALID_PARAM, - ) - - parts = s[1:].split(":", 1) - if len(parts) != 2: - raise SynapseError( - 400, - "Expected %s of the form '%slocalname:domain'" - % (cls.__name__, cls.SIGIL), - Codes.INVALID_PARAM, - ) - - domain = parts[1] - # This code will need changing if we want to support multiple domain - # names on one HS - return cls(localpart=parts[0], domain=domain) - - def to_string(self) -> str: - """Return a string encoding the fields of the structure object.""" - return "%s%s:%s" % (self.SIGIL, self.localpart, self.domain) - - @classmethod - def is_valid(cls: Type[DS], s: str) -> bool: - """Parses the input string and attempts to ensure it is valid.""" - # TODO: this does not reject an empty localpart or an overly-long string. - # See https://spec.matrix.org/v1.2/appendices/#identifier-grammar - try: - obj = cls.from_string(s) - # Apply additional validation to the domain. This is only done - # during is_valid (and not part of from_string) since it is - # possible for invalid data to exist in room-state, etc. - parse_and_validate_server_name(obj.domain) - return True - except Exception: - return False - - __repr__ = to_string - - -@attr.s(slots=True, frozen=True, repr=False) -class UserID(DomainSpecificString): - """Structure representing a user ID.""" - - SIGIL = "@" - - -@attr.s(slots=True, frozen=True, repr=False) -class RoomAlias(DomainSpecificString): - """Structure representing a room name.""" - - SIGIL = "#" - - -@attr.s(slots=True, frozen=True, repr=False) -class RoomID(DomainSpecificString): - """Structure representing a room id.""" - - SIGIL = "!" - - -@attr.s(slots=True, frozen=True, repr=False) -class EventID(DomainSpecificString): - """Structure representing an event id.""" - - SIGIL = "$" - - -mxid_localpart_allowed_characters = set( - "_-./=" + string.ascii_lowercase + string.digits -) - - -def contains_invalid_mxid_characters(localpart: str) -> bool: - """Check for characters not allowed in an mxid or groupid localpart - - Args: - localpart: the localpart to be checked - - Returns: - True if there are any naughty characters - """ - return any(c not in mxid_localpart_allowed_characters for c in localpart) - - -UPPER_CASE_PATTERN = re.compile(b"[A-Z_]") - -# the following is a pattern which matches '=', and bytes which are not allowed in a mxid -# localpart. 
-# -# It works by: -# * building a string containing the allowed characters (excluding '=') -# * escaping every special character with a backslash (to stop '-' being interpreted as a -# range operator) -# * wrapping it in a '[^...]' regex -# * converting the whole lot to a 'bytes' sequence, so that we can use it to match -# bytes rather than strings -# -NON_MXID_CHARACTER_PATTERN = re.compile( - ("[^%s]" % (re.escape("".join(mxid_localpart_allowed_characters - {"="})),)).encode( - "ascii" - ) -) - - -def map_username_to_mxid_localpart( - username: Union[str, bytes], case_sensitive: bool = False -) -> str: - """Map a username onto a string suitable for a MXID - - This follows the algorithm laid out at - https://matrix.org/docs/spec/appendices.html#mapping-from-other-character-sets. - - Args: - username: username to be mapped - case_sensitive: true if TEST and test should be mapped - onto different mxids - - Returns: - string suitable for a mxid localpart - """ - if not isinstance(username, bytes): - username = username.encode("utf-8") - - # first we sort out upper-case characters - if case_sensitive: - - def f1(m: Match[bytes]) -> bytes: - return b"_" + m.group().lower() - - username = UPPER_CASE_PATTERN.sub(f1, username) - else: - username = username.lower() - - # then we sort out non-ascii characters by converting to the hex equivalent. - def f2(m: Match[bytes]) -> bytes: - return b"=%02x" % (m.group()[0],) - - username = NON_MXID_CHARACTER_PATTERN.sub(f2, username) - - # we also do the =-escaping to mxids starting with an underscore. - username = re.sub(b"^_", b"=5f", username) - - # we should now only have ascii bytes left, so can decode back to a string. - return username.decode("ascii") - - -@attr.s(frozen=True, slots=True, order=False) -class RoomStreamToken: - """Tokens are positions between events. The token "s1" comes after event 1. - - s0 s1 - | | - [0] â–¼ [1] â–¼ [2] - - Tokens can either be a point in the live event stream or a cursor going - through historic events. - - When traversing the live event stream, events are ordered by - `stream_ordering` (when they arrived at the homeserver). - - When traversing historic events, events are first ordered by their `depth` - (`topological_ordering` in the event graph) and tie-broken by - `stream_ordering` (when the event arrived at the homeserver). - - If you're looking for more info about what a token with all of the - underscores means, ex. - `s2633508_17_338_6732159_1082514_541479_274711_265584_1`, see the docstring - for `StreamToken` below. - - --- - - Live tokens start with an "s" followed by the `stream_ordering` of the event - that comes before the position of the token. Said another way: - `stream_ordering` uniquely identifies a persisted event. The live token - means "the position just after the event identified by `stream_ordering`". - An example token is: - - s2633508 - - --- - - Historic tokens start with a "t" followed by the `depth` - (`topological_ordering` in the event graph) of the event that comes before - the position of the token, followed by "-", followed by the - `stream_ordering` of the event that comes before the position of the token. - An example token is: - - t426-2633508 - - --- - - There is also a third mode for live tokens where the token starts with "m", - which is sometimes used when using sharded event persisters. 
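Stepping back to the username mapping just above: the algorithm is deterministic, so a couple of worked cases illustrate it (a sketch, assuming a Synapse checkout):

```python
from synapse.types import map_username_to_mxid_localpart

# The default, case-insensitive mapping simply lower-cases ASCII letters.
assert map_username_to_mxid_localpart("Alice") == "alice"

# Each disallowed byte of the UTF-8 encoding becomes "=xx".
assert map_username_to_mxid_localpart("café") == "caf=c3=a9"

# In case-sensitive mode capitals gain a leading underscore, and a leading
# underscore in the result is itself =-escaped.
assert map_username_to_mxid_localpart("Alice", case_sensitive=True) == "=5falice"
```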
In this case - the events stream is considered to be a set of streams (one for each writer) - and the token encodes the vector clock of positions of each writer in their - respective streams. - - The format of the token in such case is an initial integer min position, - followed by the mapping of instance ID to position separated by '.' and '~': - - m{min_pos}~{writer1}.{pos1}~{writer2}.{pos2}. ... - - The `min_pos` corresponds to the minimum position all writers have persisted - up to, and then only writers that are ahead of that position need to be - encoded. An example token is: - - m56~2.58~3.59 - - Which corresponds to a set of three (or more writers) where instances 2 and - 3 (these are instance IDs that can be looked up in the DB to fetch the more - commonly used instance names) are at positions 58 and 59 respectively, and - all other instances are at position 56. - - Note: The `RoomStreamToken` cannot have both a topological part and an - instance map. - - --- - - For caching purposes, `RoomStreamToken`s and by extension, all their - attributes, must be hashable. - """ - - topological: Optional[int] = attr.ib( - validator=attr.validators.optional(attr.validators.instance_of(int)), - ) - stream: int = attr.ib(validator=attr.validators.instance_of(int)) - - instance_map: "frozendict[str, int]" = attr.ib( - factory=frozendict, - validator=attr.validators.deep_mapping( - key_validator=attr.validators.instance_of(str), - value_validator=attr.validators.instance_of(int), - mapping_validator=attr.validators.instance_of(frozendict), - ), - ) - - def __attrs_post_init__(self) -> None: - """Validates that both `topological` and `instance_map` aren't set.""" - - if self.instance_map and self.topological: - raise ValueError( - "Cannot set both 'topological' and 'instance_map' on 'RoomStreamToken'." - ) - - @classmethod - async def parse(cls, store: "PurgeEventsStore", string: str) -> "RoomStreamToken": - try: - if string[0] == "s": - return cls(topological=None, stream=int(string[1:])) - if string[0] == "t": - parts = string[1:].split("-", 1) - return cls(topological=int(parts[0]), stream=int(parts[1])) - if string[0] == "m": - parts = string[1:].split("~") - stream = int(parts[0]) - - instance_map = {} - for part in parts[1:]: - key, value = part.split(".") - instance_id = int(key) - pos = int(value) - - instance_name = await store.get_name_from_instance_id(instance_id) # type: ignore[attr-defined] - instance_map[instance_name] = pos - - return cls( - topological=None, - stream=stream, - instance_map=frozendict(instance_map), - ) - except CancelledError: - raise - except Exception: - pass - raise SynapseError(400, "Invalid room stream token %r" % (string,)) - - @classmethod - def parse_stream_token(cls, string: str) -> "RoomStreamToken": - try: - if string[0] == "s": - return cls(topological=None, stream=int(string[1:])) - except Exception: - pass - raise SynapseError(400, "Invalid room stream token %r" % (string,)) - - def copy_and_advance(self, other: "RoomStreamToken") -> "RoomStreamToken": - """Return a new token such that if an event is after both this token and - the other token, then its after the returned token too. 
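The three token shapes, sketched with the synchronous parser (the "t" and "m" forms are normally produced by the async `parse`, which needs a store to resolve instance IDs; the instance name below is invented):

```python
from frozendict import frozendict

from synapse.types import RoomStreamToken

live = RoomStreamToken.parse_stream_token("s2633508")
assert live.topological is None and live.stream == 2633508

historic = RoomStreamToken(topological=426, stream=2633508)
assert historic.as_historical_tuple() == (426, 2633508)

# A vector-clock token: writers absent from the map are assumed to sit at
# the minimum stream position.
vector = RoomStreamToken(None, 56, frozendict({"persister-2": 58}))
assert vector.get_stream_pos_for_instance("persister-2") == 58
assert vector.get_stream_pos_for_instance("anything-else") == 56
assert vector.get_max_stream_pos() == 58
```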
- """ - - if self.topological or other.topological: - raise Exception("Can't advance topological tokens") - - max_stream = max(self.stream, other.stream) - - instance_map = { - instance: max( - self.instance_map.get(instance, self.stream), - other.instance_map.get(instance, other.stream), - ) - for instance in set(self.instance_map).union(other.instance_map) - } - - return RoomStreamToken(None, max_stream, frozendict(instance_map)) - - def as_historical_tuple(self) -> Tuple[int, int]: - """Returns a tuple of `(topological, stream)` for historical tokens. - - Raises if not an historical token (i.e. doesn't have a topological part). - """ - if self.topological is None: - raise Exception( - "Cannot call `RoomStreamToken.as_historical_tuple` on live token" - ) - - return self.topological, self.stream - - def get_stream_pos_for_instance(self, instance_name: str) -> int: - """Get the stream position that the given writer was at at this token. - - This only makes sense for "live" tokens that may have a vector clock - component, and so asserts that this is a "live" token. - """ - assert self.topological is None - - # If we don't have an entry for the instance we can assume that it was - # at `self.stream`. - return self.instance_map.get(instance_name, self.stream) - - def get_max_stream_pos(self) -> int: - """Get the maximum stream position referenced in this token. - - The corresponding "min" position is, by definition just `self.stream`. - - This is used to handle tokens that have non-empty `instance_map`, and so - reference stream positions after the `self.stream` position. - """ - return max(self.instance_map.values(), default=self.stream) - - async def to_string(self, store: "DataStore") -> str: - if self.topological is not None: - return "t%d-%d" % (self.topological, self.stream) - elif self.instance_map: - entries = [] - for name, pos in self.instance_map.items(): - instance_id = await store.get_id_for_instance(name) - entries.append(f"{instance_id}.{pos}") - - encoded_map = "~".join(entries) - return f"m{self.stream}~{encoded_map}" - else: - return "s%d" % (self.stream,) - - -class StreamKeyType: - """Known stream types. - - A stream is a list of entities ordered by an incrementing "stream token". - """ - - ROOM: Final = "room_key" - PRESENCE: Final = "presence_key" - TYPING: Final = "typing_key" - RECEIPT: Final = "receipt_key" - ACCOUNT_DATA: Final = "account_data_key" - PUSH_RULES: Final = "push_rules_key" - TO_DEVICE: Final = "to_device_key" - DEVICE_LIST: Final = "device_list_key" - - -@attr.s(slots=True, frozen=True, auto_attribs=True) -class StreamToken: - """A collection of keys joined together by underscores in the following - order and which represent the position in their respective streams. - - ex. `s2633508_17_338_6732159_1082514_541479_274711_265584_1` - 1. `room_key`: `s2633508` which is a `RoomStreamToken` - - `RoomStreamToken`'s can also look like `t426-2633508` or `m56~2.58~3.59` - - See the docstring for `RoomStreamToken` for more details. - 2. `presence_key`: `17` - 3. `typing_key`: `338` - 4. `receipt_key`: `6732159` - 5. `account_data_key`: `1082514` - 6. `push_rules_key`: `541479` - 7. `to_device_key`: `274711` - 8. `device_list_key`: `265584` - 9. 
`groups_key`: `1` (note that this key is now unused) - - You can see how many of these keys correspond to the various - fields in a "/sync" response: - ```json - { - "next_batch": "s12_4_0_1_1_1_1_4_1", - "presence": { - "events": [] - }, - "device_lists": { - "changed": [] - }, - "rooms": { - "join": { - "!QrZlfIDQLNLdZHqTnt:hs1": { - "timeline": { - "events": [], - "prev_batch": "s10_4_0_1_1_1_1_4_1", - "limited": false - }, - "state": { - "events": [] - }, - "account_data": { - "events": [] - }, - "ephemeral": { - "events": [] - } - } - } - } - } - ``` - - --- - - For caching purposes, `StreamToken`s and by extension, all their attributes, - must be hashable. - """ - - room_key: RoomStreamToken = attr.ib( - validator=attr.validators.instance_of(RoomStreamToken) - ) - presence_key: int - typing_key: int - receipt_key: int - account_data_key: int - push_rules_key: int - to_device_key: int - device_list_key: int - # Note that the groups key is no longer used and may have bogus values. - groups_key: int - - _SEPARATOR = "_" - START: ClassVar["StreamToken"] - - @classmethod - @cancellable - async def from_string(cls, store: "DataStore", string: str) -> "StreamToken": - """ - Creates a RoomStreamToken from its textual representation. - """ - try: - keys = string.split(cls._SEPARATOR) - while len(keys) < len(attr.fields(cls)): - # i.e. old token from before receipt_key - keys.append("0") - return cls( - await RoomStreamToken.parse(store, keys[0]), *(int(k) for k in keys[1:]) - ) - except CancelledError: - raise - except Exception: - raise SynapseError(400, "Invalid stream token") - - async def to_string(self, store: "DataStore") -> str: - return self._SEPARATOR.join( - [ - await self.room_key.to_string(store), - str(self.presence_key), - str(self.typing_key), - str(self.receipt_key), - str(self.account_data_key), - str(self.push_rules_key), - str(self.to_device_key), - str(self.device_list_key), - # Note that the groups key is no longer used, but it is still - # serialized so that there will not be confusion in the future - # if additional tokens are added. - str(self.groups_key), - ] - ) - - @property - def room_stream_id(self) -> int: - return self.room_key.stream - - def copy_and_advance(self, key: str, new_value: Any) -> "StreamToken": - """Advance the given key in the token to a new value if and only if the - new value is after the old value. - - :raises TypeError: if `key` is not the one of the keys tracked by a StreamToken. - """ - if key == StreamKeyType.ROOM: - new_token = self.copy_and_replace( - StreamKeyType.ROOM, self.room_key.copy_and_advance(new_value) - ) - return new_token - - new_token = self.copy_and_replace(key, new_value) - new_id = int(getattr(new_token, key)) - old_id = int(getattr(self, key)) - - if old_id < new_id: - return new_token - else: - return self - - def copy_and_replace(self, key: str, new_value: Any) -> "StreamToken": - return attr.evolve(self, **{key: new_value}) - - -StreamToken.START = StreamToken(RoomStreamToken(None, 0), 0, 0, 0, 0, 0, 0, 0, 0) - - -@attr.s(slots=True, frozen=True, auto_attribs=True) -class PersistedEventPosition: - """Position of a newly persisted event with instance that persisted it. - - This can be used to test whether the event is persisted before or after a - RoomStreamToken. 
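The before/after comparison described here, in miniature (instance names invented; assuming a Synapse checkout):

```python
from frozendict import frozendict

from synapse.types import PersistedEventPosition, RoomStreamToken

pos = PersistedEventPosition(instance_name="worker1", stream=60)

# The token records worker1 as having persisted up to 58, so an event at
# stream 60 on worker1 landed after it.
token = RoomStreamToken(None, 56, frozendict({"worker1": 58}))
assert pos.persisted_after(token)
assert not pos.persisted_after(RoomStreamToken(None, 60))
```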
- """ - - instance_name: str - stream: int - - def persisted_after(self, token: RoomStreamToken) -> bool: - return token.get_stream_pos_for_instance(self.instance_name) < self.stream - - def to_room_stream_token(self) -> RoomStreamToken: - """Converts the position to a room stream token such that events - persisted in the same room after this position will be after the - returned `RoomStreamToken`. - - Note: no guarantees are made about ordering w.r.t. events in other - rooms. - """ - # Doing the naive thing satisfies the desired properties described in - # the docstring. - return RoomStreamToken(None, self.stream) - - -@attr.s(slots=True, frozen=True, auto_attribs=True) -class ThirdPartyInstanceID: - appservice_id: Optional[str] - network_id: Optional[str] - - # Deny iteration because it will bite you if you try to create a singleton - # set by: - # users = set(user) - def __iter__(self) -> NoReturn: - raise ValueError("Attempted to iterate a %s" % (type(self).__name__,)) - - # Because this class is a frozen class, it is deeply immutable. - def __copy__(self) -> "ThirdPartyInstanceID": - return self - - def __deepcopy__(self, memo: Dict[str, object]) -> "ThirdPartyInstanceID": - return self - - @classmethod - def from_string(cls, s: str) -> "ThirdPartyInstanceID": - bits = s.split("|", 2) - if len(bits) != 2: - raise SynapseError(400, "Invalid ID %r" % (s,)) - - return cls(appservice_id=bits[0], network_id=bits[1]) - - def to_string(self) -> str: - return "%s|%s" % (self.appservice_id, self.network_id) - - __str__ = to_string - - -@attr.s(slots=True, frozen=True, auto_attribs=True) -class ReadReceipt: - """Information about a read-receipt""" - - room_id: str - receipt_type: str - user_id: str - event_ids: List[str] - thread_id: Optional[str] - data: JsonDict - - -@attr.s(slots=True, frozen=True, auto_attribs=True) -class DeviceListUpdates: - """ - An object containing a diff of information regarding other users' device lists, intended for - a recipient to carry out device list tracking. - - Attributes: - changed: A set of users whose device lists have changed recently. - left: A set of users who the recipient no longer needs to track the device lists of. - Typically when those users no longer share any end-to-end encryption enabled rooms. - """ - - # We need to use a factory here, otherwise `set` is not evaluated at - # object instantiation, but instead at class definition instantiation. - # The latter happening only once, thus always giving you the same sets - # across multiple DeviceListUpdates instances. - # Also see: don't define mutable default arguments. 
- changed: Set[str] = attr.ib(factory=set) - left: Set[str] = attr.ib(factory=set) - - def __bool__(self) -> bool: - return bool(self.changed or self.left) - - -def get_verify_key_from_cross_signing_key( - key_info: Mapping[str, Any] -) -> Tuple[str, VerifyKey]: - """Get the key ID and signedjson verify key from a cross-signing key dict - - Args: - key_info: a cross-signing key dict, which must have a "keys" - property that has exactly one item in it - - Returns: - the key ID and verify key for the cross-signing key - """ - # make sure that a `keys` field is provided - if "keys" not in key_info: - raise ValueError("Invalid key") - keys = key_info["keys"] - # and that it contains exactly one key - if len(keys) == 1: - key_id, key_data = next(iter(keys.items())) - return key_id, decode_verify_key_bytes(key_id, decode_base64(key_data)) - else: - raise ValueError("Invalid key") - - -@attr.s(auto_attribs=True, frozen=True, slots=True) -class UserInfo: - """Holds information about a user. Result of get_userinfo_by_id. - - Attributes: - user_id: ID of the user. - appservice_id: Application service ID that created this user. - consent_server_notice_sent: Version of policy documents the user has been sent. - consent_version: Version of policy documents the user has consented to. - creation_ts: Creation timestamp of the user. - is_admin: True if the user is an admin. - is_deactivated: True if the user has been deactivated. - is_guest: True if the user is a guest user. - is_shadow_banned: True if the user has been shadow-banned. - user_type: User type (None for normal user, 'support' and 'bot' other options). - """ - - user_id: UserID - appservice_id: Optional[int] - consent_server_notice_sent: Optional[str] - consent_version: Optional[str] - user_type: Optional[str] - creation_ts: int - is_admin: bool - is_deactivated: bool - is_guest: bool - is_shadow_banned: bool - - -class UserProfile(TypedDict): - user_id: str - display_name: Optional[str] - avatar_url: Optional[str] - - -@attr.s(auto_attribs=True, frozen=True, slots=True) -class RetentionPolicy: - min_lifetime: Optional[int] = None - max_lifetime: Optional[int] = None diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py new file mode 100644 index 0000000000..f2d436ddc3 --- /dev/null +++ b/synapse/types/__init__.py @@ -0,0 +1,928 @@ +# Copyright 2014-2016 OpenMarket Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import abc +import re +import string +from typing import ( + TYPE_CHECKING, + Any, + ClassVar, + Dict, + List, + Mapping, + Match, + MutableMapping, + NoReturn, + Optional, + Set, + Tuple, + Type, + TypeVar, + Union, +) + +import attr +from frozendict import frozendict +from signedjson.key import decode_verify_key_bytes +from signedjson.types import VerifyKey +from typing_extensions import Final, TypedDict +from unpaddedbase64 import decode_base64 +from zope.interface import Interface + +from twisted.internet.defer import CancelledError +from twisted.internet.interfaces import ( + IReactorCore, + IReactorPluggableNameResolver, + IReactorSSL, + IReactorTCP, + IReactorThreads, + IReactorTime, +) + +from synapse.api.errors import Codes, SynapseError +from synapse.util.cancellation import cancellable +from synapse.util.stringutils import parse_and_validate_server_name + +if TYPE_CHECKING: + from synapse.appservice.api import ApplicationService + from synapse.storage.databases.main import DataStore, PurgeEventsStore + from synapse.storage.databases.main.appservice import ApplicationServiceWorkerStore + +# Define a state map type from type/state_key to T (usually an event ID or +# event) +T = TypeVar("T") +StateKey = Tuple[str, str] +StateMap = Mapping[StateKey, T] +MutableStateMap = MutableMapping[StateKey, T] + +# JSON types. These could be made stronger, but will do for now. +# A JSON-serialisable dict. +JsonDict = Dict[str, Any] +# A JSON-serialisable mapping; roughly speaking an immutable JSONDict. +# Useful when you have a TypedDict which isn't going to be mutated and you don't want +# to cast to JsonDict everywhere. +JsonMapping = Mapping[str, Any] +# A JSON-serialisable object. +JsonSerializable = object + + +# Note that this seems to require inheriting *directly* from Interface in order +# for mypy-zope to realize it is an interface. +class ISynapseReactor( + IReactorTCP, + IReactorSSL, + IReactorPluggableNameResolver, + IReactorTime, + IReactorCore, + IReactorThreads, + Interface, +): + """The interfaces necessary for Synapse to function.""" + + +@attr.s(frozen=True, slots=True, auto_attribs=True) +class Requester: + """ + Represents the user making a request + + Attributes: + user: id of the user making the request + access_token_id: *ID* of the access token used for this + request, or None if it came via the appservice API or similar + is_guest: True if the user making this request is a guest user + shadow_banned: True if the user making this request has been shadow-banned. + device_id: device_id which was set at authentication time + app_service: the AS requesting on behalf of the user + authenticated_entity: The entity that authenticated when making the request. + This is different to the user_id when an admin user or the server is + "puppeting" the user. 
+ """ + + user: "UserID" + access_token_id: Optional[int] + is_guest: bool + shadow_banned: bool + device_id: Optional[str] + app_service: Optional["ApplicationService"] + authenticated_entity: str + + def serialize(self) -> Dict[str, Any]: + """Converts self to a type that can be serialized as JSON, and then + deserialized by `deserialize` + + Returns: + dict + """ + return { + "user_id": self.user.to_string(), + "access_token_id": self.access_token_id, + "is_guest": self.is_guest, + "shadow_banned": self.shadow_banned, + "device_id": self.device_id, + "app_server_id": self.app_service.id if self.app_service else None, + "authenticated_entity": self.authenticated_entity, + } + + @staticmethod + def deserialize( + store: "ApplicationServiceWorkerStore", input: Dict[str, Any] + ) -> "Requester": + """Converts a dict that was produced by `serialize` back into a + Requester. + + Args: + store: Used to convert AS ID to AS object + input: A dict produced by `serialize` + + Returns: + Requester + """ + appservice = None + if input["app_server_id"]: + appservice = store.get_app_service_by_id(input["app_server_id"]) + + return Requester( + user=UserID.from_string(input["user_id"]), + access_token_id=input["access_token_id"], + is_guest=input["is_guest"], + shadow_banned=input["shadow_banned"], + device_id=input["device_id"], + app_service=appservice, + authenticated_entity=input["authenticated_entity"], + ) + + +def create_requester( + user_id: Union[str, "UserID"], + access_token_id: Optional[int] = None, + is_guest: bool = False, + shadow_banned: bool = False, + device_id: Optional[str] = None, + app_service: Optional["ApplicationService"] = None, + authenticated_entity: Optional[str] = None, +) -> Requester: + """ + Create a new ``Requester`` object + + Args: + user_id: id of the user making the request + access_token_id: *ID* of the access token used for this + request, or None if it came via the appservice API or similar + is_guest: True if the user making this request is a guest user + shadow_banned: True if the user making this request is shadow-banned. + device_id: device_id which was set at authentication time + app_service: the AS requesting on behalf of the user + authenticated_entity: The entity that authenticated when making the request. + This is different to the user_id when an admin user or the server is + "puppeting" the user. + + Returns: + Requester + """ + if not isinstance(user_id, UserID): + user_id = UserID.from_string(user_id) + + if authenticated_entity is None: + authenticated_entity = user_id.to_string() + + return Requester( + user_id, + access_token_id, + is_guest, + shadow_banned, + device_id, + app_service, + authenticated_entity, + ) + + +def get_domain_from_id(string: str) -> str: + idx = string.find(":") + if idx == -1: + raise SynapseError(400, "Invalid ID: %r" % (string,)) + return string[idx + 1 :] + + +def get_localpart_from_id(string: str) -> str: + idx = string.find(":") + if idx == -1: + raise SynapseError(400, "Invalid ID: %r" % (string,)) + return string[1:idx] + + +DS = TypeVar("DS", bound="DomainSpecificString") + + +@attr.s(slots=True, frozen=True, repr=False, auto_attribs=True) +class DomainSpecificString(metaclass=abc.ABCMeta): + """Common base class among ID/name strings that have a local part and a + domain name, prefixed with a sigil. 
+ + Has the fields: + + 'localpart' : The local part of the name (without the leading sigil) + 'domain' : The domain part of the name + """ + + SIGIL: ClassVar[str] = abc.abstractproperty() # type: ignore + + localpart: str + domain: str + + # Because this is a frozen class, it is deeply immutable. + def __copy__(self: DS) -> DS: + return self + + def __deepcopy__(self: DS, memo: Dict[str, object]) -> DS: + return self + + @classmethod + def from_string(cls: Type[DS], s: str) -> DS: + """Parse the string given by 's' into a structure object.""" + if len(s) < 1 or s[0:1] != cls.SIGIL: + raise SynapseError( + 400, + "Expected %s string to start with '%s'" % (cls.__name__, cls.SIGIL), + Codes.INVALID_PARAM, + ) + + parts = s[1:].split(":", 1) + if len(parts) != 2: + raise SynapseError( + 400, + "Expected %s of the form '%slocalname:domain'" + % (cls.__name__, cls.SIGIL), + Codes.INVALID_PARAM, + ) + + domain = parts[1] + # This code will need changing if we want to support multiple domain + # names on one HS + return cls(localpart=parts[0], domain=domain) + + def to_string(self) -> str: + """Return a string encoding the fields of the structure object.""" + return "%s%s:%s" % (self.SIGIL, self.localpart, self.domain) + + @classmethod + def is_valid(cls: Type[DS], s: str) -> bool: + """Parses the input string and attempts to ensure it is valid.""" + # TODO: this does not reject an empty localpart or an overly-long string. + # See https://spec.matrix.org/v1.2/appendices/#identifier-grammar + try: + obj = cls.from_string(s) + # Apply additional validation to the domain. This is only done + # during is_valid (and not part of from_string) since it is + # possible for invalid data to exist in room-state, etc. + parse_and_validate_server_name(obj.domain) + return True + except Exception: + return False + + __repr__ = to_string + + +@attr.s(slots=True, frozen=True, repr=False) +class UserID(DomainSpecificString): + """Structure representing a user ID.""" + + SIGIL = "@" + + +@attr.s(slots=True, frozen=True, repr=False) +class RoomAlias(DomainSpecificString): + """Structure representing a room name.""" + + SIGIL = "#" + + +@attr.s(slots=True, frozen=True, repr=False) +class RoomID(DomainSpecificString): + """Structure representing a room id.""" + + SIGIL = "!" + + +@attr.s(slots=True, frozen=True, repr=False) +class EventID(DomainSpecificString): + """Structure representing an event id.""" + + SIGIL = "$" + + +mxid_localpart_allowed_characters = set( + "_-./=" + string.ascii_lowercase + string.digits +) + + +def contains_invalid_mxid_characters(localpart: str) -> bool: + """Check for characters not allowed in an mxid or groupid localpart + + Args: + localpart: the localpart to be checked + + Returns: + True if there are any naughty characters + """ + return any(c not in mxid_localpart_allowed_characters for c in localpart) + + +UPPER_CASE_PATTERN = re.compile(b"[A-Z_]") + +# the following is a pattern which matches '=', and bytes which are not allowed in a mxid +# localpart. 
+# +# It works by: +# * building a string containing the allowed characters (excluding '=') +# * escaping every special character with a backslash (to stop '-' being interpreted as a +# range operator) +# * wrapping it in a '[^...]' regex +# * converting the whole lot to a 'bytes' sequence, so that we can use it to match +# bytes rather than strings +# +NON_MXID_CHARACTER_PATTERN = re.compile( + ("[^%s]" % (re.escape("".join(mxid_localpart_allowed_characters - {"="})),)).encode( + "ascii" + ) +) + + +def map_username_to_mxid_localpart( + username: Union[str, bytes], case_sensitive: bool = False +) -> str: + """Map a username onto a string suitable for a MXID + + This follows the algorithm laid out at + https://matrix.org/docs/spec/appendices.html#mapping-from-other-character-sets. + + Args: + username: username to be mapped + case_sensitive: true if TEST and test should be mapped + onto different mxids + + Returns: + string suitable for a mxid localpart + """ + if not isinstance(username, bytes): + username = username.encode("utf-8") + + # first we sort out upper-case characters + if case_sensitive: + + def f1(m: Match[bytes]) -> bytes: + return b"_" + m.group().lower() + + username = UPPER_CASE_PATTERN.sub(f1, username) + else: + username = username.lower() + + # then we sort out non-ascii characters by converting to the hex equivalent. + def f2(m: Match[bytes]) -> bytes: + return b"=%02x" % (m.group()[0],) + + username = NON_MXID_CHARACTER_PATTERN.sub(f2, username) + + # we also do the =-escaping to mxids starting with an underscore. + username = re.sub(b"^_", b"=5f", username) + + # we should now only have ascii bytes left, so can decode back to a string. + return username.decode("ascii") + + +@attr.s(frozen=True, slots=True, order=False) +class RoomStreamToken: + """Tokens are positions between events. The token "s1" comes after event 1. + + s0 s1 + | | + [0] â–¼ [1] â–¼ [2] + + Tokens can either be a point in the live event stream or a cursor going + through historic events. + + When traversing the live event stream, events are ordered by + `stream_ordering` (when they arrived at the homeserver). + + When traversing historic events, events are first ordered by their `depth` + (`topological_ordering` in the event graph) and tie-broken by + `stream_ordering` (when the event arrived at the homeserver). + + If you're looking for more info about what a token with all of the + underscores means, ex. + `s2633508_17_338_6732159_1082514_541479_274711_265584_1`, see the docstring + for `StreamToken` below. + + --- + + Live tokens start with an "s" followed by the `stream_ordering` of the event + that comes before the position of the token. Said another way: + `stream_ordering` uniquely identifies a persisted event. The live token + means "the position just after the event identified by `stream_ordering`". + An example token is: + + s2633508 + + --- + + Historic tokens start with a "t" followed by the `depth` + (`topological_ordering` in the event graph) of the event that comes before + the position of the token, followed by "-", followed by the + `stream_ordering` of the event that comes before the position of the token. + An example token is: + + t426-2633508 + + --- + + There is also a third mode for live tokens where the token starts with "m", + which is sometimes used when using sharded event persisters. 
In this case + the events stream is considered to be a set of streams (one for each writer) + and the token encodes the vector clock of positions of each writer in their + respective streams. + + The format of the token in such case is an initial integer min position, + followed by the mapping of instance ID to position separated by '.' and '~': + + m{min_pos}~{writer1}.{pos1}~{writer2}.{pos2}. ... + + The `min_pos` corresponds to the minimum position all writers have persisted + up to, and then only writers that are ahead of that position need to be + encoded. An example token is: + + m56~2.58~3.59 + + Which corresponds to a set of three (or more writers) where instances 2 and + 3 (these are instance IDs that can be looked up in the DB to fetch the more + commonly used instance names) are at positions 58 and 59 respectively, and + all other instances are at position 56. + + Note: The `RoomStreamToken` cannot have both a topological part and an + instance map. + + --- + + For caching purposes, `RoomStreamToken`s and by extension, all their + attributes, must be hashable. + """ + + topological: Optional[int] = attr.ib( + validator=attr.validators.optional(attr.validators.instance_of(int)), + ) + stream: int = attr.ib(validator=attr.validators.instance_of(int)) + + instance_map: "frozendict[str, int]" = attr.ib( + factory=frozendict, + validator=attr.validators.deep_mapping( + key_validator=attr.validators.instance_of(str), + value_validator=attr.validators.instance_of(int), + mapping_validator=attr.validators.instance_of(frozendict), + ), + ) + + def __attrs_post_init__(self) -> None: + """Validates that both `topological` and `instance_map` aren't set.""" + + if self.instance_map and self.topological: + raise ValueError( + "Cannot set both 'topological' and 'instance_map' on 'RoomStreamToken'." + ) + + @classmethod + async def parse(cls, store: "PurgeEventsStore", string: str) -> "RoomStreamToken": + try: + if string[0] == "s": + return cls(topological=None, stream=int(string[1:])) + if string[0] == "t": + parts = string[1:].split("-", 1) + return cls(topological=int(parts[0]), stream=int(parts[1])) + if string[0] == "m": + parts = string[1:].split("~") + stream = int(parts[0]) + + instance_map = {} + for part in parts[1:]: + key, value = part.split(".") + instance_id = int(key) + pos = int(value) + + instance_name = await store.get_name_from_instance_id(instance_id) # type: ignore[attr-defined] + instance_map[instance_name] = pos + + return cls( + topological=None, + stream=stream, + instance_map=frozendict(instance_map), + ) + except CancelledError: + raise + except Exception: + pass + raise SynapseError(400, "Invalid room stream token %r" % (string,)) + + @classmethod + def parse_stream_token(cls, string: str) -> "RoomStreamToken": + try: + if string[0] == "s": + return cls(topological=None, stream=int(string[1:])) + except Exception: + pass + raise SynapseError(400, "Invalid room stream token %r" % (string,)) + + def copy_and_advance(self, other: "RoomStreamToken") -> "RoomStreamToken": + """Return a new token such that if an event is after both this token and + the other token, then its after the returned token too. 
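Merging two live tokens takes the maximum position per writer (a sketch; the writer names are illustrative):

```python
from frozendict import frozendict

from synapse.types import RoomStreamToken

a = RoomStreamToken(None, 56, frozendict({"w1": 58}))
b = RoomStreamToken(None, 57, frozendict({"w2": 60}))

merged = a.copy_and_advance(b)
assert merged.stream == 57  # new minimum position across both tokens
# w1 keeps its explicit 58, w2 its 60; both are ahead of the new minimum.
assert merged.get_stream_pos_for_instance("w1") == 58
assert merged.get_stream_pos_for_instance("w2") == 60
```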
+ """ + + if self.topological or other.topological: + raise Exception("Can't advance topological tokens") + + max_stream = max(self.stream, other.stream) + + instance_map = { + instance: max( + self.instance_map.get(instance, self.stream), + other.instance_map.get(instance, other.stream), + ) + for instance in set(self.instance_map).union(other.instance_map) + } + + return RoomStreamToken(None, max_stream, frozendict(instance_map)) + + def as_historical_tuple(self) -> Tuple[int, int]: + """Returns a tuple of `(topological, stream)` for historical tokens. + + Raises if not an historical token (i.e. doesn't have a topological part). + """ + if self.topological is None: + raise Exception( + "Cannot call `RoomStreamToken.as_historical_tuple` on live token" + ) + + return self.topological, self.stream + + def get_stream_pos_for_instance(self, instance_name: str) -> int: + """Get the stream position that the given writer was at at this token. + + This only makes sense for "live" tokens that may have a vector clock + component, and so asserts that this is a "live" token. + """ + assert self.topological is None + + # If we don't have an entry for the instance we can assume that it was + # at `self.stream`. + return self.instance_map.get(instance_name, self.stream) + + def get_max_stream_pos(self) -> int: + """Get the maximum stream position referenced in this token. + + The corresponding "min" position is, by definition just `self.stream`. + + This is used to handle tokens that have non-empty `instance_map`, and so + reference stream positions after the `self.stream` position. + """ + return max(self.instance_map.values(), default=self.stream) + + async def to_string(self, store: "DataStore") -> str: + if self.topological is not None: + return "t%d-%d" % (self.topological, self.stream) + elif self.instance_map: + entries = [] + for name, pos in self.instance_map.items(): + instance_id = await store.get_id_for_instance(name) + entries.append(f"{instance_id}.{pos}") + + encoded_map = "~".join(entries) + return f"m{self.stream}~{encoded_map}" + else: + return "s%d" % (self.stream,) + + +class StreamKeyType: + """Known stream types. + + A stream is a list of entities ordered by an incrementing "stream token". + """ + + ROOM: Final = "room_key" + PRESENCE: Final = "presence_key" + TYPING: Final = "typing_key" + RECEIPT: Final = "receipt_key" + ACCOUNT_DATA: Final = "account_data_key" + PUSH_RULES: Final = "push_rules_key" + TO_DEVICE: Final = "to_device_key" + DEVICE_LIST: Final = "device_list_key" + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class StreamToken: + """A collection of keys joined together by underscores in the following + order and which represent the position in their respective streams. + + ex. `s2633508_17_338_6732159_1082514_541479_274711_265584_1` + 1. `room_key`: `s2633508` which is a `RoomStreamToken` + - `RoomStreamToken`'s can also look like `t426-2633508` or `m56~2.58~3.59` + - See the docstring for `RoomStreamToken` for more details. + 2. `presence_key`: `17` + 3. `typing_key`: `338` + 4. `receipt_key`: `6732159` + 5. `account_data_key`: `1082514` + 6. `push_rules_key`: `541479` + 7. `to_device_key`: `274711` + 8. `device_list_key`: `265584` + 9. 
`groups_key`: `1` (note that this key is now unused) + + You can see how many of these keys correspond to the various + fields in a "/sync" response: + ```json + { + "next_batch": "s12_4_0_1_1_1_1_4_1", + "presence": { + "events": [] + }, + "device_lists": { + "changed": [] + }, + "rooms": { + "join": { + "!QrZlfIDQLNLdZHqTnt:hs1": { + "timeline": { + "events": [], + "prev_batch": "s10_4_0_1_1_1_1_4_1", + "limited": false + }, + "state": { + "events": [] + }, + "account_data": { + "events": [] + }, + "ephemeral": { + "events": [] + } + } + } + } + } + ``` + + --- + + For caching purposes, `StreamToken`s and by extension, all their attributes, + must be hashable. + """ + + room_key: RoomStreamToken = attr.ib( + validator=attr.validators.instance_of(RoomStreamToken) + ) + presence_key: int + typing_key: int + receipt_key: int + account_data_key: int + push_rules_key: int + to_device_key: int + device_list_key: int + # Note that the groups key is no longer used and may have bogus values. + groups_key: int + + _SEPARATOR = "_" + START: ClassVar["StreamToken"] + + @classmethod + @cancellable + async def from_string(cls, store: "DataStore", string: str) -> "StreamToken": + """ + Creates a RoomStreamToken from its textual representation. + """ + try: + keys = string.split(cls._SEPARATOR) + while len(keys) < len(attr.fields(cls)): + # i.e. old token from before receipt_key + keys.append("0") + return cls( + await RoomStreamToken.parse(store, keys[0]), *(int(k) for k in keys[1:]) + ) + except CancelledError: + raise + except Exception: + raise SynapseError(400, "Invalid stream token") + + async def to_string(self, store: "DataStore") -> str: + return self._SEPARATOR.join( + [ + await self.room_key.to_string(store), + str(self.presence_key), + str(self.typing_key), + str(self.receipt_key), + str(self.account_data_key), + str(self.push_rules_key), + str(self.to_device_key), + str(self.device_list_key), + # Note that the groups key is no longer used, but it is still + # serialized so that there will not be confusion in the future + # if additional tokens are added. + str(self.groups_key), + ] + ) + + @property + def room_stream_id(self) -> int: + return self.room_key.stream + + def copy_and_advance(self, key: str, new_value: Any) -> "StreamToken": + """Advance the given key in the token to a new value if and only if the + new value is after the old value. + + :raises TypeError: if `key` is not the one of the keys tracked by a StreamToken. + """ + if key == StreamKeyType.ROOM: + new_token = self.copy_and_replace( + StreamKeyType.ROOM, self.room_key.copy_and_advance(new_value) + ) + return new_token + + new_token = self.copy_and_replace(key, new_value) + new_id = int(getattr(new_token, key)) + old_id = int(getattr(self, key)) + + if old_id < new_id: + return new_token + else: + return self + + def copy_and_replace(self, key: str, new_value: Any) -> "StreamToken": + return attr.evolve(self, **{key: new_value}) + + +StreamToken.START = StreamToken(RoomStreamToken(None, 0), 0, 0, 0, 0, 0, 0, 0, 0) + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class PersistedEventPosition: + """Position of a newly persisted event with instance that persisted it. + + This can be used to test whether the event is persisted before or after a + RoomStreamToken. 
+ """ + + instance_name: str + stream: int + + def persisted_after(self, token: RoomStreamToken) -> bool: + return token.get_stream_pos_for_instance(self.instance_name) < self.stream + + def to_room_stream_token(self) -> RoomStreamToken: + """Converts the position to a room stream token such that events + persisted in the same room after this position will be after the + returned `RoomStreamToken`. + + Note: no guarantees are made about ordering w.r.t. events in other + rooms. + """ + # Doing the naive thing satisfies the desired properties described in + # the docstring. + return RoomStreamToken(None, self.stream) + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class ThirdPartyInstanceID: + appservice_id: Optional[str] + network_id: Optional[str] + + # Deny iteration because it will bite you if you try to create a singleton + # set by: + # users = set(user) + def __iter__(self) -> NoReturn: + raise ValueError("Attempted to iterate a %s" % (type(self).__name__,)) + + # Because this class is a frozen class, it is deeply immutable. + def __copy__(self) -> "ThirdPartyInstanceID": + return self + + def __deepcopy__(self, memo: Dict[str, object]) -> "ThirdPartyInstanceID": + return self + + @classmethod + def from_string(cls, s: str) -> "ThirdPartyInstanceID": + bits = s.split("|", 2) + if len(bits) != 2: + raise SynapseError(400, "Invalid ID %r" % (s,)) + + return cls(appservice_id=bits[0], network_id=bits[1]) + + def to_string(self) -> str: + return "%s|%s" % (self.appservice_id, self.network_id) + + __str__ = to_string + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class ReadReceipt: + """Information about a read-receipt""" + + room_id: str + receipt_type: str + user_id: str + event_ids: List[str] + thread_id: Optional[str] + data: JsonDict + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class DeviceListUpdates: + """ + An object containing a diff of information regarding other users' device lists, intended for + a recipient to carry out device list tracking. + + Attributes: + changed: A set of users whose device lists have changed recently. + left: A set of users who the recipient no longer needs to track the device lists of. + Typically when those users no longer share any end-to-end encryption enabled rooms. + """ + + # We need to use a factory here, otherwise `set` is not evaluated at + # object instantiation, but instead at class definition instantiation. + # The latter happening only once, thus always giving you the same sets + # across multiple DeviceListUpdates instances. + # Also see: don't define mutable default arguments. 
+ changed: Set[str] = attr.ib(factory=set) + left: Set[str] = attr.ib(factory=set) + + def __bool__(self) -> bool: + return bool(self.changed or self.left) + + +def get_verify_key_from_cross_signing_key( + key_info: Mapping[str, Any] +) -> Tuple[str, VerifyKey]: + """Get the key ID and signedjson verify key from a cross-signing key dict + + Args: + key_info: a cross-signing key dict, which must have a "keys" + property that has exactly one item in it + + Returns: + the key ID and verify key for the cross-signing key + """ + # make sure that a `keys` field is provided + if "keys" not in key_info: + raise ValueError("Invalid key") + keys = key_info["keys"] + # and that it contains exactly one key + if len(keys) == 1: + key_id, key_data = next(iter(keys.items())) + return key_id, decode_verify_key_bytes(key_id, decode_base64(key_data)) + else: + raise ValueError("Invalid key") + + +@attr.s(auto_attribs=True, frozen=True, slots=True) +class UserInfo: + """Holds information about a user. Result of get_userinfo_by_id. + + Attributes: + user_id: ID of the user. + appservice_id: Application service ID that created this user. + consent_server_notice_sent: Version of policy documents the user has been sent. + consent_version: Version of policy documents the user has consented to. + creation_ts: Creation timestamp of the user. + is_admin: True if the user is an admin. + is_deactivated: True if the user has been deactivated. + is_guest: True if the user is a guest user. + is_shadow_banned: True if the user has been shadow-banned. + user_type: User type (None for normal user, 'support' and 'bot' other options). + """ + + user_id: UserID + appservice_id: Optional[int] + consent_server_notice_sent: Optional[str] + consent_version: Optional[str] + user_type: Optional[str] + creation_ts: int + is_admin: bool + is_deactivated: bool + is_guest: bool + is_shadow_banned: bool + + +class UserProfile(TypedDict): + user_id: str + display_name: Optional[str] + avatar_url: Optional[str] + + +@attr.s(auto_attribs=True, frozen=True, slots=True) +class RetentionPolicy: + min_lifetime: Optional[int] = None + max_lifetime: Optional[int] = None diff --git a/synapse/types/state.py b/synapse/types/state.py new file mode 100644 index 0000000000..0004d955b4 --- /dev/null +++ b/synapse/types/state.py @@ -0,0 +1,567 @@ +# Copyright 2014-2016 OpenMarket Ltd +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import logging +from typing import ( + TYPE_CHECKING, + Callable, + Collection, + Dict, + Iterable, + List, + Mapping, + Optional, + Set, + Tuple, + TypeVar, +) + +import attr +from frozendict import frozendict + +from synapse.api.constants import EventTypes +from synapse.types import MutableStateMap, StateKey, StateMap + +if TYPE_CHECKING: + from typing import FrozenSet # noqa: used within quoted type hint; flake8 sad + + +logger = logging.getLogger(__name__) + +# Used for generic functions below +T = TypeVar("T") + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class StateFilter: + """A filter used when querying for state. + + Attributes: + types: Map from type to set of state keys (or None). This specifies + which state_keys for the given type to fetch from the DB. If None + then all events with that type are fetched. If the set is empty + then no events with that type are fetched. + include_others: Whether to fetch events with types that do not + appear in `types`. + """ + + types: "frozendict[str, Optional[FrozenSet[str]]]" + include_others: bool = False + + def __attrs_post_init__(self) -> None: + # If `include_others` is set we canonicalise the filter by removing + # wildcards from the types dictionary + if self.include_others: + # this is needed to work around the fact that StateFilter is frozen + object.__setattr__( + self, + "types", + frozendict({k: v for k, v in self.types.items() if v is not None}), + ) + + @staticmethod + def all() -> "StateFilter": + """Returns a filter that fetches everything. + + Returns: + The state filter. + """ + return _ALL_STATE_FILTER + + @staticmethod + def none() -> "StateFilter": + """Returns a filter that fetches nothing. + + Returns: + The new state filter. + """ + return _NONE_STATE_FILTER + + @staticmethod + def from_types(types: Iterable[Tuple[str, Optional[str]]]) -> "StateFilter": + """Creates a filter that only fetches the given types + + Args: + types: A list of type and state keys to fetch. A state_key of None + fetches everything for that type + + Returns: + The new state filter. + """ + type_dict: Dict[str, Optional[Set[str]]] = {} + for typ, s in types: + if typ in type_dict: + if type_dict[typ] is None: + continue + + if s is None: + type_dict[typ] = None + continue + + type_dict.setdefault(typ, set()).add(s) # type: ignore + + return StateFilter( + types=frozendict( + (k, frozenset(v) if v is not None else None) + for k, v in type_dict.items() + ) + ) + + @staticmethod + def from_lazy_load_member_list(members: Iterable[str]) -> "StateFilter": + """Creates a filter that returns all non-member events, plus the member + events for the given users + + Args: + members: Set of user IDs + + Returns: + The new state filter + """ + return StateFilter( + types=frozendict({EventTypes.Member: frozenset(members)}), + include_others=True, + ) + + @staticmethod + def freeze( + types: Mapping[str, Optional[Collection[str]]], include_others: bool + ) -> "StateFilter": + """ + Returns a (frozen) StateFilter with the same contents as the parameters + specified here, which can be made of mutable types. 
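
The filter semantics described above are easiest to see in a short usage sketch. This assumes a Synapse checkout where this new `synapse.types.state` module is importable; the room type and user IDs are made-up values:

```python
# Hedged usage sketch of the StateFilter API defined in this module.
from synapse.types.state import StateFilter

# Fetch the m.room.member state for two specific users, plus every event
# whose type is not listed (include_others=True).
f = StateFilter.freeze(
    {"m.room.member": ["@alice:example.org", "@bob:example.org"]},
    include_others=True,
)

print(f.is_full())         # False - membership is restricted to two keys
print(f.has_wildcards())   # True  - include_others matches unlisted types
print(f.concrete_types())  # the two ("m.room.member", user_id) pairs
```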
+ """ + types_with_frozen_values: Dict[str, Optional[FrozenSet[str]]] = {} + for state_types, state_keys in types.items(): + if state_keys is not None: + types_with_frozen_values[state_types] = frozenset(state_keys) + else: + types_with_frozen_values[state_types] = None + + return StateFilter( + frozendict(types_with_frozen_values), include_others=include_others + ) + + def return_expanded(self) -> "StateFilter": + """Creates a new StateFilter where type wild cards have been removed + (except for memberships). The returned filter is a superset of the + current one, i.e. anything that passes the current filter will pass + the returned filter. + + This helps the caching as the DictionaryCache knows if it has *all* the + state, but does not know if it has all of the keys of a particular type, + which makes wildcard lookups expensive unless we have a complete cache. + Hence, if we are doing a wildcard lookup, populate the cache fully so + that we can do an efficient lookup next time. + + Note that since we have two caches, one for membership events and one for + other events, we can be a bit more clever than simply returning + `StateFilter.all()` if `has_wildcards()` is True. + + We return a StateFilter where: + 1. the list of membership events to return is the same + 2. if there is a wildcard that matches non-member events we + return all non-member events + + Returns: + The new state filter. + """ + + if self.is_full(): + # If we're going to return everything then there's nothing to do + return self + + if not self.has_wildcards(): + # If there are no wild cards, there's nothing to do + return self + + if EventTypes.Member in self.types: + get_all_members = self.types[EventTypes.Member] is None + else: + get_all_members = self.include_others + + has_non_member_wildcard = self.include_others or any( + state_keys is None + for t, state_keys in self.types.items() + if t != EventTypes.Member + ) + + if not has_non_member_wildcard: + # If there are no non-member wild cards we can just return ourselves + return self + + if get_all_members: + # We want to return everything. + return StateFilter.all() + elif EventTypes.Member in self.types: + # We want to return all non-members, but only particular + # memberships + return StateFilter( + types=frozendict({EventTypes.Member: self.types[EventTypes.Member]}), + include_others=True, + ) + else: + # We want to return all non-members + return _ALL_NON_MEMBER_STATE_FILTER + + def make_sql_filter_clause(self) -> Tuple[str, List[str]]: + """Converts the filter to an SQL clause. + + For example: + + f = StateFilter.from_types([("m.room.create", "")]) + clause, args = f.make_sql_filter_clause() + clause == "(type = ? AND state_key = ?)" + args == ['m.room.create', ''] + + + Returns: + The SQL string (may be empty) and arguments. An empty SQL string is + returned when the filter matches everything (i.e. is "full"). + """ + + where_clause = "" + where_args: List[str] = [] + + if self.is_full(): + return where_clause, where_args + + if not self.include_others and not self.types: + # i.e. this is an empty filter, so we need to return a clause that + # will match nothing + return "1 = 2", [] + + # First we build up a lost of clauses for each type/state_key combo + clauses = [] + for etype, state_keys in self.types.items(): + if state_keys is None: + clauses.append("(type = ?)") + where_args.append(etype) + continue + + for state_key in state_keys: + clauses.append("(type = ? 
AND state_key = ?)") + where_args.extend((etype, state_key)) + + # This will match anything that appears in `self.types` + where_clause = " OR ".join(clauses) + + # If we want to include stuff that's not in the types dict then we add + # a `OR type NOT IN (...)` clause to the end. + if self.include_others: + if where_clause: + where_clause += " OR " + + where_clause += "type NOT IN (%s)" % (",".join(["?"] * len(self.types)),) + where_args.extend(self.types) + + return where_clause, where_args + + def max_entries_returned(self) -> Optional[int]: + """Returns the maximum number of entries this filter will return if + known, otherwise returns None. + + For example a simple state filter asking for `("m.room.create", "")` + will return 1, whereas the default state filter will return None. + + This is used to bail out early if the right number of entries have been + fetched. + """ + if self.has_wildcards(): + return None + + return len(self.concrete_types()) + + def filter_state(self, state_dict: StateMap[T]) -> MutableStateMap[T]: + """Returns the state filtered with by this StateFilter. + + Args: + state: The state map to filter + + Returns: + The filtered state map. + This is a copy, so it's safe to mutate. + """ + if self.is_full(): + return dict(state_dict) + + filtered_state = {} + for k, v in state_dict.items(): + typ, state_key = k + if typ in self.types: + state_keys = self.types[typ] + if state_keys is None or state_key in state_keys: + filtered_state[k] = v + elif self.include_others: + filtered_state[k] = v + + return filtered_state + + def is_full(self) -> bool: + """Whether this filter fetches everything or not + + Returns: + True if the filter fetches everything. + """ + return self.include_others and not self.types + + def has_wildcards(self) -> bool: + """Whether the filter includes wildcards or is attempting to fetch + specific state. + + Returns: + True if the filter includes wildcards. + """ + + return self.include_others or any( + state_keys is None for state_keys in self.types.values() + ) + + def concrete_types(self) -> List[Tuple[str, str]]: + """Returns a list of concrete type/state_keys (i.e. not None) that + will be fetched. This will be a complete list if `has_wildcards` + returns False, but otherwise will be a subset (or even empty). + + Returns: + A list of type/state_keys tuples. + """ + return [ + (t, s) + for t, state_keys in self.types.items() + if state_keys is not None + for s in state_keys + ] + + def get_member_split(self) -> Tuple["StateFilter", "StateFilter"]: + """Return the filter split into two: one which assumes it's exclusively + matching against member state, and one which assumes it's matching + against non member state. + + This is useful due to the returned filters giving correct results for + `is_full()`, `has_wildcards()`, etc, when operating against maps that + either exclusively contain member events or only contain non-member + events. (Which is the case when dealing with the member vs non-member + state caches). 
+ + Returns: + The member and non member filters + """ + + if EventTypes.Member in self.types: + state_keys = self.types[EventTypes.Member] + if state_keys is None: + member_filter = StateFilter.all() + else: + member_filter = StateFilter(frozendict({EventTypes.Member: state_keys})) + elif self.include_others: + member_filter = StateFilter.all() + else: + member_filter = StateFilter.none() + + non_member_filter = StateFilter( + types=frozendict( + {k: v for k, v in self.types.items() if k != EventTypes.Member} + ), + include_others=self.include_others, + ) + + return member_filter, non_member_filter + + def _decompose_into_four_parts( + self, + ) -> Tuple[Tuple[bool, Set[str]], Tuple[Set[str], Set[StateKey]]]: + """ + Decomposes this state filter into 4 constituent parts, which can be + thought of as this: + all? - minus_wildcards + plus_wildcards + plus_state_keys + + where + * all represents ALL state + * minus_wildcards represents entire state types to remove + * plus_wildcards represents entire state types to add + * plus_state_keys represents individual state keys to add + + See `recompose_from_four_parts` for the other direction of this + correspondence. + """ + is_all = self.include_others + excluded_types: Set[str] = {t for t in self.types if is_all} + wildcard_types: Set[str] = {t for t, s in self.types.items() if s is None} + concrete_keys: Set[StateKey] = set(self.concrete_types()) + + return (is_all, excluded_types), (wildcard_types, concrete_keys) + + @staticmethod + def _recompose_from_four_parts( + all_part: bool, + minus_wildcards: Set[str], + plus_wildcards: Set[str], + plus_state_keys: Set[StateKey], + ) -> "StateFilter": + """ + Recomposes a state filter from 4 parts. + + See `decompose_into_four_parts` (the other direction of this + correspondence) for descriptions on each of the parts. + """ + + # {state type -> set of state keys OR None for wildcard} + # (The same structure as that of a StateFilter.) + new_types: Dict[str, Optional[Set[str]]] = {} + + # if we start with all, insert the excluded statetypes as empty sets + # to prevent them from being included + if all_part: + new_types.update({state_type: set() for state_type in minus_wildcards}) + + # insert the plus wildcards + new_types.update({state_type: None for state_type in plus_wildcards}) + + # insert the specific state keys + for state_type, state_key in plus_state_keys: + if state_type in new_types: + entry = new_types[state_type] + if entry is not None: + entry.add(state_key) + elif not all_part: + # don't insert if the entire type is already included by + # include_others as this would actually shrink the state allowed + # by this filter. + new_types[state_type] = {state_key} + + return StateFilter.freeze(new_types, include_others=all_part) + + def approx_difference(self, other: "StateFilter") -> "StateFilter": + """ + Returns a state filter which represents `self - other`. + + This is useful for determining what state remains to be pulled out of the + database if we want the state included by `self` but already have the state + included by `other`. + + The returned state filter + - MUST include all state events that are included by this filter (`self`) + unless they are included by `other`; + - MUST NOT include state events not included by this filter (`self`); and + - MAY be an over-approximation: the returned state filter + MAY additionally include some state events from `other`. + + This implementation attempts to return the narrowest such state filter. 
+ In the case that `self` contains wildcards for state types where + `other` contains specific state keys, an approximation must be made: + the returned state filter keeps the wildcard, as state filters are not + able to express 'all state keys except some given examples'. + e.g. + StateFilter(m.room.member -> None (wildcard)) + minus + StateFilter(m.room.member -> {'@wombat:example.org'}) + is approximated as + StateFilter(m.room.member -> None (wildcard)) + """ + + # We first transform self and other into an alternative representation: + # - whether or not they include all events to begin with ('all') + # - if so, which event types are excluded? ('excludes') + # - which entire event types to include ('wildcards') + # - which concrete state keys to include ('concrete state keys') + (self_all, self_excludes), ( + self_wildcards, + self_concrete_keys, + ) = self._decompose_into_four_parts() + (other_all, other_excludes), ( + other_wildcards, + other_concrete_keys, + ) = other._decompose_into_four_parts() + + # Start with an estimate of the difference based on self + new_all = self_all + # Wildcards from the other can be added to the exclusion filter + new_excludes = self_excludes | other_wildcards + # We remove wildcards that appeared as wildcards in the other + new_wildcards = self_wildcards - other_wildcards + # We filter out the concrete state keys that appear in the other + # as wildcards or concrete state keys. + new_concrete_keys = { + (state_type, state_key) + for (state_type, state_key) in self_concrete_keys + if state_type not in other_wildcards + } - other_concrete_keys + + if other_all: + if self_all: + # If self starts with all, then we add as wildcards any + # types which appear in the other's exclusion filter (but + # aren't in the self exclusion filter). This is as the other + # filter will return everything BUT the types in its exclusion, so + # we need to add those excluded types that also match the self + # filter as wildcard types in the new filter. + new_wildcards |= other_excludes.difference(self_excludes) + + # If other is an `include_others` then the difference isn't. + new_all = False + # (We have no need for excludes when we don't start with all, as there + # is nothing to exclude.) + new_excludes = set() + + # We also filter out all state types that aren't in the exclusion + # list of the other. + new_wildcards &= other_excludes + new_concrete_keys = { + (state_type, state_key) + for (state_type, state_key) in new_concrete_keys + if state_type in other_excludes + } + + # Transform our newly-constructed state filter from the alternative + # representation back into the normal StateFilter representation. + return StateFilter._recompose_from_four_parts( + new_all, new_excludes, new_wildcards, new_concrete_keys + ) + + def must_await_full_state(self, is_mine_id: Callable[[str], bool]) -> bool: + """Check if we need to wait for full state to complete to calculate this state + + If we have a state filter which is completely satisfied even with partial + state, then we don't need to await_full_state before we can return it. 
+ + Args: + is_mine_id: a callable which confirms if a given state_key matches a mxid + of a local user + """ + # if we haven't requested membership events, then it depends on the value of + # 'include_others' + if EventTypes.Member not in self.types: + return self.include_others + + # if we're looking for *all* membership events, then we have to wait + member_state_keys = self.types[EventTypes.Member] + if member_state_keys is None: + return True + + # otherwise, consider whose membership we are looking for. If it's entirely + # local users, then we don't need to wait. + for state_key in member_state_keys: + if not is_mine_id(state_key): + # remote user + return True + + # local users only + return False + + +_ALL_STATE_FILTER = StateFilter(types=frozendict(), include_others=True) +_ALL_NON_MEMBER_STATE_FILTER = StateFilter( + types=frozendict({EventTypes.Member: frozenset()}), include_others=True +) +_NONE_STATE_FILTER = StateFilter(types=frozendict(), include_others=False) diff --git a/synapse/visibility.py b/synapse/visibility.py index b443857571..e442de3173 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -26,8 +26,8 @@ from synapse.events.utils import prune_event from synapse.logging.opentracing import trace from synapse.storage.controllers import StorageControllers from synapse.storage.databases.main import DataStore -from synapse.storage.state import StateFilter from synapse.types import RetentionPolicy, StateMap, get_domain_from_id +from synapse.types.state import StateFilter from synapse.util import Clock logger = logging.getLogger(__name__) diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py index d4e6d4236c..a433e70870 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -22,8 +22,8 @@ from synapse.api.constants import EventTypes, Membership from synapse.api.room_versions import RoomVersions from synapse.events import EventBase from synapse.server import HomeServer -from synapse.storage.state import StateFilter from synapse.types import JsonDict, RoomID, StateMap, UserID +from synapse.types.state import StateFilter from synapse.util import Clock from tests.unittest import HomeserverTestCase, TestCase -- cgit 1.5.1 From 62ed877433e23ba055cbc69a089c09d03c67681d Mon Sep 17 00:00:00 2001 From: reivilibre Date: Tue, 13 Dec 2022 13:19:19 +0000 Subject: Improve validation of field size limits in events. (#14664) --- changelog.d/14664.bugfix | 1 + stubs/synapse/synapse_rust/push.pyi | 2 +- synapse/api/constants.py | 1 + synapse/api/errors.py | 11 ++++- synapse/api/room_versions.py | 32 +++++++------- synapse/event_auth.py | 76 +++++++++++++++++++++++++++++--- synapse/handlers/federation_event.py | 20 +++++++++ synapse/push/bulk_push_rule_evaluator.py | 6 +-- 8 files changed, 119 insertions(+), 30 deletions(-) create mode 100644 changelog.d/14664.bugfix (limited to 'synapse/push') diff --git a/changelog.d/14664.bugfix b/changelog.d/14664.bugfix new file mode 100644 index 0000000000..a15df9a89d --- /dev/null +++ b/changelog.d/14664.bugfix @@ -0,0 +1 @@ +Improve validation of field size limits in events. 
\ No newline at end of file diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi index a6a586a0b5..dab5d4aff7 100644 --- a/stubs/synapse/synapse_rust/push.pyi +++ b/stubs/synapse/synapse_rust/push.pyi @@ -45,7 +45,7 @@ class PushRuleEvaluator: notification_power_levels: Mapping[str, int], related_events_flattened: Mapping[str, Mapping[str, str]], related_event_match_enabled: bool, - room_version_feature_flags: list[str], + room_version_feature_flags: Tuple[str, ...], msc3931_enabled: bool, ): ... def run( diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 89723d24fa..6a5e7171da 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -152,6 +152,7 @@ class EduTypes: class RejectedReason: AUTH_ERROR: Final = "auth_error" + OVERSIZED_EVENT: Final = "oversized_event" class RoomCreationPreset: diff --git a/synapse/api/errors.py b/synapse/api/errors.py index 76ef12ed3a..c2c177fd71 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -424,8 +424,17 @@ class ResourceLimitError(SynapseError): class EventSizeError(SynapseError): """An error raised when an event is too big.""" - def __init__(self, msg: str): + def __init__(self, msg: str, unpersistable: bool): + """ + unpersistable: + if True, the PDU must not be persisted, not even as a rejected PDU + when received over federation. + This is notably true when the entire PDU exceeds the size limit for a PDU, + (as opposed to an individual key's size limit being exceeded). + """ + super().__init__(413, msg, Codes.TOO_LARGE) + self.unpersistable = unpersistable class LoginError(SynapseError): diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py index ac62011c9f..c397920fe5 100644 --- a/synapse/api/room_versions.py +++ b/synapse/api/room_versions.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Callable, Dict, List, Optional +from typing import Callable, Dict, Optional, Tuple import attr @@ -103,7 +103,7 @@ class RoomVersion: # is not enough to mark it "supported": the push rule evaluator also needs to # support the flag. Unknown flags are ignored by the evaluator, making conditions # fail if used. - msc3931_push_features: List[str] # values from PushRuleRoomFlag + msc3931_push_features: Tuple[str, ...] 
# values from PushRuleRoomFlag class RoomVersions: @@ -124,7 +124,7 @@ class RoomVersions: msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, msc3667_int_only_power_levels=False, - msc3931_push_features=[], + msc3931_push_features=(), ) V2 = RoomVersion( "2", @@ -143,7 +143,7 @@ class RoomVersions: msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, msc3667_int_only_power_levels=False, - msc3931_push_features=[], + msc3931_push_features=(), ) V3 = RoomVersion( "3", @@ -162,7 +162,7 @@ class RoomVersions: msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, msc3667_int_only_power_levels=False, - msc3931_push_features=[], + msc3931_push_features=(), ) V4 = RoomVersion( "4", @@ -181,7 +181,7 @@ class RoomVersions: msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, msc3667_int_only_power_levels=False, - msc3931_push_features=[], + msc3931_push_features=(), ) V5 = RoomVersion( "5", @@ -200,7 +200,7 @@ class RoomVersions: msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, msc3667_int_only_power_levels=False, - msc3931_push_features=[], + msc3931_push_features=(), ) V6 = RoomVersion( "6", @@ -219,7 +219,7 @@ class RoomVersions: msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, msc3667_int_only_power_levels=False, - msc3931_push_features=[], + msc3931_push_features=(), ) MSC2176 = RoomVersion( "org.matrix.msc2176", @@ -238,7 +238,7 @@ class RoomVersions: msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, msc3667_int_only_power_levels=False, - msc3931_push_features=[], + msc3931_push_features=(), ) V7 = RoomVersion( "7", @@ -257,7 +257,7 @@ class RoomVersions: msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, msc3667_int_only_power_levels=False, - msc3931_push_features=[], + msc3931_push_features=(), ) V8 = RoomVersion( "8", @@ -276,7 +276,7 @@ class RoomVersions: msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, msc3667_int_only_power_levels=False, - msc3931_push_features=[], + msc3931_push_features=(), ) V9 = RoomVersion( "9", @@ -295,7 +295,7 @@ class RoomVersions: msc2716_redactions=False, msc3787_knock_restricted_join_rule=False, msc3667_int_only_power_levels=False, - msc3931_push_features=[], + msc3931_push_features=(), ) MSC3787 = RoomVersion( "org.matrix.msc3787", @@ -314,7 +314,7 @@ class RoomVersions: msc2716_redactions=False, msc3787_knock_restricted_join_rule=True, msc3667_int_only_power_levels=False, - msc3931_push_features=[], + msc3931_push_features=(), ) V10 = RoomVersion( "10", @@ -333,7 +333,7 @@ class RoomVersions: msc2716_redactions=False, msc3787_knock_restricted_join_rule=True, msc3667_int_only_power_levels=True, - msc3931_push_features=[], + msc3931_push_features=(), ) MSC2716v4 = RoomVersion( "org.matrix.msc2716v4", @@ -352,7 +352,7 @@ class RoomVersions: msc2716_redactions=True, msc3787_knock_restricted_join_rule=False, msc3667_int_only_power_levels=False, - msc3931_push_features=[], + msc3931_push_features=(), ) MSC1767v10 = RoomVersion( # MSC1767 (Extensible Events) based on room version "10" @@ -372,7 +372,7 @@ class RoomVersions: msc2716_redactions=False, msc3787_knock_restricted_join_rule=True, msc3667_int_only_power_levels=True, - msc3931_push_features=[PushRuleRoomFlag.EXTENSIBLE_EVENTS], + msc3931_push_features=(PushRuleRoomFlag.EXTENSIBLE_EVENTS,), ) diff --git a/synapse/event_auth.py b/synapse/event_auth.py index bab31e33c5..d437b7e5d1 100644 --- a/synapse/event_auth.py +++ b/synapse/event_auth.py @@ -52,6 +52,7 
@@ from synapse.api.room_versions import ( KNOWN_ROOM_VERSIONS, EventFormatVersions, RoomVersion, + RoomVersions, ) from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.types import MutableStateMap, StateMap, UserID, get_domain_from_id @@ -341,19 +342,80 @@ def check_state_dependent_auth_rules( logger.debug("Allowing! %s", event) +# Set of room versions where Synapse did not apply event key size limits +# in bytes, but rather in codepoints. +# In these room versions, we are more lenient with event size validation. +LENIENT_EVENT_BYTE_LIMITS_ROOM_VERSIONS = { + RoomVersions.V1, + RoomVersions.V2, + RoomVersions.V3, + RoomVersions.V4, + RoomVersions.V5, + RoomVersions.V6, + RoomVersions.MSC2176, + RoomVersions.V7, + RoomVersions.V8, + RoomVersions.V9, + RoomVersions.MSC3787, + RoomVersions.V10, + RoomVersions.MSC2716v4, + RoomVersions.MSC1767v10, +} + + def _check_size_limits(event: "EventBase") -> None: + """ + Checks the size limits in a PDU. + + The entire size limit of the PDU is checked first. + Then the size of fields is checked, first in codepoints and then in bytes. + + The codepoint size limits are only for Synapse compatibility. + + Raises: + EventSizeError: + when a size limit has been violated. + + unpersistable=True if Synapse never would have accepted the event and + the PDU must NOT be persisted. + + unpersistable=False if a prior version of Synapse would have accepted the + event and so the PDU must be persisted as rejected to avoid + breaking the room. + """ + + # Whole PDU check + if len(encode_canonical_json(event.get_pdu_json())) > MAX_PDU_SIZE: + raise EventSizeError("event too large", unpersistable=True) + + # Codepoint size check: Synapse always enforced these limits, so apply + # them strictly. if len(event.user_id) > 255: - raise EventSizeError("'user_id' too large") + raise EventSizeError("'user_id' too large", unpersistable=True) if len(event.room_id) > 255: - raise EventSizeError("'room_id' too large") + raise EventSizeError("'room_id' too large", unpersistable=True) if event.is_state() and len(event.state_key) > 255: - raise EventSizeError("'state_key' too large") + raise EventSizeError("'state_key' too large", unpersistable=True) if len(event.type) > 255: - raise EventSizeError("'type' too large") + raise EventSizeError("'type' too large", unpersistable=True) if len(event.event_id) > 255: - raise EventSizeError("'event_id' too large") - if len(encode_canonical_json(event.get_pdu_json())) > MAX_PDU_SIZE: - raise EventSizeError("event too large") + raise EventSizeError("'event_id' too large", unpersistable=True) + + strict_byte_limits = ( + event.room_version not in LENIENT_EVENT_BYTE_LIMITS_ROOM_VERSIONS + ) + + # Byte size check: if these fail, then be lenient to avoid breaking rooms. 
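
To illustrate the distinction the code above draws, here is a tiny standalone example of how a string can satisfy a 255-codepoint check (which older Synapse versions applied) while exceeding 255 bytes in UTF-8; the room ID is an illustrative value:

```python
# Python's len() counts codepoints; the federation limit is in bytes.
room_id = "!" + "\u00e9" * 200 + ":example.org"  # 'é' is 2 bytes in UTF-8

print(len(room_id))                  # 213 codepoints: passes a 255 check
print(len(room_id.encode("utf-8")))  # 413 bytes: fails the strict byte check
```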
+ if len(event.user_id.encode("utf-8")) > 255: + raise EventSizeError("'user_id' too large", unpersistable=strict_byte_limits) + if len(event.room_id.encode("utf-8")) > 255: + raise EventSizeError("'room_id' too large", unpersistable=strict_byte_limits) + if event.is_state() and len(event.state_key.encode("utf-8")) > 255: + raise EventSizeError("'state_key' too large", unpersistable=strict_byte_limits) + if len(event.type.encode("utf-8")) > 255: + raise EventSizeError("'type' too large", unpersistable=strict_byte_limits) + if len(event.event_id.encode("utf-8")) > 255: + raise EventSizeError("'event_id' too large", unpersistable=strict_byte_limits) def _check_create(event: "EventBase") -> None: diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index d2facdab60..66aca2f864 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -43,6 +43,7 @@ from synapse.api.constants import ( from synapse.api.errors import ( AuthError, Codes, + EventSizeError, FederationError, FederationPullAttemptBackoffError, HttpResponseException, @@ -1736,6 +1737,15 @@ class FederationEventHandler: except AuthError as e: logger.warning("Rejecting %r because %s", event, e) context.rejected = RejectedReason.AUTH_ERROR + except EventSizeError as e: + if e.unpersistable: + # This event is completely unpersistable. + raise e + # Otherwise, we are somewhat lenient and just persist the event + # as rejected, for moderate compatibility with older Synapse + # versions. + logger.warning("While validating received event %r: %s", event, e) + context.rejected = RejectedReason.OVERSIZED_EVENT events_and_contexts_to_persist.append((event, context)) @@ -1781,6 +1791,16 @@ class FederationEventHandler: # TODO: use a different rejected reason here? context.rejected = RejectedReason.AUTH_ERROR return + except EventSizeError as e: + if e.unpersistable: + # This event is completely unpersistable. + raise e + # Otherwise, we are somewhat lenient and just persist the event + # as rejected, for moderate compatibility with older Synapse + # versions. + logger.warning("While validating received event %r: %s", event, e) + context.rejected = RejectedReason.OVERSIZED_EVENT + return # next, check that we have all of the event's auth events. 
#
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py
index 36e5b327ef..f27ba64d53 100644
--- a/synapse/push/bulk_push_rule_evaluator.py
+++ b/synapse/push/bulk_push_rule_evaluator.py
@@ -342,10 +342,6 @@ class BulkPushRuleEvaluator:
         for user_id, level in notification_levels.items():
             notification_levels[user_id] = int(level)

-        room_version_features = event.room_version.msc3931_push_features
-        if not room_version_features:
-            room_version_features = []
-
         evaluator = PushRuleEvaluator(
             _flatten_dict(event, room_version=event.room_version),
             room_member_count,
@@ -353,7 +349,7 @@ class BulkPushRuleEvaluator:
             notification_levels,
             related_events,
             self._related_event_match_enabled,
-            room_version_features,
+            event.room_version.msc3931_push_features,
             self.hs.config.experimental.msc1767_enabled,  # MSC3931 flag
         )
--
cgit 1.5.1

From 58d2adc3da6a988452dbb9c6c4202a5ea19c4ca9 Mon Sep 17 00:00:00 2001
From: Jeyachandran Rathnam
Date: Mon, 9 Jan 2023 12:17:24 -0500
Subject: Remove undocumented device from pushrules (#14727)

* Remove undocumented device from pushrules

* Add changelog

* Update changelog.d/14727.misc

* Rename 14727.misc to 14727.bugfix

Co-authored-by: David Robertson
---
 changelog.d/14727.bugfix     | 1 +
 synapse/push/clientformat.py | 5 +----
 2 files changed, 2 insertions(+), 4 deletions(-)
 create mode 100644 changelog.d/14727.bugfix

(limited to 'synapse/push')

diff --git a/changelog.d/14727.bugfix b/changelog.d/14727.bugfix
new file mode 100644
index 0000000000..25079496e4
--- /dev/null
+++ b/changelog.d/14727.bugfix
@@ -0,0 +1 @@
+Remove the unspecced `device` field from `/pushrules` responses.
diff --git a/synapse/push/clientformat.py b/synapse/push/clientformat.py
index 622a1e35c5..bb76c169c6 100644
--- a/synapse/push/clientformat.py
+++ b/synapse/push/clientformat.py
@@ -26,10 +26,7 @@ def format_push_rules_for_user(
     """Converts a list of rawrules and an enabled map into nested dictionaries
     to match the Matrix client-server format for push rules"""

-    rules: Dict[str, Dict[str, List[Dict[str, Any]]]] = {
-        "global": {},
-        "device": {},
-    }
+    rules: Dict[str, Dict[str, List[Dict[str, Any]]]] = {"global": {}}

     rules["global"] = _add_empty_priority_class_arrays(rules["global"])
--
cgit 1.5.1

From 2a51f3ec36abeb1f5c1db795541988d1d9698e41 Mon Sep 17 00:00:00 2001
From: Patrick Cloke
Date: Fri, 27 Jan 2023 10:16:21 -0500
Subject: Implement MSC3952: Intentional mentions (#14823)

MSC3952 defines push rules which search for mentions in a list of Matrix
IDs in the event body, instead of searching the entire event body for
display name / local part.

This is implemented behind an experimental configuration flag and does
not yet implement the backwards compatibility pieces of the MSC.
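
To make the commit message concrete: a hedged, standalone sketch of the mentions content shape and of the defensive extraction this patch performs in the bulk evaluator. It mirrors the Python changes shown further down in this patch but is not a drop-in for them; the key name comes from the MSC, the sample event content is made up:

```python
# Standalone rewrite of the mention-extraction logic, ignoring malformed
# values exactly as the patched evaluator does.
from typing import Set, Tuple

MENTIONS_KEY = "org.matrix.msc3952.mentions"


def extract_mentions(content: dict) -> Tuple[Set[str], bool]:
    """Return (user_mentions, room_mention) from an event's content."""
    mentions = content.get(MENTIONS_KEY)
    user_mentions: Set[str] = set()
    room_mention = False
    if isinstance(mentions, dict):
        raw = mentions.get("user_ids")
        if isinstance(raw, list):
            # Keep only string entries; anything else is silently dropped.
            user_mentions = {item for item in raw if isinstance(item, str)}
        # A room mention counts only if the value is exactly True.
        room_mention = mentions.get("room") is True
    return user_mentions, room_mention


content = {
    "body": "hello",
    MENTIONS_KEY: {"user_ids": ["@alice:example.org", None, 42], "room": True},
}
print(extract_mentions(content))  # ({'@alice:example.org'}, True)
```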
--- changelog.d/14823.feature | 1 + rust/src/push/base_rules.rs | 21 +++++++ rust/src/push/evaluator.rs | 25 +++++++- rust/src/push/mod.rs | 34 +++++++++++ stubs/synapse/synapse_rust/push.pyi | 5 +- synapse/api/constants.py | 3 + synapse/config/experimental.py | 5 ++ synapse/push/bulk_push_rule_evaluator.py | 25 +++++++- synapse/storage/databases/main/push_rule.py | 1 + tests/push/test_bulk_push_rule_evaluator.py | 88 +++++++++++++++++++++++++++++ tests/push/test_push_rule_evaluator.py | 66 +++++++++++++++++++--- 11 files changed, 263 insertions(+), 11 deletions(-) create mode 100644 changelog.d/14823.feature (limited to 'synapse/push') diff --git a/changelog.d/14823.feature b/changelog.d/14823.feature new file mode 100644 index 0000000000..8293e99eff --- /dev/null +++ b/changelog.d/14823.feature @@ -0,0 +1 @@ +Experimental support for [MSC3952](https://github.com/matrix-org/matrix-spec-proposals/pull/3952): intentional mentions. diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs index 9140a69bb6..880eed0ef4 100644 --- a/rust/src/push/base_rules.rs +++ b/rust/src/push/base_rules.rs @@ -131,6 +131,14 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[ default: true, default_enabled: true, }, + PushRule { + rule_id: Cow::Borrowed(".org.matrix.msc3952.is_user_mentioned"), + priority_class: 5, + conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::IsUserMention)]), + actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_ACTION, SOUND_ACTION]), + default: true, + default_enabled: true, + }, PushRule { rule_id: Cow::Borrowed("global/override/.m.rule.contains_display_name"), priority_class: 5, @@ -139,6 +147,19 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[ default: true, default_enabled: true, }, + PushRule { + rule_id: Cow::Borrowed(".org.matrix.msc3952.is_room_mentioned"), + priority_class: 5, + conditions: Cow::Borrowed(&[ + Condition::Known(KnownCondition::IsRoomMention), + Condition::Known(KnownCondition::SenderNotificationPermission { + key: Cow::Borrowed("room"), + }), + ]), + actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_ACTION, SOUND_ACTION]), + default: true, + default_enabled: true, + }, PushRule { rule_id: Cow::Borrowed("global/override/.m.rule.roomnotif"), priority_class: 5, diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs index 0242ee1c5f..aa71202e43 100644 --- a/rust/src/push/evaluator.rs +++ b/rust/src/push/evaluator.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::BTreeMap; +use std::collections::{BTreeMap, BTreeSet}; use anyhow::{Context, Error}; use lazy_static::lazy_static; @@ -68,6 +68,11 @@ pub struct PushRuleEvaluator { /// The "content.body", if any. body: String, + /// The user mentions that were part of the message. + user_mentions: BTreeSet, + /// True if the message is a room message. + room_mention: bool, + /// The number of users in the room. 
room_member_count: u64, @@ -100,6 +105,8 @@ impl PushRuleEvaluator { #[new] pub fn py_new( flattened_keys: BTreeMap, + user_mentions: BTreeSet, + room_mention: bool, room_member_count: u64, sender_power_level: Option, notification_power_levels: BTreeMap, @@ -116,6 +123,8 @@ impl PushRuleEvaluator { Ok(PushRuleEvaluator { flattened_keys, body, + user_mentions, + room_mention, room_member_count, notification_power_levels, sender_power_level, @@ -229,6 +238,14 @@ impl PushRuleEvaluator { KnownCondition::RelatedEventMatch(event_match) => { self.match_related_event_match(event_match, user_id)? } + KnownCondition::IsUserMention => { + if let Some(uid) = user_id { + self.user_mentions.contains(uid) + } else { + false + } + } + KnownCondition::IsRoomMention => self.room_mention, KnownCondition::ContainsDisplayName => { if let Some(dn) = display_name { if !dn.is_empty() { @@ -424,6 +441,8 @@ fn push_rule_evaluator() { flattened_keys.insert("content.body".to_string(), "foo bar bob hello".to_string()); let evaluator = PushRuleEvaluator::py_new( flattened_keys, + BTreeSet::new(), + false, 10, Some(0), BTreeMap::new(), @@ -449,6 +468,8 @@ fn test_requires_room_version_supports_condition() { let flags = vec![RoomVersionFeatures::ExtensibleEvents.as_str().to_string()]; let evaluator = PushRuleEvaluator::py_new( flattened_keys, + BTreeSet::new(), + false, 10, Some(0), BTreeMap::new(), @@ -483,7 +504,7 @@ fn test_requires_room_version_supports_condition() { }; let rules = PushRules::new(vec![custom_rule]); result = evaluator.run( - &FilteredPushRules::py_new(rules, BTreeMap::new(), true, false, true), + &FilteredPushRules::py_new(rules, BTreeMap::new(), true, false, true, false), None, None, ); diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs index 842b13c88b..7e449f2433 100644 --- a/rust/src/push/mod.rs +++ b/rust/src/push/mod.rs @@ -269,6 +269,10 @@ pub enum KnownCondition { EventMatch(EventMatchCondition), #[serde(rename = "im.nheko.msc3664.related_event_match")] RelatedEventMatch(RelatedEventMatchCondition), + #[serde(rename = "org.matrix.msc3952.is_user_mention")] + IsUserMention, + #[serde(rename = "org.matrix.msc3952.is_room_mention")] + IsRoomMention, ContainsDisplayName, RoomMemberCount { #[serde(skip_serializing_if = "Option::is_none")] @@ -414,6 +418,7 @@ pub struct FilteredPushRules { msc1767_enabled: bool, msc3381_polls_enabled: bool, msc3664_enabled: bool, + msc3952_intentional_mentions: bool, } #[pymethods] @@ -425,6 +430,7 @@ impl FilteredPushRules { msc1767_enabled: bool, msc3381_polls_enabled: bool, msc3664_enabled: bool, + msc3952_intentional_mentions: bool, ) -> Self { Self { push_rules, @@ -432,6 +438,7 @@ impl FilteredPushRules { msc1767_enabled, msc3381_polls_enabled, msc3664_enabled, + msc3952_intentional_mentions, } } @@ -465,6 +472,11 @@ impl FilteredPushRules { return false; } + if !self.msc3952_intentional_mentions && rule.rule_id.contains("org.matrix.msc3952") + { + return false; + } + true }) .map(|r| { @@ -522,6 +534,28 @@ fn test_deserialize_unstable_msc3931_condition() { )); } +#[test] +fn test_deserialize_unstable_msc3952_user_condition() { + let json = r#"{"kind":"org.matrix.msc3952.is_user_mention"}"#; + + let condition: Condition = serde_json::from_str(json).unwrap(); + assert!(matches!( + condition, + Condition::Known(KnownCondition::IsUserMention) + )); +} + +#[test] +fn test_deserialize_unstable_msc3952_room_condition() { + let json = r#"{"kind":"org.matrix.msc3952.is_room_mention"}"#; + + let condition: Condition = serde_json::from_str(json).unwrap(); + 
assert!(matches!( + condition, + Condition::Known(KnownCondition::IsRoomMention) + )); +} + #[test] fn test_deserialize_custom_condition() { let json = r#"{"kind":"custom_tag"}"#; diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi index 304ed7111c..588d90c25a 100644 --- a/stubs/synapse/synapse_rust/push.pyi +++ b/stubs/synapse/synapse_rust/push.pyi @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Collection, Dict, Mapping, Optional, Sequence, Tuple, Union +from typing import Any, Collection, Dict, Mapping, Optional, Sequence, Set, Tuple, Union from synapse.types import JsonDict @@ -46,6 +46,7 @@ class FilteredPushRules: msc1767_enabled: bool, msc3381_polls_enabled: bool, msc3664_enabled: bool, + msc3952_intentional_mentions: bool, ): ... def rules(self) -> Collection[Tuple[PushRule, bool]]: ... @@ -55,6 +56,8 @@ class PushRuleEvaluator: def __init__( self, flattened_keys: Mapping[str, str], + user_mentions: Set[str], + room_mention: bool, room_member_count: int, sender_power_level: Optional[int], notification_power_levels: Mapping[str, int], diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 6f9239d21c..0f224b34cd 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -233,6 +233,9 @@ class EventContentFields: # The authorising user for joining a restricted room. AUTHORISING_USER: Final = "join_authorised_via_users_server" + # Use for mentioning users. + MSC3952_MENTIONS: Final = "org.matrix.msc3952.mentions" + # an unspecced field added to to-device messages to identify them uniquely-ish TO_DEVICE_MSGID: Final = "org.matrix.msgid" diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 2590c88cde..d2d0270ddd 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -168,3 +168,8 @@ class ExperimentalConfig(Config): # MSC3925: do not replace events with their edits self.msc3925_inhibit_edit = experimental.get("msc3925_inhibit_edit", False) + + # MSC3952: Intentional mentions + self.msc3952_intentional_mentions = experimental.get( + "msc3952_intentional_mentions", False + ) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index f27ba64d53..deaec19564 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -22,13 +22,20 @@ from typing import ( List, Mapping, Optional, + Set, Tuple, Union, ) from prometheus_client import Counter -from synapse.api.constants import MAIN_TIMELINE, EventTypes, Membership, RelationTypes +from synapse.api.constants import ( + MAIN_TIMELINE, + EventContentFields, + EventTypes, + Membership, + RelationTypes, +) from synapse.api.room_versions import PushRuleRoomFlag, RoomVersion from synapse.event_auth import auth_types_for_event, get_user_power_level from synapse.events import EventBase, relation_from_event @@ -342,8 +349,24 @@ class BulkPushRuleEvaluator: for user_id, level in notification_levels.items(): notification_levels[user_id] = int(level) + # Pull out any user and room mentions. + mentions = event.content.get(EventContentFields.MSC3952_MENTIONS) + user_mentions: Set[str] = set() + room_mention = False + if isinstance(mentions, dict): + # Remove out any non-string items and convert to a set. 
+ user_mentions_raw = mentions.get("user_ids") + if isinstance(user_mentions_raw, list): + user_mentions = set( + filter(lambda item: isinstance(item, str), user_mentions_raw) + ) + # Room mention is only true if the value is exactly true. + room_mention = mentions.get("room") is True + evaluator = PushRuleEvaluator( _flatten_dict(event, room_version=event.room_version), + user_mentions, + room_mention, room_member_count, sender_power_level, notification_levels, diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py index 14ca167b34..466a1145b7 100644 --- a/synapse/storage/databases/main/push_rule.py +++ b/synapse/storage/databases/main/push_rule.py @@ -89,6 +89,7 @@ def _load_rules( msc1767_enabled=experimental_config.msc1767_enabled, msc3664_enabled=experimental_config.msc3664_enabled, msc3381_polls_enabled=experimental_config.msc3381_polls_enabled, + msc3952_intentional_mentions=experimental_config.msc3952_intentional_mentions, ) return filtered_rules diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py index 9c17a42b65..aba62b5dc8 100644 --- a/tests/push/test_bulk_push_rule_evaluator.py +++ b/tests/push/test_bulk_push_rule_evaluator.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Any from unittest.mock import patch from twisted.test.proto_helpers import MemoryReactor +from synapse.api.constants import EventContentFields from synapse.api.room_versions import RoomVersions from synapse.push.bulk_push_rule_evaluator import BulkPushRuleEvaluator from synapse.rest import admin @@ -126,3 +128,89 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): # Ensure no actions are generated! self.get_success(bulk_evaluator.action_for_events_by_user([(event, context)])) bulk_evaluator._action_for_event_by_user.assert_not_called() + + @override_config({"experimental_features": {"msc3952_intentional_mentions": True}}) + def test_mentions(self) -> None: + """Test the behavior of an event which includes invalid mentions.""" + bulk_evaluator = BulkPushRuleEvaluator(self.hs) + + sentinel = object() + + def create_and_process(mentions: Any = sentinel) -> bool: + """Returns true iff the `mentions` trigger an event push action.""" + content = {} + if mentions is not sentinel: + content[EventContentFields.MSC3952_MENTIONS] = mentions + + # Create a new message event which should cause a notification. + event, context = self.get_success( + self.event_creation_handler.create_event( + self.requester, + { + "type": "test", + "room_id": self.room_id, + "content": content, + "sender": f"@bob:{self.hs.hostname}", + }, + ) + ) + + # Ensure no actions are generated! + self.get_success( + bulk_evaluator.action_for_events_by_user([(event, context)]) + ) + + # If any actions are generated for this event, return true. + result = self.get_success( + self.hs.get_datastores().main.db_pool.simple_select_list( + table="event_push_actions_staging", + keyvalues={"event_id": event.event_id}, + retcols=("*",), + desc="get_event_push_actions_staging", + ) + ) + return len(result) > 0 + + # Not including the mentions field should not notify. + self.assertFalse(create_and_process()) + # An empty mentions field should not notify. + self.assertFalse(create_and_process({})) + + # Non-dict mentions should be ignored. 
+ mentions: Any + for mentions in (None, True, False, 1, "foo", []): + self.assertFalse(create_and_process(mentions)) + + # A non-list should be ignored. + for mentions in (None, True, False, 1, "foo", {}): + self.assertFalse(create_and_process({"user_ids": mentions})) + + # The Matrix ID appearing anywhere in the list should notify. + self.assertTrue(create_and_process({"user_ids": [self.alice]})) + self.assertTrue(create_and_process({"user_ids": ["@another:test", self.alice]})) + + # Duplicate user IDs should notify. + self.assertTrue(create_and_process({"user_ids": [self.alice, self.alice]})) + + # Invalid entries in the list are ignored. + self.assertFalse(create_and_process({"user_ids": [None, True, False, {}, []]})) + self.assertTrue( + create_and_process({"user_ids": [None, True, False, {}, [], self.alice]}) + ) + + # Room mentions from those without power should not notify. + self.assertFalse(create_and_process({"room": True})) + + # Room mentions from those with power should notify. + self.helper.send_state( + self.room_id, + "m.room.power_levels", + {"notifications": {"room": 0}}, + self.token, + state_key="", + ) + self.assertTrue(create_and_process({"room": True})) + + # Invalid data should not notify. + for mentions in (None, False, 1, "foo", [], {}): + self.assertFalse(create_and_process({"room": mentions})) diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py index 1b87756b75..9d01c989d4 100644 --- a/tests/push/test_push_rule_evaluator.py +++ b/tests/push/test_push_rule_evaluator.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict, List, Optional, Union, cast +from typing import Dict, List, Optional, Set, Union, cast import frozendict @@ -39,7 +39,12 @@ from tests.test_utils.event_injection import create_event, inject_member_event class PushRuleEvaluatorTestCase(unittest.TestCase): def _get_evaluator( - self, content: JsonMapping, related_events: Optional[JsonDict] = None + self, + content: JsonMapping, + *, + user_mentions: Optional[Set[str]] = None, + room_mention: bool = False, + related_events: Optional[JsonDict] = None, ) -> PushRuleEvaluator: event = FrozenEvent( { @@ -57,13 +62,15 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): power_levels: Dict[str, Union[int, Dict[str, int]]] = {} return PushRuleEvaluator( _flatten_dict(event), + user_mentions or set(), + room_mention, room_member_count, sender_power_level, cast(Dict[str, int], power_levels.get("notifications", {})), {} if related_events is None else related_events, - True, - event.room_version.msc3931_push_features, - True, + related_event_match_enabled=True, + room_version_feature_flags=event.room_version.msc3931_push_features, + msc3931_enabled=True, ) def test_display_name(self) -> None: @@ -90,6 +97,51 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): # A display name with spaces should work fine. self.assertTrue(evaluator.matches(condition, "@user:test", "foo bar")) + def test_user_mentions(self) -> None: + """Check for user mentions.""" + condition = {"kind": "org.matrix.msc3952.is_user_mention"} + + # No mentions shouldn't match. 
+ evaluator = self._get_evaluator({}) + self.assertFalse(evaluator.matches(condition, "@user:test", None)) + + # An empty set shouldn't match + evaluator = self._get_evaluator({}, user_mentions=set()) + self.assertFalse(evaluator.matches(condition, "@user:test", None)) + + # The Matrix ID appearing anywhere in the mentions list should match + evaluator = self._get_evaluator({}, user_mentions={"@user:test"}) + self.assertTrue(evaluator.matches(condition, "@user:test", None)) + + evaluator = self._get_evaluator( + {}, user_mentions={"@another:test", "@user:test"} + ) + self.assertTrue(evaluator.matches(condition, "@user:test", None)) + + # Note that invalid data is tested at tests.push.test_bulk_push_rule_evaluator.TestBulkPushRuleEvaluator.test_mentions + # since the BulkPushRuleEvaluator is what handles data sanitisation. + + def test_room_mentions(self) -> None: + """Check for room mentions.""" + condition = {"kind": "org.matrix.msc3952.is_room_mention"} + + # No room mention shouldn't match. + evaluator = self._get_evaluator({}) + self.assertFalse(evaluator.matches(condition, None, None)) + + # Room mention should match. + evaluator = self._get_evaluator({}, room_mention=True) + self.assertTrue(evaluator.matches(condition, None, None)) + + # A room mention and user mention is valid. + evaluator = self._get_evaluator( + {}, user_mentions={"@another:test"}, room_mention=True + ) + self.assertTrue(evaluator.matches(condition, None, None)) + + # Note that invalid data is tested at tests.push.test_bulk_push_rule_evaluator.TestBulkPushRuleEvaluator.test_mentions + # since the BulkPushRuleEvaluator is what handles data sanitisation. + def _assert_matches( self, condition: JsonDict, content: JsonMapping, msg: Optional[str] = None ) -> None: @@ -308,7 +360,7 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): }, } }, - { + related_events={ "m.in_reply_to": { "event_id": "$parent_event_id", "type": "m.room.message", @@ -408,7 +460,7 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): }, } }, - { + related_events={ "m.in_reply_to": { "event_id": "$parent_event_id", "type": "m.room.message", -- cgit 1.5.1 From 510d4b06e7d346b4f94cb5598da90c9f668b62bb Mon Sep 17 00:00:00 2001 From: David Robertson Date: Mon, 30 Jan 2023 21:29:30 +0000 Subject: Handle malformed values of `notification.room` in power level events (#14942) * Better test for bad values in power levels events The previous test only checked that Synapse didn't raise an exception, but didn't check that we had correctly interpreted the value of the dodgy power level. It also conflated two things: bad room notification levels, and bad user levels. There _is_ logic for converting the latter to integers, but we should test it separately. 
* Check we ignore types that don't convert to int * Handle `None` values in `notifications.room` * Changelog * Also test that bad values are rejected by event auth * Docstring * linter scripttttttttt --- changelog.d/14942.bugfix | 1 + synapse/push/bulk_push_rule_evaluator.py | 19 +++++- tests/push/test_bulk_push_rule_evaluator.py | 93 +++++++++++++++++++++++++---- tests/test_event_auth.py | 32 +++++++++- 4 files changed, 128 insertions(+), 17 deletions(-) create mode 100644 changelog.d/14942.bugfix (limited to 'synapse/push') diff --git a/changelog.d/14942.bugfix b/changelog.d/14942.bugfix new file mode 100644 index 0000000000..a3ca3eb7e9 --- /dev/null +++ b/changelog.d/14942.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse 1.68.0 where we were unable to service remote joins in rooms with `@room` notification levels set to `null` in their (malformed) power levels. diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index deaec19564..88cfc05d05 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -69,6 +69,9 @@ STATE_EVENT_TYPES_TO_MARK_UNREAD = { } +SENTINEL = object() + + def _should_count_as_unread(event: EventBase, context: EventContext) -> bool: # Exclude rejected and soft-failed events. if context.rejected or event.internal_metadata.is_soft_failed(): @@ -343,11 +346,21 @@ class BulkPushRuleEvaluator: related_events = await self._related_events(event) # It's possible that old room versions have non-integer power levels (floats or - # strings). Workaround this by explicitly converting to int. + # strings; even the occasional `null`). For old rooms, we interpret these as if + # they were integers. Do this here for the `@room` power level threshold. + # Note that this is done automatically for the sender's power level by + # _get_power_levels_and_sender_level in its call to get_user_power_level + # (even for room V10.) notification_levels = power_levels.get("notifications", {}) if not event.room_version.msc3667_int_only_power_levels: - for user_id, level in notification_levels.items(): - notification_levels[user_id] = int(level) + keys = list(notification_levels.keys()) + for key in keys: + level = notification_levels.get(key, SENTINEL) + if level is not SENTINEL and type(level) is not int: + try: + notification_levels[key] = int(level) + except (TypeError, ValueError): + del notification_levels[key] # Pull out any user and room mentions. mentions = event.content.get(EventContentFields.MSC3952_MENTIONS) diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py index aba62b5dc8..fda48d9f61 100644 --- a/tests/push/test_bulk_push_rule_evaluator.py +++ b/tests/push/test_bulk_push_rule_evaluator.py @@ -15,6 +15,8 @@ from typing import Any from unittest.mock import patch +from parameterized import parameterized + from twisted.test.proto_helpers import MemoryReactor from synapse.api.constants import EventContentFields @@ -48,35 +50,84 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): self.requester = create_requester(self.alice) self.room_id = self.helper.create_room_as( - self.alice, room_version=RoomVersions.V9.identifier, tok=self.token + # This is deliberately set to V9, because we want to test the logic which + # handles stringy power levels. Stringy power levels were outlawed in V10. 
+ self.alice,
+ room_version=RoomVersions.V9.identifier,
+ tok=self.token,
 )
 self.event_creation_handler = self.hs.get_event_creation_handler()
 
- def test_action_for_event_by_user_handles_noninteger_power_levels(self) -> None:
- """We should convert floats and strings to integers before passing to Rust.
+ @parameterized.expand(
+ [
+ # The historically-permitted bad values. Alice's notification should be
+ # allowed if this threshold is at or below her power level (60)
+ ("100", False),
+ ("0", True),
+ (12.34, True),
+ (60.0, True),
+ (67.89, False),
+ # Values that int(...) would not successfully cast should be ignored.
+ # The room notification level should then default to 50, per the spec, so
+ # Alice's notification is allowed.
+ (None, True),
+ # We haven't seen `"room": []` or `"room": {}` in the wild (yet), but
+ # let's check them for paranoia's sake.
+ ([], True),
+ ({}, True),
+ ]
+ )
+ def test_action_for_event_by_user_handles_noninteger_room_power_levels(
+ self, bad_room_level: object, should_permit: bool
+ ) -> None:
+ """We should convert strings in `room` to integers before passing to Rust.
+
+ Test this as follows:
+ - Create a room as Alice and invite another user Bob.
+ - Set PLs so that Alice has PL 60 and `notifications.room` is set to a bad value.
+ - Have Alice create a message notifying @room.
+ - Evaluate notification actions for that message. This should not raise.
+ - Look in the DB to see if that message triggered a highlight for Bob.
+
+ The test is parameterised with two arguments:
+ - the bad power level value for "room", before JSON serialisation
+ - whether Bob should expect the message to be highlighted
 
 Reproduces #14060.
 
 A lack of validation: the gift that keeps on giving.
 """
-
- # Alter the power levels in that room to include stringy and floaty levels.
- # We need to suppress the validation logic or else it will reject these dodgy
- # values. (Presumably this validation was not always present.)
+ # Join another user to the room, so that there is someone to see Alice's
+ # @room notification.
+ bob = self.register_user("bob", "pass")
+ bob_token = self.login(bob, "pass")
+ self.helper.join(self.room_id, bob, tok=bob_token)
+
+ # Alter the power levels in that room to include the bad @room notification
+ # level. We need to suppress
+ #
+ # - canonicaljson validation, because canonicaljson forbids floats;
+ # - the event jsonschema validation, because it will forbid bad values; and
+ # - the auth rules checks, because they stop us from creating power levels
+ # with `"room": null`. (We want to test this case, because we have seen it
+ # in the wild.)
+ #
+ # We have seen stringy and null values for "room" in the wild, so presumably
+ # some of this validation was missing in the past.
 with patch("synapse.events.validator.validate_canonicaljson"), patch(
 "synapse.events.validator.jsonschema.validate"
- ):
- self.helper.send_state(
+ ), patch("synapse.handlers.event_auth.check_state_dependent_auth_rules"):
+ pl_event_id = self.helper.send_state(
 self.room_id,
 "m.room.power_levels",
 {
- "users": {self.alice: "100"}, # stringy
- "notifications": {"room": 100.0}, # float
+ "users": {self.alice: 60},
+ "notifications": {"room": bad_room_level},
 },
 self.token,
 state_key="",
- )
+ )["event_id"]
 
 # Create a new message event, and try to evaluate it under the dodgy
 # power level event.
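
Stepping back from the diff for a moment: the sanitisation loop added to `synapse/push/bulk_push_rule_evaluator.py` above can be exercised on its own. A minimal sketch — the input dict is invented for illustration, but the loop body is the one from the change above:

    SENTINEL = object()

    # Invented example of a malformed `notifications` power-levels block.
    notification_levels = {"room": None, "bot": "50", "bad": [], "mod": 25}

    for key in list(notification_levels.keys()):
        level = notification_levels.get(key, SENTINEL)
        if level is not SENTINEL and type(level) is not int:
            try:
                notification_levels[key] = int(level)
            except (TypeError, ValueError):
                del notification_levels[key]

    # Result: {"bot": 50, "mod": 25} -- "room" (None) and "bad" ([]) are
    # dropped because int() raises for them, the stringy "50" is coerced,
    # and plain ints pass through untouched.
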
@@ -88,10 +139,11 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): "room_id": self.room_id, "content": { "msgtype": "m.text", - "body": "helo", + "body": "helo @room", }, "sender": self.alice, }, + prev_event_ids=[pl_event_id], ) ) @@ -99,6 +151,21 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): # should not raise self.get_success(bulk_evaluator.action_for_events_by_user([(event, context)])) + # Did Bob see Alice's @room notification? + highlighted_actions = self.get_success( + self.hs.get_datastores().main.db_pool.simple_select_list( + table="event_push_actions_staging", + keyvalues={ + "event_id": event.event_id, + "user_id": bob, + "highlight": 1, + }, + retcols=("*",), + desc="get_event_push_actions_staging", + ) + ) + self.assertEqual(len(highlighted_actions), int(should_permit)) + @override_config({"push": {"enabled": False}}) def test_action_for_event_by_user_disabled_by_config(self) -> None: """Ensure that push rules are not calculated when disabled in the config""" diff --git a/tests/test_event_auth.py b/tests/test_event_auth.py index f4d9fba0a1..0a7937f1cc 100644 --- a/tests/test_event_auth.py +++ b/tests/test_event_auth.py @@ -13,7 +13,7 @@ # limitations under the License. import unittest -from typing import Collection, Dict, Iterable, List, Optional +from typing import Any, Collection, Dict, Iterable, List, Optional from parameterized import parameterized @@ -728,6 +728,36 @@ class EventAuthTestCase(unittest.TestCase): pl_event.room_version, pl_event2, {("fake_type", "fake_key"): pl_event} ) + def test_room_v10_rejects_other_non_integer_power_levels(self) -> None: + """We should reject PLs that are non-integer, non-string JSON values. + + test_room_v10_rejects_string_power_levels above handles the string case. + """ + + def create_event(pl_event_content: Dict[str, Any]) -> EventBase: + return make_event_from_dict( + { + "room_id": TEST_ROOM_ID, + **_maybe_get_event_id_dict_for_room_version(RoomVersions.V10), + "type": "m.room.power_levels", + "sender": "@test:test.com", + "state_key": "", + "content": pl_event_content, + "signatures": {"test.com": {"ed25519:0": "some9signature"}}, + }, + room_version=RoomVersions.V10, + ) + + contents: Iterable[Dict[str, Any]] = [ + {"notifications": {"room": None}}, + {"users": {"@alice:wonderland": []}}, + {"users_default": {}}, + ] + for content in contents: + event = create_event(content) + with self.assertRaises(SynapseError): + event_auth._check_power_levels(event.room_version, event, {}) + # helpers for making events TEST_DOMAIN = "example.com" -- cgit 1.5.1 From 52700a0bcf2caaa792b94e2a8c12f29d1c61b91e Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 3 Feb 2023 11:28:20 -0500 Subject: Support the backwards compatibility features in MSC3952. (#14958) If the feature is enabled and the event has a `m.mentions` property, skip processing of the legacy mentions rules. 
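
Concretely, once the experimental flag is on, an event carrying the unstable-prefixed mentions property bypasses the three legacy mention rules entirely. A hedged sketch (the event body is hypothetical; the rule IDs are the ones special-cased in `evaluator.rs` below):

    # Hypothetical event content. Because the mentions property is present,
    # only the MSC3952 mention conditions apply to this event.
    content = {
        "msgtype": "m.text",
        "body": "hello @room, hello alice",  # would previously have matched
        "org.matrix.msc3952.mentions": {"user_ids": ["@alice:example.org"]},
    }

    # Rules skipped for such an event (per the evaluator change below):
    #   global/override/.m.rule.contains_display_name
    #   global/content/.m.rule.contains_user_name
    #   global/override/.m.rule.roomnotif
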
--- changelog.d/14958.feature | 1 + rust/benches/evaluator.rs | 4 + rust/src/push/evaluator.rs | 19 +++ stubs/synapse/synapse_rust/push.pyi | 1 + synapse/push/bulk_push_rule_evaluator.py | 9 +- tests/push/test_bulk_push_rule_evaluator.py | 191 ++++++++++++++++++++-------- tests/push/test_push_rule_evaluator.py | 18 ++- 7 files changed, 184 insertions(+), 59 deletions(-) create mode 100644 changelog.d/14958.feature (limited to 'synapse/push') diff --git a/changelog.d/14958.feature b/changelog.d/14958.feature new file mode 100644 index 0000000000..8293e99eff --- /dev/null +++ b/changelog.d/14958.feature @@ -0,0 +1 @@ +Experimental support for [MSC3952](https://github.com/matrix-org/matrix-spec-proposals/pull/3952): intentional mentions. diff --git a/rust/benches/evaluator.rs b/rust/benches/evaluator.rs index 6b16a3f75b..859d54961c 100644 --- a/rust/benches/evaluator.rs +++ b/rust/benches/evaluator.rs @@ -33,6 +33,7 @@ fn bench_match_exact(b: &mut Bencher) { let eval = PushRuleEvaluator::py_new( flattened_keys, + false, BTreeSet::new(), false, 10, @@ -71,6 +72,7 @@ fn bench_match_word(b: &mut Bencher) { let eval = PushRuleEvaluator::py_new( flattened_keys, + false, BTreeSet::new(), false, 10, @@ -109,6 +111,7 @@ fn bench_match_word_miss(b: &mut Bencher) { let eval = PushRuleEvaluator::py_new( flattened_keys, + false, BTreeSet::new(), false, 10, @@ -147,6 +150,7 @@ fn bench_eval_message(b: &mut Bencher) { let eval = PushRuleEvaluator::py_new( flattened_keys, + false, BTreeSet::new(), false, 10, diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs index aa71202e43..da6f704c0e 100644 --- a/rust/src/push/evaluator.rs +++ b/rust/src/push/evaluator.rs @@ -68,6 +68,8 @@ pub struct PushRuleEvaluator { /// The "content.body", if any. body: String, + /// True if the event has a mentions property and MSC3952 support is enabled. + has_mentions: bool, /// The user mentions that were part of the message. user_mentions: BTreeSet, /// True if the message is a room message. @@ -105,6 +107,7 @@ impl PushRuleEvaluator { #[new] pub fn py_new( flattened_keys: BTreeMap, + has_mentions: bool, user_mentions: BTreeSet, room_mention: bool, room_member_count: u64, @@ -123,6 +126,7 @@ impl PushRuleEvaluator { Ok(PushRuleEvaluator { flattened_keys, body, + has_mentions, user_mentions, room_mention, room_member_count, @@ -155,6 +159,19 @@ impl PushRuleEvaluator { } let rule_id = &push_rule.rule_id().to_string(); + + // For backwards-compatibility the legacy mention rules are disabled + // if the event contains the 'm.mentions' property (and if the + // experimental feature is enabled, both of these are represented + // by the has_mentions flag). 
+ if self.has_mentions + && (rule_id == "global/override/.m.rule.contains_display_name" + || rule_id == "global/content/.m.rule.contains_user_name" + || rule_id == "global/override/.m.rule.roomnotif") + { + continue; + } + let extev_flag = &RoomVersionFeatures::ExtensibleEvents.as_str().to_string(); let supports_extensible_events = self.room_version_feature_flags.contains(extev_flag); let safe_from_rver_condition = SAFE_EXTENSIBLE_EVENTS_RULE_IDS.contains(rule_id); @@ -441,6 +458,7 @@ fn push_rule_evaluator() { flattened_keys.insert("content.body".to_string(), "foo bar bob hello".to_string()); let evaluator = PushRuleEvaluator::py_new( flattened_keys, + false, BTreeSet::new(), false, 10, @@ -468,6 +486,7 @@ fn test_requires_room_version_supports_condition() { let flags = vec![RoomVersionFeatures::ExtensibleEvents.as_str().to_string()]; let evaluator = PushRuleEvaluator::py_new( flattened_keys, + false, BTreeSet::new(), false, 10, diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi index 588d90c25a..c0af2af3df 100644 --- a/stubs/synapse/synapse_rust/push.pyi +++ b/stubs/synapse/synapse_rust/push.pyi @@ -56,6 +56,7 @@ class PushRuleEvaluator: def __init__( self, flattened_keys: Mapping[str, str], + has_mentions: bool, user_mentions: Set[str], room_mention: bool, room_member_count: int, diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 88cfc05d05..9bf92b9765 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -119,6 +119,9 @@ class BulkPushRuleEvaluator: self.should_calculate_push_rules = self.hs.config.push.enable_push self._related_event_match_enabled = self.hs.config.experimental.msc3664_enabled + self._intentional_mentions_enabled = ( + self.hs.config.experimental.msc3952_intentional_mentions + ) self.room_push_rule_cache_metrics = register_cache( "cache", @@ -364,9 +367,12 @@ class BulkPushRuleEvaluator: # Pull out any user and room mentions. mentions = event.content.get(EventContentFields.MSC3952_MENTIONS) + has_mentions = self._intentional_mentions_enabled and isinstance(mentions, dict) user_mentions: Set[str] = set() room_mention = False - if isinstance(mentions, dict): + if has_mentions: + # mypy seems to have lost the type even though it must be a dict here. + assert isinstance(mentions, dict) # Remove out any non-string items and convert to a set. user_mentions_raw = mentions.get("user_ids") if isinstance(user_mentions_raw, list): @@ -378,6 +384,7 @@ class BulkPushRuleEvaluator: evaluator = PushRuleEvaluator( _flatten_dict(event, room_version=event.room_version), + has_mentions, user_mentions, room_mention, room_member_count, diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py index fda48d9f61..3b2d082dcb 100644 --- a/tests/push/test_bulk_push_rule_evaluator.py +++ b/tests/push/test_bulk_push_rule_evaluator.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Any +from typing import Any, Optional from unittest.mock import patch from parameterized import parameterized @@ -25,7 +25,7 @@ from synapse.push.bulk_push_rule_evaluator import BulkPushRuleEvaluator from synapse.rest import admin from synapse.rest.client import login, register, room from synapse.server import HomeServer -from synapse.types import create_requester +from synapse.types import JsonDict, create_requester from synapse.util import Clock from tests.test_utils import simple_async_mock @@ -196,77 +196,144 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): self.get_success(bulk_evaluator.action_for_events_by_user([(event, context)])) bulk_evaluator._action_for_event_by_user.assert_not_called() - @override_config({"experimental_features": {"msc3952_intentional_mentions": True}}) - def test_mentions(self) -> None: - """Test the behavior of an event which includes invalid mentions.""" - bulk_evaluator = BulkPushRuleEvaluator(self.hs) - - sentinel = object() - - def create_and_process(mentions: Any = sentinel) -> bool: - """Returns true iff the `mentions` trigger an event push action.""" - content = {} - if mentions is not sentinel: - content[EventContentFields.MSC3952_MENTIONS] = mentions - - # Create a new message event which should cause a notification. - event, context = self.get_success( - self.event_creation_handler.create_event( - self.requester, - { - "type": "test", - "room_id": self.room_id, - "content": content, - "sender": f"@bob:{self.hs.hostname}", - }, - ) + def _create_and_process( + self, bulk_evaluator: BulkPushRuleEvaluator, content: Optional[JsonDict] = None + ) -> bool: + """Returns true iff the `mentions` trigger an event push action.""" + # Create a new message event which should cause a notification. + event, context = self.get_success( + self.event_creation_handler.create_event( + self.requester, + { + "type": "test", + "room_id": self.room_id, + "content": content or {}, + "sender": f"@bob:{self.hs.hostname}", + }, ) + ) - # Ensure no actions are generated! - self.get_success( - bulk_evaluator.action_for_events_by_user([(event, context)]) - ) + # Execute the push rule machinery. + self.get_success(bulk_evaluator.action_for_events_by_user([(event, context)])) - # If any actions are generated for this event, return true. - result = self.get_success( - self.hs.get_datastores().main.db_pool.simple_select_list( - table="event_push_actions_staging", - keyvalues={"event_id": event.event_id}, - retcols=("*",), - desc="get_event_push_actions_staging", - ) + # If any actions are generated for this event, return true. + result = self.get_success( + self.hs.get_datastores().main.db_pool.simple_select_list( + table="event_push_actions_staging", + keyvalues={"event_id": event.event_id}, + retcols=("*",), + desc="get_event_push_actions_staging", ) - return len(result) > 0 + ) + return len(result) > 0 + + @override_config({"experimental_features": {"msc3952_intentional_mentions": True}}) + def test_user_mentions(self) -> None: + """Test the behavior of an event which includes invalid user mentions.""" + bulk_evaluator = BulkPushRuleEvaluator(self.hs) # Not including the mentions field should not notify. - self.assertFalse(create_and_process()) + self.assertFalse(self._create_and_process(bulk_evaluator)) # An empty mentions field should not notify. - self.assertFalse(create_and_process({})) + self.assertFalse( + self._create_and_process( + bulk_evaluator, {EventContentFields.MSC3952_MENTIONS: {}} + ) + ) # Non-dict mentions should be ignored. 
mentions: Any for mentions in (None, True, False, 1, "foo", []): - self.assertFalse(create_and_process(mentions)) + self.assertFalse( + self._create_and_process( + bulk_evaluator, {EventContentFields.MSC3952_MENTIONS: mentions} + ) + ) # A non-list should be ignored. for mentions in (None, True, False, 1, "foo", {}): - self.assertFalse(create_and_process({"user_ids": mentions})) + self.assertFalse( + self._create_and_process( + bulk_evaluator, + {EventContentFields.MSC3952_MENTIONS: {"user_ids": mentions}}, + ) + ) # The Matrix ID appearing anywhere in the list should notify. - self.assertTrue(create_and_process({"user_ids": [self.alice]})) - self.assertTrue(create_and_process({"user_ids": ["@another:test", self.alice]})) + self.assertTrue( + self._create_and_process( + bulk_evaluator, + {EventContentFields.MSC3952_MENTIONS: {"user_ids": [self.alice]}}, + ) + ) + self.assertTrue( + self._create_and_process( + bulk_evaluator, + { + EventContentFields.MSC3952_MENTIONS: { + "user_ids": ["@another:test", self.alice] + } + }, + ) + ) # Duplicate user IDs should notify. - self.assertTrue(create_and_process({"user_ids": [self.alice, self.alice]})) + self.assertTrue( + self._create_and_process( + bulk_evaluator, + { + EventContentFields.MSC3952_MENTIONS: { + "user_ids": [self.alice, self.alice] + } + }, + ) + ) # Invalid entries in the list are ignored. - self.assertFalse(create_and_process({"user_ids": [None, True, False, {}, []]})) + self.assertFalse( + self._create_and_process( + bulk_evaluator, + { + EventContentFields.MSC3952_MENTIONS: { + "user_ids": [None, True, False, {}, []] + } + }, + ) + ) self.assertTrue( - create_and_process({"user_ids": [None, True, False, {}, [], self.alice]}) + self._create_and_process( + bulk_evaluator, + { + EventContentFields.MSC3952_MENTIONS: { + "user_ids": [None, True, False, {}, [], self.alice] + } + }, + ) ) + # The legacy push rule should not mention if the mentions field exists. + self.assertFalse( + self._create_and_process( + bulk_evaluator, + { + "body": self.alice, + "msgtype": "m.text", + EventContentFields.MSC3952_MENTIONS: {}, + }, + ) + ) + + @override_config({"experimental_features": {"msc3952_intentional_mentions": True}}) + def test_room_mentions(self) -> None: + """Test the behavior of an event which includes invalid room mentions.""" + bulk_evaluator = BulkPushRuleEvaluator(self.hs) + # Room mentions from those without power should not notify. - self.assertFalse(create_and_process({"room": True})) + self.assertFalse( + self._create_and_process( + bulk_evaluator, {EventContentFields.MSC3952_MENTIONS: {"room": True}} + ) + ) # Room mentions from those with power should notify. self.helper.send_state( @@ -276,8 +343,30 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): self.token, state_key="", ) - self.assertTrue(create_and_process({"room": True})) + self.assertTrue( + self._create_and_process( + bulk_evaluator, {EventContentFields.MSC3952_MENTIONS: {"room": True}} + ) + ) # Invalid data should not notify. + mentions: Any for mentions in (None, False, 1, "foo", [], {}): - self.assertFalse(create_and_process({"room": mentions})) + self.assertFalse( + self._create_and_process( + bulk_evaluator, + {EventContentFields.MSC3952_MENTIONS: {"room": mentions}}, + ) + ) + + # The legacy push rule should not mention if the mentions field exists. 
+ self.assertFalse( + self._create_and_process( + bulk_evaluator, + { + "body": "@room", + "msgtype": "m.text", + EventContentFields.MSC3952_MENTIONS: {}, + }, + ) + ) diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py index 9d01c989d4..81661e181b 100644 --- a/tests/push/test_push_rule_evaluator.py +++ b/tests/push/test_push_rule_evaluator.py @@ -42,6 +42,7 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): self, content: JsonMapping, *, + has_mentions: bool = False, user_mentions: Optional[Set[str]] = None, room_mention: bool = False, related_events: Optional[JsonDict] = None, @@ -62,6 +63,7 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): power_levels: Dict[str, Union[int, Dict[str, int]]] = {} return PushRuleEvaluator( _flatten_dict(event), + has_mentions, user_mentions or set(), room_mention, room_member_count, @@ -102,19 +104,21 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): condition = {"kind": "org.matrix.msc3952.is_user_mention"} # No mentions shouldn't match. - evaluator = self._get_evaluator({}) + evaluator = self._get_evaluator({}, has_mentions=True) self.assertFalse(evaluator.matches(condition, "@user:test", None)) # An empty set shouldn't match - evaluator = self._get_evaluator({}, user_mentions=set()) + evaluator = self._get_evaluator({}, has_mentions=True, user_mentions=set()) self.assertFalse(evaluator.matches(condition, "@user:test", None)) # The Matrix ID appearing anywhere in the mentions list should match - evaluator = self._get_evaluator({}, user_mentions={"@user:test"}) + evaluator = self._get_evaluator( + {}, has_mentions=True, user_mentions={"@user:test"} + ) self.assertTrue(evaluator.matches(condition, "@user:test", None)) evaluator = self._get_evaluator( - {}, user_mentions={"@another:test", "@user:test"} + {}, has_mentions=True, user_mentions={"@another:test", "@user:test"} ) self.assertTrue(evaluator.matches(condition, "@user:test", None)) @@ -126,16 +130,16 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): condition = {"kind": "org.matrix.msc3952.is_room_mention"} # No room mention shouldn't match. - evaluator = self._get_evaluator({}) + evaluator = self._get_evaluator({}, has_mentions=True) self.assertFalse(evaluator.matches(condition, None, None)) # Room mention should match. - evaluator = self._get_evaluator({}, room_mention=True) + evaluator = self._get_evaluator({}, has_mentions=True, room_mention=True) self.assertTrue(evaluator.matches(condition, None, None)) # A room mention and user mention is valid. evaluator = self._get_evaluator( - {}, user_mentions={"@another:test"}, room_mention=True + {}, has_mentions=True, user_mentions={"@another:test"}, room_mention=True ) self.assertTrue(evaluator.matches(condition, None, None)) -- cgit 1.5.1 From f0cae26d58f6f907236112be5f4eaecc376b1304 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 3 Feb 2023 11:48:13 -0500 Subject: Add a docstring & tests for _flatten_dict. (#14981) --- changelog.d/14981.misc | 1 + synapse/push/bulk_push_rule_evaluator.py | 23 +++++++++++++++++++++++ tests/push/test_push_rule_evaluator.py | 26 +++++++++++++++++++++++++- 3 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 changelog.d/14981.misc (limited to 'synapse/push') diff --git a/changelog.d/14981.misc b/changelog.d/14981.misc new file mode 100644 index 0000000000..68ac8335fc --- /dev/null +++ b/changelog.d/14981.misc @@ -0,0 +1 @@ +Add tests for `_flatten_dict`. 
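
For reference, the evaluator changes in the preceding patch boil down to the following call shape. A sketch only: the positional layout follows the amended `push.pyi` stub, and the variable names are placeholders rather than the exact locals used in Synapse.

    evaluator = PushRuleEvaluator(
        _flatten_dict(event),
        True,                # has_mentions: the event carried a mentions dict
        {"@user:test"},      # user_mentions, already sanitised upstream
        False,               # room_mention
        room_member_count,
        sender_power_level,
        notification_power_levels,
        related_events,
        related_event_match_enabled=True,
        room_version_feature_flags=event.room_version.msc3931_push_features,
        msc3931_enabled=True,
    )

    # With has_mentions=True the MSC3952 conditions consult the sets above,
    # and the legacy mention rules are skipped:
    evaluator.matches(
        {"kind": "org.matrix.msc3952.is_user_mention"}, "@user:test", None
    )  # -> True
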
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 9bf92b9765..20369f3dfe 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -473,6 +473,29 @@ def _flatten_dict( prefix: Optional[List[str]] = None, result: Optional[Dict[str, str]] = None, ) -> Dict[str, str]: + """ + Given a JSON dictionary (or event) which might contain sub dictionaries, + flatten it into a single layer dictionary by combining the keys & sub-keys. + + Any (non-dictionary), non-string value is dropped. + + Transforms: + + {"foo": {"bar": "test"}} + + To: + + {"foo.bar": "test"} + + Args: + d: The event or content to continue flattening. + room_version: The room version object. + prefix: The key prefix (from outer dictionaries). + result: The result to mutate. + + Returns: + The resulting dictionary. + """ if prefix is None: prefix = [] if result is None: diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py index 81661e181b..7c430c4ecb 100644 --- a/tests/push/test_push_rule_evaluator.py +++ b/tests/push/test_push_rule_evaluator.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict, List, Optional, Set, Union, cast +from typing import Any, Dict, List, Optional, Set, Union, cast import frozendict @@ -37,6 +37,30 @@ from tests import unittest from tests.test_utils.event_injection import create_event, inject_member_event +class FlattenDictTestCase(unittest.TestCase): + def test_simple(self) -> None: + """Test a dictionary that isn't modified.""" + input = {"foo": "abc"} + self.assertEqual(input, _flatten_dict(input)) + + def test_nested(self) -> None: + """Nested dictionaries become dotted paths.""" + input = {"foo": {"bar": "abc"}} + self.assertEqual({"foo.bar": "abc"}, _flatten_dict(input)) + + def test_non_string(self) -> None: + """Non-string items are dropped.""" + input: Dict[str, Any] = { + "woo": "woo", + "foo": True, + "bar": 1, + "baz": None, + "fuzz": [], + "boo": {}, + } + self.assertEqual({"woo": "woo"}, _flatten_dict(input)) + + class PushRuleEvaluatorTestCase(unittest.TestCase): def _get_evaluator( self, -- cgit 1.5.1
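
To close the loop on the new docstring, here is how `_flatten_dict` behaves on a small invented content dict, consistent with the tests above: nested dictionaries become dotted paths, and non-string leaves are dropped.

    content = {
        "msgtype": "m.text",
        "body": "hi",
        "m.relates_to": {"rel_type": "m.thread"},
        "formatted": True,  # non-string leaf: dropped
    }
    _flatten_dict(content)
    # -> {"msgtype": "m.text", "body": "hi", "m.relates_to.rel_type": "m.thread"}
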