From 507c1cb3309e989d84ec3ff9557a96ae1fc7f369 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 11 Aug 2022 11:42:24 +0100 Subject: Update the rejected state of events during resync (#13459) Events can be un-rejected or newly-rejected during resync, so ensure we update the database and caches when that happens. --- synapse/storage/databases/main/events_worker.py | 60 +++++++++++++++++++++++++ synapse/storage/databases/main/state.py | 5 +++ 2 files changed, 65 insertions(+) (limited to 'synapse/storage/databases') diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index e9ff6cfb34..b07d812ae2 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -2200,3 +2200,63 @@ class EventsWorkerStore(SQLBaseStore): (room_id,), ) return [row[0] for row in txn] + + def mark_event_rejected_txn( + self, + txn: LoggingTransaction, + event_id: str, + rejection_reason: Optional[str], + ) -> None: + """Mark an event that was previously accepted as rejected, or vice versa + + This can happen, for example, when resyncing state during a faster join. + + Args: + txn: + event_id: ID of event to update + rejection_reason: reason it has been rejected, or None if it is now accepted + """ + if rejection_reason is None: + logger.info( + "Marking previously-processed event %s as accepted", + event_id, + ) + self.db_pool.simple_delete_txn( + txn, + "rejections", + keyvalues={"event_id": event_id}, + ) + else: + logger.info( + "Marking previously-processed event %s as rejected(%s)", + event_id, + rejection_reason, + ) + self.db_pool.simple_upsert_txn( + txn, + table="rejections", + keyvalues={"event_id": event_id}, + values={ + "reason": rejection_reason, + "last_check": self._clock.time_msec(), + }, + ) + self.db_pool.simple_update_txn( + txn, + table="events", + keyvalues={"event_id": event_id}, + updatevalues={"rejection_reason": rejection_reason}, + ) + + self.invalidate_get_event_cache_after_txn(txn, event_id) + + # TODO(faster_joins): invalidate the cache on workers. Ideally we'd just + # call '_send_invalidation_to_replication', but we actually need the other + # end to call _invalidate_local_get_event_cache() rather than (just) + # _get_event_cache.invalidate(). + # + # One solution might be to (somehow) get the workers to call + # _invalidate_caches_for_event() (though that will invalidate more than + # strictly necessary). + # + # https://github.com/matrix-org/synapse/issues/12994 diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index f70705a0af..0b10af0e58 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -430,6 +430,11 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): updatevalues={"state_group": state_group}, ) + # the event may now be rejected where it was not before, or vice versa, + # in which case we need to update the rejected flags. + if bool(context.rejected) != (event.rejected_reason is not None): + self.mark_event_rejected_txn(txn, event.event_id, context.rejected) + self.db_pool.simple_delete_one_txn( txn, table="partial_state_events", -- cgit 1.5.1 From 46bd7f4ed9020bbed459c03a11c26d7f7c3093b0 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 15 Aug 2022 09:33:17 -0400 Subject: Clarifications for event push action processing. (#13485) * Clarifies comments. * Fixes an erroneous comment (about return type) added in #13455 (ec24813220f9d54108924dc04aecd24555277b99). * Clarifies the name of a variable. * Simplifies logic of pulling out the latest join for the requesting user. --- changelog.d/13485.misc | 1 + .../storage/databases/main/event_push_actions.py | 53 ++++++++++++++-------- synapse/storage/databases/main/receipts.py | 2 +- 3 files changed, 35 insertions(+), 21 deletions(-) create mode 100644 changelog.d/13485.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/13485.misc b/changelog.d/13485.misc new file mode 100644 index 0000000000..c75712b9ff --- /dev/null +++ b/changelog.d/13485.misc @@ -0,0 +1 @@ +Add comments about how event push actions are rotated. diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 161aad0f89..f62aa45ca1 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -227,7 +227,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas user_id: str, ) -> NotifCounts: """Get the notification count, the highlight count and the unread message count - for a given user in a given room after the given read receipt. + for a given user in a given room after their latest read receipt. Note that this function assumes the user to be a current member of the room, since it's either called by the sync handler to handle joined room entries, or by @@ -238,9 +238,8 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas user_id: The user to retrieve the counts for. Returns - A dict containing the counts mentioned earlier in this docstring, - respectively under the keys "notify_count", "highlight_count" and - "unread_count". + A NotifCounts object containing the notification count, the highlight count + and the unread message count. """ return await self.db_pool.runInteraction( "get_unread_event_push_actions_by_room", @@ -255,6 +254,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas room_id: str, user_id: str, ) -> NotifCounts: + # Get the stream ordering of the user's latest receipt in the room. result = self.get_last_receipt_for_user_txn( txn, user_id, @@ -266,13 +266,11 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas ), ) - stream_ordering = None if result: _, stream_ordering = result - if stream_ordering is None: - # Either last_read_event_id is None, or it's an event we don't have (e.g. - # because it's been purged), in which case retrieve the stream ordering for + else: + # If the user has no receipts in the room, retrieve the stream ordering for # the latest membership event from this user in this room (which we assume is # a join). event_id = self.db_pool.simple_select_one_onecol_txn( @@ -289,10 +287,26 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas ) def _get_unread_counts_by_pos_txn( - self, txn: LoggingTransaction, room_id: str, user_id: str, stream_ordering: int + self, + txn: LoggingTransaction, + room_id: str, + user_id: str, + receipt_stream_ordering: int, ) -> NotifCounts: """Get the number of unread messages for a user/room that have happened since the given stream ordering. + + Args: + txn: The database transaction. + room_id: The room ID to get unread counts for. + user_id: The user ID to get unread counts for. + receipt_stream_ordering: The stream ordering of the user's latest + receipt in the room. If there are no receipts, the stream ordering + of the user's join event. + + Returns + A NotifCounts object containing the notification count, the highlight count + and the unread message count. """ counts = NotifCounts() @@ -320,7 +334,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas OR last_receipt_stream_ordering = ? ) """, - (room_id, user_id, stream_ordering, stream_ordering), + (room_id, user_id, receipt_stream_ordering, receipt_stream_ordering), ) row = txn.fetchone() @@ -338,17 +352,20 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas AND stream_ordering > ? AND highlight = 1 """ - txn.execute(sql, (user_id, room_id, stream_ordering)) + txn.execute(sql, (user_id, room_id, receipt_stream_ordering)) row = txn.fetchone() if row: counts.highlight_count += row[0] # Finally we need to count push actions that aren't included in the - # summary returned above, e.g. recent events that haven't been - # summarised yet, or the summary is empty due to a recent read receipt. - stream_ordering = max(stream_ordering, summary_stream_ordering) + # summary returned above. This might be due to recent events that haven't + # been summarised yet or the summary is out of date due to a recent read + # receipt. + start_unread_stream_ordering = max( + receipt_stream_ordering, summary_stream_ordering + ) notify_count, unread_count = self._get_notif_unread_count_for_user_room( - txn, room_id, user_id, stream_ordering + txn, room_id, user_id, start_unread_stream_ordering ) counts.notify_count += notify_count @@ -1151,8 +1168,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas txn: The database transaction. old_rotate_stream_ordering: The previous maximum event stream ordering. rotate_to_stream_ordering: The new maximum event stream ordering to summarise. - - Returns whether the archiving process has caught up or not. """ # Calculate the new counts that should be upserted into event_push_summary @@ -1238,9 +1253,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas (rotate_to_stream_ordering,), ) - async def _remove_old_push_actions_that_have_rotated( - self, - ) -> None: + async def _remove_old_push_actions_that_have_rotated(self) -> None: """Clear out old push actions that have been summarised.""" # We want to clear out anything that is older than a day that *has* already diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py index 0090c9f225..124c70ad37 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py @@ -161,7 +161,7 @@ class ReceiptsWorkerStore(SQLBaseStore): receipt_type: The receipt types to fetch. Returns: - The latest receipt, if one exists. + The event ID and stream ordering of the latest receipt, if one exists. """ clause, args = make_in_list_sql_clause( -- cgit 1.5.1 From 344a2f767c636259412f7fc2914c1554a5c4dc1d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 15 Aug 2022 13:41:23 -0500 Subject: Instrument `FederationStateIdsServlet` - `/state_ids` (#13499) Instrument FederationStateIdsServlet - `/state_ids` so it's easier to follow what's going on in Jaeger when viewing a trace. --- changelog.d/13499.misc | 1 + synapse/federation/federation_server.py | 11 ++++++++++- synapse/handlers/federation.py | 4 +++- synapse/storage/databases/main/event_federation.py | 3 +++ synapse/util/ratelimitutils.py | 4 ++++ 5 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13499.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/13499.misc b/changelog.d/13499.misc new file mode 100644 index 0000000000..99dbcebec8 --- /dev/null +++ b/changelog.d/13499.misc @@ -0,0 +1 @@ +Instrument `FederationStateIdsServlet` (`/state_ids`) for understandable traces in Jaeger. diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index db4b83a505..75fbc6073d 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -61,7 +61,12 @@ from synapse.logging.context import ( nested_logging_context, run_in_background, ) -from synapse.logging.opentracing import log_kv, start_active_span_from_edu, trace +from synapse.logging.opentracing import ( + log_kv, + start_active_span_from_edu, + tag_args, + trace, +) from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.replication.http.federation import ( ReplicationFederationSendEduRestServlet, @@ -547,6 +552,8 @@ class FederationServer(FederationBase): return 200, resp + @trace + @tag_args async def on_state_ids_request( self, origin: str, room_id: str, event_id: str ) -> Tuple[int, JsonDict]: @@ -569,6 +576,8 @@ class FederationServer(FederationBase): return 200, resp + @trace + @tag_args async def _on_state_ids_request_compute( self, room_id: str, event_id: str ) -> JsonDict: diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 5042236742..6f5ab86ac4 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -59,7 +59,7 @@ from synapse.events.validator import EventValidator from synapse.federation.federation_client import InvalidResponseError from synapse.http.servlet import assert_params_in_dict from synapse.logging.context import nested_logging_context -from synapse.logging.opentracing import trace +from synapse.logging.opentracing import tag_args, trace from synapse.metrics.background_process_metrics import run_as_background_process from synapse.module_api import NOT_SPAM from synapse.replication.http.federation import ( @@ -1081,6 +1081,8 @@ class FederationHandler: return event + @trace + @tag_args async def get_state_ids_for_pdu(self, room_id: str, event_id: str) -> List[str]: """Returns the state at the event. i.e. not including said event.""" event = await self.store.get_event(event_id, check_room_id=room_id) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index eec55b6478..0bc8401f2b 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -33,6 +33,7 @@ from synapse.api.constants import MAX_DEPTH, EventTypes from synapse.api.errors import StoreError from synapse.api.room_versions import EventFormatVersions, RoomVersion from synapse.events import EventBase, make_event_from_dict +from synapse.logging.opentracing import tag_args, trace from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause from synapse.storage.database import ( @@ -126,6 +127,8 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas ) return await self.get_events_as_list(event_ids) + @trace + @tag_args async def get_auth_chain_ids( self, room_id: str, diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index 6394cc39ac..e1beaec5a3 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -27,6 +27,7 @@ from synapse.logging.context import ( make_deferred_yieldable, run_in_background, ) +from synapse.logging.opentracing import start_active_span from synapse.util import Clock if typing.TYPE_CHECKING: @@ -176,8 +177,11 @@ class _PerHostRatelimiter: # Ensure that we've properly cleaned up. self.sleeping_requests.discard(request_id) self.ready_request_queue.pop(request_id, None) + wait_span_scope.__exit__(None, None, None) return r + wait_span_scope = start_active_span("ratelimit wait") + wait_span_scope.__enter__() ret_defer.addCallbacks(on_start, on_err) ret_defer.addBoth(on_both) return make_deferred_yieldable(ret_defer) -- cgit 1.5.1 From 5442891cbca67d3af27c448791589e0b9abeb7f8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Aug 2022 12:22:17 +0100 Subject: Make push rules use proper structures. (#13522) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This improves load times for push rules: | Version | Time per user | Time for 1k users | | -------------------- | ------------- | ----------------- | | Before | 138 µs | 138ms | | Now (with custom) | 2.11 µs | 2.11ms | | Now (without custom) | 49.7 ns | 0.05 ms | This therefore has a large impact on send times for rooms with large numbers of local users in the room. --- changelog.d/13522.misc | 1 + synapse/push/baserules.py | 518 +++++++++++++-------- synapse/push/bulk_push_rule_evaluator.py | 37 +- synapse/push/clientformat.py | 68 +-- synapse/push/push_rule_evaluator.py | 27 +- .../storage/databases/main/event_push_actions.py | 22 +- synapse/storage/databases/main/push_rule.py | 121 +++-- tests/handlers/test_deactivate_account.py | 33 +- 8 files changed, 494 insertions(+), 333 deletions(-) create mode 100644 changelog.d/13522.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/13522.misc b/changelog.d/13522.misc new file mode 100644 index 0000000000..0a8827205d --- /dev/null +++ b/changelog.d/13522.misc @@ -0,0 +1 @@ +Improve performance of sending messages in rooms with thousands of local users. diff --git a/synapse/push/baserules.py b/synapse/push/baserules.py index 6c0cc5a6ce..c3e072033c 100644 --- a/synapse/push/baserules.py +++ b/synapse/push/baserules.py @@ -14,128 +14,224 @@ # See the License for the specific language governing permissions and # limitations under the License. -import copy -from typing import Any, Dict, List - -from synapse.push.rulekinds import PRIORITY_CLASS_INVERSE_MAP, PRIORITY_CLASS_MAP +""" +Push rules is the system used to determine which events trigger a push (and a +bump in notification counts). + +This consists of a list of "push rules" for each user, where a push rule is a +pair of "conditions" and "actions". When a user receives an event Synapse +iterates over the list of push rules until it finds one where all the conditions +match the event, at which point "actions" describe the outcome (e.g. notify, +highlight, etc). + +Push rules are split up into 5 different "kinds" (aka "priority classes"), which +are run in order: + 1. Override — highest priority rules, e.g. always ignore notices + 2. Content — content specific rules, e.g. @ notifications + 3. Room — per room rules, e.g. enable/disable notifications for all messages + in a room + 4. Sender — per sender rules, e.g. never notify for messages from a given + user + 5. Underride — the lowest priority "default" rules, e.g. notify for every + message. + +The set of "base rules" are the list of rules that every user has by default. A +user can modify their copy of the push rules in one of three ways: + + 1. Adding a new push rule of a certain kind + 2. Changing the actions of a base rule + 3. Enabling/disabling a base rule. + +The base rules are split into whether they come before or after a particular +kind, so the order of push rule evaluation would be: base rules for before +"override" kind, user defined "override" rules, base rules after "override" +kind, etc, etc. +""" + +import itertools +from typing import Dict, Iterator, List, Mapping, Sequence, Tuple, Union + +import attr + +from synapse.config.experimental import ExperimentalConfig +from synapse.push.rulekinds import PRIORITY_CLASS_MAP + + +@attr.s(auto_attribs=True, slots=True, frozen=True) +class PushRule: + """A push rule + + Attributes: + rule_id: a unique ID for this rule + priority_class: what "kind" of push rule this is (see + `PRIORITY_CLASS_MAP` for mapping between int and kind) + conditions: the sequence of conditions that all need to match + actions: the actions to apply if all conditions are met + default: is this a base rule? + default_enabled: is this enabled by default? + """ + rule_id: str + priority_class: int + conditions: Sequence[Mapping[str, str]] + actions: Sequence[Union[str, Mapping]] + default: bool = False + default_enabled: bool = True -def list_with_base_rules(rawrules: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """Combine the list of rules set by the user with the default push rules - Args: - rawrules: The rules the user has modified or set. +@attr.s(auto_attribs=True, slots=True, frozen=True, weakref_slot=False) +class PushRules: + """A collection of push rules for an account. - Returns: - A new list with the rules set by the user combined with the defaults. + Can be iterated over, producing push rules in priority order. """ - ruleslist = [] - # Grab the base rules that the user has modified. - # The modified base rules have a priority_class of -1. - modified_base_rules = {r["rule_id"]: r for r in rawrules if r["priority_class"] < 0} + # A mapping from rule ID to push rule that overrides a base rule. These will + # be returned instead of the base rule. + overriden_base_rules: Dict[str, PushRule] = attr.Factory(dict) + + # The following stores the custom push rules at each priority class. + # + # We keep these separate (rather than combining into one big list) to avoid + # copying the base rules around all the time. + override: List[PushRule] = attr.Factory(list) + content: List[PushRule] = attr.Factory(list) + room: List[PushRule] = attr.Factory(list) + sender: List[PushRule] = attr.Factory(list) + underride: List[PushRule] = attr.Factory(list) + + def __iter__(self) -> Iterator[PushRule]: + # When iterating over the push rules we need to return the base rules + # interspersed at the correct spots. + for rule in itertools.chain( + BASE_PREPEND_OVERRIDE_RULES, + self.override, + BASE_APPEND_OVERRIDE_RULES, + self.content, + BASE_APPEND_CONTENT_RULES, + self.room, + self.sender, + self.underride, + BASE_APPEND_UNDERRIDE_RULES, + ): + # Check if a base rule has been overriden by a custom rule. If so + # return that instead. + override_rule = self.overriden_base_rules.get(rule.rule_id) + if override_rule: + yield override_rule + else: + yield rule + + def __len__(self) -> int: + # The length is mostly used by caches to get a sense of "size" / amount + # of memory this object is using, so we only count the number of custom + # rules. + return ( + len(self.overriden_base_rules) + + len(self.override) + + len(self.content) + + len(self.room) + + len(self.sender) + + len(self.underride) + ) - # Remove the modified base rules from the list, They'll be added back - # in the default positions in the list. - rawrules = [r for r in rawrules if r["priority_class"] >= 0] - # shove the server default rules for each kind onto the end of each - current_prio_class = list(PRIORITY_CLASS_INVERSE_MAP)[-1] +@attr.s(auto_attribs=True, slots=True, frozen=True, weakref_slot=False) +class FilteredPushRules: + """A wrapper around `PushRules` that filters out disabled experimental push + rules, and includes the "enabled" state for each rule when iterated over. + """ - ruleslist.extend( - make_base_prepend_rules( - PRIORITY_CLASS_INVERSE_MAP[current_prio_class], modified_base_rules - ) - ) + push_rules: PushRules + enabled_map: Dict[str, bool] + experimental_config: ExperimentalConfig - for r in rawrules: - if r["priority_class"] < current_prio_class: - while r["priority_class"] < current_prio_class: - ruleslist.extend( - make_base_append_rules( - PRIORITY_CLASS_INVERSE_MAP[current_prio_class], - modified_base_rules, - ) - ) - current_prio_class -= 1 - if current_prio_class > 0: - ruleslist.extend( - make_base_prepend_rules( - PRIORITY_CLASS_INVERSE_MAP[current_prio_class], - modified_base_rules, - ) - ) - - ruleslist.append(r) - - while current_prio_class > 0: - ruleslist.extend( - make_base_append_rules( - PRIORITY_CLASS_INVERSE_MAP[current_prio_class], modified_base_rules - ) - ) - current_prio_class -= 1 - if current_prio_class > 0: - ruleslist.extend( - make_base_prepend_rules( - PRIORITY_CLASS_INVERSE_MAP[current_prio_class], modified_base_rules - ) - ) + def __iter__(self) -> Iterator[Tuple[PushRule, bool]]: + for rule in self.push_rules: + if not _is_experimental_rule_enabled( + rule.rule_id, self.experimental_config + ): + continue - return ruleslist + enabled = self.enabled_map.get(rule.rule_id, rule.default_enabled) + yield rule, enabled -def make_base_append_rules( - kind: str, modified_base_rules: Dict[str, Dict[str, Any]] -) -> List[Dict[str, Any]]: - rules = [] + def __len__(self) -> int: + return len(self.push_rules) - if kind == "override": - rules = BASE_APPEND_OVERRIDE_RULES - elif kind == "underride": - rules = BASE_APPEND_UNDERRIDE_RULES - elif kind == "content": - rules = BASE_APPEND_CONTENT_RULES - # Copy the rules before modifying them - rules = copy.deepcopy(rules) - for r in rules: - # Only modify the actions, keep the conditions the same. - assert isinstance(r["rule_id"], str) - modified = modified_base_rules.get(r["rule_id"]) - if modified: - r["actions"] = modified["actions"] +DEFAULT_EMPTY_PUSH_RULES = PushRules() - return rules +def compile_push_rules(rawrules: List[PushRule]) -> PushRules: + """Given a set of custom push rules return a `PushRules` instance (which + includes the base rules). + """ + + if not rawrules: + # Fast path to avoid allocating empty lists when there are no custom + # rules for the user. + return DEFAULT_EMPTY_PUSH_RULES -def make_base_prepend_rules( - kind: str, - modified_base_rules: Dict[str, Dict[str, Any]], -) -> List[Dict[str, Any]]: - rules = [] + rules = PushRules() - if kind == "override": - rules = BASE_PREPEND_OVERRIDE_RULES + for rule in rawrules: + # We need to decide which bucket each custom push rule goes into. - # Copy the rules before modifying them - rules = copy.deepcopy(rules) - for r in rules: - # Only modify the actions, keep the conditions the same. - assert isinstance(r["rule_id"], str) - modified = modified_base_rules.get(r["rule_id"]) - if modified: - r["actions"] = modified["actions"] + # If it has the same ID as a base rule then it overrides that... + overriden_base_rule = BASE_RULES_BY_ID.get(rule.rule_id) + if overriden_base_rule: + rules.overriden_base_rules[rule.rule_id] = attr.evolve( + overriden_base_rule, actions=rule.actions + ) + continue + + # ... otherwise it gets added to the appropriate priority class bucket + collection: List[PushRule] + if rule.priority_class == 5: + collection = rules.override + elif rule.priority_class == 4: + collection = rules.content + elif rule.priority_class == 3: + collection = rules.room + elif rule.priority_class == 2: + collection = rules.sender + elif rule.priority_class == 1: + collection = rules.underride + else: + raise Exception(f"Unknown priority class: {rule.priority_class}") + + collection.append(rule) return rules -# We have to annotate these types, otherwise mypy infers them as -# `List[Dict[str, Sequence[Collection[str]]]]`. -BASE_APPEND_CONTENT_RULES: List[Dict[str, Any]] = [ - { - "rule_id": "global/content/.m.rule.contains_user_name", - "conditions": [ +def _is_experimental_rule_enabled( + rule_id: str, experimental_config: ExperimentalConfig +) -> bool: + """Used by `FilteredPushRules` to filter out experimental rules when they + have not been enabled. + """ + if ( + rule_id == "global/override/.org.matrix.msc3786.rule.room.server_acl" + and not experimental_config.msc3786_enabled + ): + return False + if ( + rule_id == "global/underride/.org.matrix.msc3772.thread_reply" + and not experimental_config.msc3772_enabled + ): + return False + return True + + +BASE_APPEND_CONTENT_RULES = [ + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["content"], + rule_id="global/content/.m.rule.contains_user_name", + conditions=[ { "kind": "event_match", "key": "content.body", @@ -143,29 +239,33 @@ BASE_APPEND_CONTENT_RULES: List[Dict[str, Any]] = [ "pattern_type": "user_localpart", } ], - "actions": [ + actions=[ "notify", {"set_tweak": "sound", "value": "default"}, {"set_tweak": "highlight"}, ], - } + ) ] -BASE_PREPEND_OVERRIDE_RULES: List[Dict[str, Any]] = [ - { - "rule_id": "global/override/.m.rule.master", - "enabled": False, - "conditions": [], - "actions": ["dont_notify"], - } +BASE_PREPEND_OVERRIDE_RULES = [ + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["override"], + rule_id="global/override/.m.rule.master", + default_enabled=False, + conditions=[], + actions=["dont_notify"], + ) ] -BASE_APPEND_OVERRIDE_RULES: List[Dict[str, Any]] = [ - { - "rule_id": "global/override/.m.rule.suppress_notices", - "conditions": [ +BASE_APPEND_OVERRIDE_RULES = [ + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["override"], + rule_id="global/override/.m.rule.suppress_notices", + conditions=[ { "kind": "event_match", "key": "content.msgtype", @@ -173,13 +273,15 @@ BASE_APPEND_OVERRIDE_RULES: List[Dict[str, Any]] = [ "_cache_key": "_suppress_notices", } ], - "actions": ["dont_notify"], - }, + actions=["dont_notify"], + ), # NB. .m.rule.invite_for_me must be higher prio than .m.rule.member_event # otherwise invites will be matched by .m.rule.member_event - { - "rule_id": "global/override/.m.rule.invite_for_me", - "conditions": [ + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["override"], + rule_id="global/override/.m.rule.invite_for_me", + conditions=[ { "kind": "event_match", "key": "type", @@ -195,21 +297,23 @@ BASE_APPEND_OVERRIDE_RULES: List[Dict[str, Any]] = [ # Match the requester's MXID. {"kind": "event_match", "key": "state_key", "pattern_type": "user_id"}, ], - "actions": [ + actions=[ "notify", {"set_tweak": "sound", "value": "default"}, {"set_tweak": "highlight", "value": False}, ], - }, + ), # Will we sometimes want to know about people joining and leaving? # Perhaps: if so, this could be expanded upon. Seems the most usual case # is that we don't though. We add this override rule so that even if # the room rule is set to notify, we don't get notifications about # join/leave/avatar/displayname events. # See also: https://matrix.org/jira/browse/SYN-607 - { - "rule_id": "global/override/.m.rule.member_event", - "conditions": [ + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["override"], + rule_id="global/override/.m.rule.member_event", + conditions=[ { "kind": "event_match", "key": "type", @@ -217,24 +321,28 @@ BASE_APPEND_OVERRIDE_RULES: List[Dict[str, Any]] = [ "_cache_key": "_member", } ], - "actions": ["dont_notify"], - }, + actions=["dont_notify"], + ), # This was changed from underride to override so it's closer in priority # to the content rules where the user name highlight rule lives. This # way a room rule is lower priority than both but a custom override rule # is higher priority than both. - { - "rule_id": "global/override/.m.rule.contains_display_name", - "conditions": [{"kind": "contains_display_name"}], - "actions": [ + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["override"], + rule_id="global/override/.m.rule.contains_display_name", + conditions=[{"kind": "contains_display_name"}], + actions=[ "notify", {"set_tweak": "sound", "value": "default"}, {"set_tweak": "highlight"}, ], - }, - { - "rule_id": "global/override/.m.rule.roomnotif", - "conditions": [ + ), + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["override"], + rule_id="global/override/.m.rule.roomnotif", + conditions=[ { "kind": "event_match", "key": "content.body", @@ -247,11 +355,13 @@ BASE_APPEND_OVERRIDE_RULES: List[Dict[str, Any]] = [ "_cache_key": "_roomnotif_pl", }, ], - "actions": ["notify", {"set_tweak": "highlight", "value": True}], - }, - { - "rule_id": "global/override/.m.rule.tombstone", - "conditions": [ + actions=["notify", {"set_tweak": "highlight", "value": True}], + ), + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["override"], + rule_id="global/override/.m.rule.tombstone", + conditions=[ { "kind": "event_match", "key": "type", @@ -265,11 +375,13 @@ BASE_APPEND_OVERRIDE_RULES: List[Dict[str, Any]] = [ "_cache_key": "_tombstone_statekey", }, ], - "actions": ["notify", {"set_tweak": "highlight", "value": True}], - }, - { - "rule_id": "global/override/.m.rule.reaction", - "conditions": [ + actions=["notify", {"set_tweak": "highlight", "value": True}], + ), + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["override"], + rule_id="global/override/.m.rule.reaction", + conditions=[ { "kind": "event_match", "key": "type", @@ -277,14 +389,16 @@ BASE_APPEND_OVERRIDE_RULES: List[Dict[str, Any]] = [ "_cache_key": "_reaction", } ], - "actions": ["dont_notify"], - }, + actions=["dont_notify"], + ), # XXX: This is an experimental rule that is only enabled if msc3786_enabled # is enabled, if it is not the rule gets filtered out in _load_rules() in # PushRulesWorkerStore - { - "rule_id": "global/override/.org.matrix.msc3786.rule.room.server_acl", - "conditions": [ + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["override"], + rule_id="global/override/.org.matrix.msc3786.rule.room.server_acl", + conditions=[ { "kind": "event_match", "key": "type", @@ -298,15 +412,17 @@ BASE_APPEND_OVERRIDE_RULES: List[Dict[str, Any]] = [ "_cache_key": "_room_server_acl_state_key", }, ], - "actions": [], - }, + actions=[], + ), ] -BASE_APPEND_UNDERRIDE_RULES: List[Dict[str, Any]] = [ - { - "rule_id": "global/underride/.m.rule.call", - "conditions": [ +BASE_APPEND_UNDERRIDE_RULES = [ + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["underride"], + rule_id="global/underride/.m.rule.call", + conditions=[ { "kind": "event_match", "key": "type", @@ -314,17 +430,19 @@ BASE_APPEND_UNDERRIDE_RULES: List[Dict[str, Any]] = [ "_cache_key": "_call", } ], - "actions": [ + actions=[ "notify", {"set_tweak": "sound", "value": "ring"}, {"set_tweak": "highlight", "value": False}, ], - }, + ), # XXX: once m.direct is standardised everywhere, we should use it to detect # a DM from the user's perspective rather than this heuristic. - { - "rule_id": "global/underride/.m.rule.room_one_to_one", - "conditions": [ + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["underride"], + rule_id="global/underride/.m.rule.room_one_to_one", + conditions=[ {"kind": "room_member_count", "is": "2", "_cache_key": "member_count"}, { "kind": "event_match", @@ -333,17 +451,19 @@ BASE_APPEND_UNDERRIDE_RULES: List[Dict[str, Any]] = [ "_cache_key": "_message", }, ], - "actions": [ + actions=[ "notify", {"set_tweak": "sound", "value": "default"}, {"set_tweak": "highlight", "value": False}, ], - }, + ), # XXX: this is going to fire for events which aren't m.room.messages # but are encrypted (e.g. m.call.*)... - { - "rule_id": "global/underride/.m.rule.encrypted_room_one_to_one", - "conditions": [ + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["underride"], + rule_id="global/underride/.m.rule.encrypted_room_one_to_one", + conditions=[ {"kind": "room_member_count", "is": "2", "_cache_key": "member_count"}, { "kind": "event_match", @@ -352,15 +472,17 @@ BASE_APPEND_UNDERRIDE_RULES: List[Dict[str, Any]] = [ "_cache_key": "_encrypted", }, ], - "actions": [ + actions=[ "notify", {"set_tweak": "sound", "value": "default"}, {"set_tweak": "highlight", "value": False}, ], - }, - { - "rule_id": "global/underride/.org.matrix.msc3772.thread_reply", - "conditions": [ + ), + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["underride"], + rule_id="global/underride/.org.matrix.msc3772.thread_reply", + conditions=[ { "kind": "org.matrix.msc3772.relation_match", "rel_type": "m.thread", @@ -368,11 +490,13 @@ BASE_APPEND_UNDERRIDE_RULES: List[Dict[str, Any]] = [ "sender_type": "user_id", } ], - "actions": ["notify", {"set_tweak": "highlight", "value": False}], - }, - { - "rule_id": "global/underride/.m.rule.message", - "conditions": [ + actions=["notify", {"set_tweak": "highlight", "value": False}], + ), + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["underride"], + rule_id="global/underride/.m.rule.message", + conditions=[ { "kind": "event_match", "key": "type", @@ -380,13 +504,15 @@ BASE_APPEND_UNDERRIDE_RULES: List[Dict[str, Any]] = [ "_cache_key": "_message", } ], - "actions": ["notify", {"set_tweak": "highlight", "value": False}], - }, + actions=["notify", {"set_tweak": "highlight", "value": False}], + ), # XXX: this is going to fire for events which aren't m.room.messages # but are encrypted (e.g. m.call.*)... - { - "rule_id": "global/underride/.m.rule.encrypted", - "conditions": [ + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["underride"], + rule_id="global/underride/.m.rule.encrypted", + conditions=[ { "kind": "event_match", "key": "type", @@ -394,11 +520,13 @@ BASE_APPEND_UNDERRIDE_RULES: List[Dict[str, Any]] = [ "_cache_key": "_encrypted", } ], - "actions": ["notify", {"set_tweak": "highlight", "value": False}], - }, - { - "rule_id": "global/underride/.im.vector.jitsi", - "conditions": [ + actions=["notify", {"set_tweak": "highlight", "value": False}], + ), + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["underride"], + rule_id="global/underride/.im.vector.jitsi", + conditions=[ { "kind": "event_match", "key": "type", @@ -418,29 +546,27 @@ BASE_APPEND_UNDERRIDE_RULES: List[Dict[str, Any]] = [ "_cache_key": "_is_state_event", }, ], - "actions": ["notify", {"set_tweak": "highlight", "value": False}], - }, + actions=["notify", {"set_tweak": "highlight", "value": False}], + ), ] BASE_RULE_IDS = set() +BASE_RULES_BY_ID: Dict[str, PushRule] = {} + for r in BASE_APPEND_CONTENT_RULES: - r["priority_class"] = PRIORITY_CLASS_MAP["content"] - r["default"] = True - BASE_RULE_IDS.add(r["rule_id"]) + BASE_RULE_IDS.add(r.rule_id) + BASE_RULES_BY_ID[r.rule_id] = r for r in BASE_PREPEND_OVERRIDE_RULES: - r["priority_class"] = PRIORITY_CLASS_MAP["override"] - r["default"] = True - BASE_RULE_IDS.add(r["rule_id"]) + BASE_RULE_IDS.add(r.rule_id) + BASE_RULES_BY_ID[r.rule_id] = r for r in BASE_APPEND_OVERRIDE_RULES: - r["priority_class"] = PRIORITY_CLASS_MAP["override"] - r["default"] = True - BASE_RULE_IDS.add(r["rule_id"]) + BASE_RULE_IDS.add(r.rule_id) + BASE_RULES_BY_ID[r.rule_id] = r for r in BASE_APPEND_UNDERRIDE_RULES: - r["priority_class"] = PRIORITY_CLASS_MAP["underride"] - r["default"] = True - BASE_RULE_IDS.add(r["rule_id"]) + BASE_RULE_IDS.add(r.rule_id) + BASE_RULES_BY_ID[r.rule_id] = r diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 713dcf6950..ccd512be54 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -15,7 +15,18 @@ import itertools import logging -from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Set, Tuple, Union +from typing import ( + TYPE_CHECKING, + Collection, + Dict, + Iterable, + List, + Mapping, + Optional, + Set, + Tuple, + Union, +) from prometheus_client import Counter @@ -30,6 +41,7 @@ from synapse.util.caches import register_cache from synapse.util.metrics import measure_func from synapse.visibility import filter_event_for_clients_with_state +from .baserules import FilteredPushRules, PushRule from .push_rule_evaluator import PushRuleEvaluatorForEvent if TYPE_CHECKING: @@ -112,7 +124,7 @@ class BulkPushRuleEvaluator: async def _get_rules_for_event( self, event: EventBase, - ) -> Dict[str, List[Dict[str, Any]]]: + ) -> Dict[str, FilteredPushRules]: """Get the push rules for all users who may need to be notified about the event. @@ -186,7 +198,7 @@ class BulkPushRuleEvaluator: return pl_event.content if pl_event else {}, sender_level async def _get_mutual_relations( - self, event: EventBase, rules: Iterable[Dict[str, Any]] + self, event: EventBase, rules: Iterable[Tuple[PushRule, bool]] ) -> Dict[str, Set[Tuple[str, str]]]: """ Fetch event metadata for events which related to the same event as the given event. @@ -216,12 +228,11 @@ class BulkPushRuleEvaluator: # Pre-filter to figure out which relation types are interesting. rel_types = set() - for rule in rules: - # Skip disabled rules. - if "enabled" in rule and not rule["enabled"]: + for rule, enabled in rules: + if not enabled: continue - for condition in rule["conditions"]: + for condition in rule.conditions: if condition["kind"] != "org.matrix.msc3772.relation_match": continue @@ -254,7 +265,7 @@ class BulkPushRuleEvaluator: count_as_unread = _should_count_as_unread(event, context) rules_by_user = await self._get_rules_for_event(event) - actions_by_user: Dict[str, List[Union[dict, str]]] = {} + actions_by_user: Dict[str, Collection[Union[Mapping, str]]] = {} room_member_count = await self.store.get_number_joined_users_in_room( event.room_id @@ -317,15 +328,13 @@ class BulkPushRuleEvaluator: # current user, it'll be added to the dict later. actions_by_user[uid] = [] - for rule in rules: - if "enabled" in rule and not rule["enabled"]: + for rule, enabled in rules: + if not enabled: continue - matches = evaluator.check_conditions( - rule["conditions"], uid, display_name - ) + matches = evaluator.check_conditions(rule.conditions, uid, display_name) if matches: - actions = [x for x in rule["actions"] if x != "dont_notify"] + actions = [x for x in rule.actions if x != "dont_notify"] if actions and "notify" in actions: # Push rules say we should notify the user of this event actions_by_user[uid] = actions diff --git a/synapse/push/clientformat.py b/synapse/push/clientformat.py index 5117ef6854..73618d9234 100644 --- a/synapse/push/clientformat.py +++ b/synapse/push/clientformat.py @@ -18,16 +18,15 @@ from typing import Any, Dict, List, Optional from synapse.push.rulekinds import PRIORITY_CLASS_INVERSE_MAP, PRIORITY_CLASS_MAP from synapse.types import UserID +from .baserules import FilteredPushRules, PushRule + def format_push_rules_for_user( - user: UserID, ruleslist: List + user: UserID, ruleslist: FilteredPushRules ) -> Dict[str, Dict[str, list]]: """Converts a list of rawrules and a enabled map into nested dictionaries to match the Matrix client-server format for push rules""" - # We're going to be mutating this a lot, so do a deep copy - ruleslist = copy.deepcopy(ruleslist) - rules: Dict[str, Dict[str, List[Dict[str, Any]]]] = { "global": {}, "device": {}, @@ -35,11 +34,30 @@ def format_push_rules_for_user( rules["global"] = _add_empty_priority_class_arrays(rules["global"]) - for r in ruleslist: - template_name = _priority_class_to_template_name(r["priority_class"]) + for r, enabled in ruleslist: + template_name = _priority_class_to_template_name(r.priority_class) + + rulearray = rules["global"][template_name] + + template_rule = _rule_to_template(r) + if not template_rule: + continue + + rulearray.append(template_rule) + + template_rule["enabled"] = enabled + + if "conditions" not in template_rule: + # Not all formatted rules have explicit conditions, e.g. "room" + # rules omit them as they can be derived from the kind and rule ID. + # + # If the formatted rule has no conditions then we can skip the + # formatting of conditions. + continue # Remove internal stuff. - for c in r["conditions"]: + template_rule["conditions"] = copy.deepcopy(template_rule["conditions"]) + for c in template_rule["conditions"]: c.pop("_cache_key", None) pattern_type = c.pop("pattern_type", None) @@ -52,16 +70,6 @@ def format_push_rules_for_user( if sender_type == "user_id": c["sender"] = user.to_string() - rulearray = rules["global"][template_name] - - template_rule = _rule_to_template(r) - if template_rule: - if "enabled" in r: - template_rule["enabled"] = r["enabled"] - else: - template_rule["enabled"] = True - rulearray.append(template_rule) - return rules @@ -71,24 +79,24 @@ def _add_empty_priority_class_arrays(d: Dict[str, list]) -> Dict[str, list]: return d -def _rule_to_template(rule: Dict[str, Any]) -> Optional[Dict[str, Any]]: - unscoped_rule_id = None - if "rule_id" in rule: - unscoped_rule_id = _rule_id_from_namespaced(rule["rule_id"]) +def _rule_to_template(rule: PushRule) -> Optional[Dict[str, Any]]: + templaterule: Dict[str, Any] + + unscoped_rule_id = _rule_id_from_namespaced(rule.rule_id) - template_name = _priority_class_to_template_name(rule["priority_class"]) + template_name = _priority_class_to_template_name(rule.priority_class) if template_name in ["override", "underride"]: - templaterule = {k: rule[k] for k in ["conditions", "actions"]} + templaterule = {"conditions": rule.conditions, "actions": rule.actions} elif template_name in ["sender", "room"]: - templaterule = {"actions": rule["actions"]} - unscoped_rule_id = rule["conditions"][0]["pattern"] + templaterule = {"actions": rule.actions} + unscoped_rule_id = rule.conditions[0]["pattern"] elif template_name == "content": - if len(rule["conditions"]) != 1: + if len(rule.conditions) != 1: return None - thecond = rule["conditions"][0] + thecond = rule.conditions[0] if "pattern" not in thecond: return None - templaterule = {"actions": rule["actions"]} + templaterule = {"actions": rule.actions} templaterule["pattern"] = thecond["pattern"] else: # This should not be reached unless this function is not kept in sync @@ -97,8 +105,8 @@ def _rule_to_template(rule: Dict[str, Any]) -> Optional[Dict[str, Any]]: if unscoped_rule_id: templaterule["rule_id"] = unscoped_rule_id - if "default" in rule: - templaterule["default"] = rule["default"] + if rule.default: + templaterule["default"] = True return templaterule diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py index 2e8a017add..3c5632cd91 100644 --- a/synapse/push/push_rule_evaluator.py +++ b/synapse/push/push_rule_evaluator.py @@ -15,7 +15,18 @@ import logging import re -from typing import Any, Dict, List, Mapping, Optional, Pattern, Set, Tuple, Union +from typing import ( + Any, + Dict, + List, + Mapping, + Optional, + Pattern, + Sequence, + Set, + Tuple, + Union, +) from matrix_common.regex import glob_to_regex, to_word_pattern @@ -32,14 +43,14 @@ INEQUALITY_EXPR = re.compile("^([=<>]*)([0-9]*)$") def _room_member_count( - ev: EventBase, condition: Dict[str, Any], room_member_count: int + ev: EventBase, condition: Mapping[str, Any], room_member_count: int ) -> bool: return _test_ineq_condition(condition, room_member_count) def _sender_notification_permission( ev: EventBase, - condition: Dict[str, Any], + condition: Mapping[str, Any], sender_power_level: int, power_levels: Dict[str, Union[int, Dict[str, int]]], ) -> bool: @@ -54,7 +65,7 @@ def _sender_notification_permission( return sender_power_level >= room_notif_level -def _test_ineq_condition(condition: Dict[str, Any], number: int) -> bool: +def _test_ineq_condition(condition: Mapping[str, Any], number: int) -> bool: if "is" not in condition: return False m = INEQUALITY_EXPR.match(condition["is"]) @@ -137,7 +148,7 @@ class PushRuleEvaluatorForEvent: self._condition_cache: Dict[str, bool] = {} def check_conditions( - self, conditions: List[dict], uid: str, display_name: Optional[str] + self, conditions: Sequence[Mapping], uid: str, display_name: Optional[str] ) -> bool: """ Returns true if a user's conditions/user ID/display name match the event. @@ -169,7 +180,7 @@ class PushRuleEvaluatorForEvent: return True def matches( - self, condition: Dict[str, Any], user_id: str, display_name: Optional[str] + self, condition: Mapping[str, Any], user_id: str, display_name: Optional[str] ) -> bool: """ Returns true if a user's condition/user ID/display name match the event. @@ -204,7 +215,7 @@ class PushRuleEvaluatorForEvent: # endpoint with an unknown kind, see _rule_tuple_from_request_object. return True - def _event_match(self, condition: dict, user_id: str) -> bool: + def _event_match(self, condition: Mapping, user_id: str) -> bool: """ Check an "event_match" push rule condition. @@ -269,7 +280,7 @@ class PushRuleEvaluatorForEvent: return bool(r.search(body)) - def _relation_match(self, condition: dict, user_id: str) -> bool: + def _relation_match(self, condition: Mapping, user_id: str) -> bool: """ Check an "relation_match" push rule condition. diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index f62aa45ca1..eabf9c9739 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -74,7 +74,17 @@ receipt. """ import logging -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union, cast +from typing import ( + TYPE_CHECKING, + Collection, + Dict, + List, + Mapping, + Optional, + Tuple, + Union, + cast, +) import attr @@ -154,7 +164,9 @@ class NotifCounts: highlight_count: int = 0 -def _serialize_action(actions: List[Union[dict, str]], is_highlight: bool) -> str: +def _serialize_action( + actions: Collection[Union[Mapping, str]], is_highlight: bool +) -> str: """Custom serializer for actions. This allows us to "compress" common actions. We use the fact that most users have the same actions for notifs (and for @@ -750,7 +762,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas async def add_push_actions_to_staging( self, event_id: str, - user_id_actions: Dict[str, List[Union[dict, str]]], + user_id_actions: Dict[str, Collection[Union[Mapping, str]]], count_as_unread: bool, ) -> None: """Add the push actions for the event to the push action staging area. @@ -767,7 +779,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas # This is a helper function for generating the necessary tuple that # can be used to insert into the `event_push_actions_staging` table. def _gen_entry( - user_id: str, actions: List[Union[dict, str]] + user_id: str, actions: Collection[Union[Mapping, str]] ) -> Tuple[str, str, str, int, int, int]: is_highlight = 1 if _action_has_highlight(actions) else 0 notif = 1 if "notify" in actions else 0 @@ -1410,7 +1422,7 @@ class EventPushActionsStore(EventPushActionsWorkerStore): ] -def _action_has_highlight(actions: List[Union[dict, str]]) -> bool: +def _action_has_highlight(actions: Collection[Union[Mapping, str]]) -> bool: for action in actions: if not isinstance(action, dict): continue diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py index 768f95d16c..255620f996 100644 --- a/synapse/storage/databases/main/push_rule.py +++ b/synapse/storage/databases/main/push_rule.py @@ -14,11 +14,23 @@ # limitations under the License. import abc import logging -from typing import TYPE_CHECKING, Collection, Dict, List, Optional, Tuple, Union, cast +from typing import ( + TYPE_CHECKING, + Any, + Collection, + Dict, + List, + Mapping, + Optional, + Sequence, + Tuple, + Union, + cast, +) from synapse.api.errors import StoreError from synapse.config.homeserver import ExperimentalConfig -from synapse.push.baserules import list_with_base_rules +from synapse.push.baserules import FilteredPushRules, PushRule, compile_push_rules from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker from synapse.storage._base import SQLBaseStore, db_to_json from synapse.storage.database import ( @@ -50,60 +62,30 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -def _is_experimental_rule_enabled( - rule_id: str, experimental_config: ExperimentalConfig -) -> bool: - """Used by `_load_rules` to filter out experimental rules when they - have not been enabled. - """ - if ( - rule_id == "global/override/.org.matrix.msc3786.rule.room.server_acl" - and not experimental_config.msc3786_enabled - ): - return False - if ( - rule_id == "global/underride/.org.matrix.msc3772.thread_reply" - and not experimental_config.msc3772_enabled - ): - return False - return True - - def _load_rules( rawrules: List[JsonDict], enabled_map: Dict[str, bool], experimental_config: ExperimentalConfig, -) -> List[JsonDict]: - ruleslist = [] - for rawrule in rawrules: - rule = dict(rawrule) - rule["conditions"] = db_to_json(rawrule["conditions"]) - rule["actions"] = db_to_json(rawrule["actions"]) - rule["default"] = False - ruleslist.append(rule) - - # We're going to be mutating this a lot, so copy it. We also filter out - # any experimental default push rules that aren't enabled. - rules = [ - rule - for rule in list_with_base_rules(ruleslist) - if _is_experimental_rule_enabled(rule["rule_id"], experimental_config) - ] +) -> FilteredPushRules: + """Take the DB rows returned from the DB and convert them into a full + `FilteredPushRules` object. + """ - for i, rule in enumerate(rules): - rule_id = rule["rule_id"] + ruleslist = [ + PushRule( + rule_id=rawrule["rule_id"], + priority_class=rawrule["priority_class"], + conditions=db_to_json(rawrule["conditions"]), + actions=db_to_json(rawrule["actions"]), + ) + for rawrule in rawrules + ] - if rule_id not in enabled_map: - continue - if rule.get("enabled", True) == bool(enabled_map[rule_id]): - continue + push_rules = compile_push_rules(ruleslist) - # Rules are cached across users. - rule = dict(rule) - rule["enabled"] = bool(enabled_map[rule_id]) - rules[i] = rule + filtered_rules = FilteredPushRules(push_rules, enabled_map, experimental_config) - return rules + return filtered_rules # The ABCMeta metaclass ensures that it cannot be instantiated without @@ -162,7 +144,7 @@ class PushRulesWorkerStore( raise NotImplementedError() @cached(max_entries=5000) - async def get_push_rules_for_user(self, user_id: str) -> List[JsonDict]: + async def get_push_rules_for_user(self, user_id: str) -> FilteredPushRules: rows = await self.db_pool.simple_select_list( table="push_rules", keyvalues={"user_name": user_id}, @@ -216,11 +198,11 @@ class PushRulesWorkerStore( @cachedList(cached_method_name="get_push_rules_for_user", list_name="user_ids") async def bulk_get_push_rules( self, user_ids: Collection[str] - ) -> Dict[str, List[JsonDict]]: + ) -> Dict[str, FilteredPushRules]: if not user_ids: return {} - results: Dict[str, List[JsonDict]] = {user_id: [] for user_id in user_ids} + raw_rules: Dict[str, List[JsonDict]] = {user_id: [] for user_id in user_ids} rows = await self.db_pool.simple_select_many_batch( table="push_rules", @@ -234,11 +216,13 @@ class PushRulesWorkerStore( rows.sort(key=lambda row: (-int(row["priority_class"]), -int(row["priority"]))) for row in rows: - results.setdefault(row["user_name"], []).append(row) + raw_rules.setdefault(row["user_name"], []).append(row) enabled_map_by_user = await self.bulk_get_push_rules_enabled(user_ids) - for user_id, rules in results.items(): + results: Dict[str, FilteredPushRules] = {} + + for user_id, rules in raw_rules.items(): results[user_id] = _load_rules( rules, enabled_map_by_user.get(user_id, {}), self.hs.config.experimental ) @@ -345,8 +329,8 @@ class PushRuleStore(PushRulesWorkerStore): user_id: str, rule_id: str, priority_class: int, - conditions: List[Dict[str, str]], - actions: List[Union[JsonDict, str]], + conditions: Sequence[Mapping[str, str]], + actions: Sequence[Union[Mapping[str, Any], str]], before: Optional[str] = None, after: Optional[str] = None, ) -> None: @@ -817,7 +801,7 @@ class PushRuleStore(PushRulesWorkerStore): return self._push_rules_stream_id_gen.get_current_token() async def copy_push_rule_from_room_to_room( - self, new_room_id: str, user_id: str, rule: dict + self, new_room_id: str, user_id: str, rule: PushRule ) -> None: """Copy a single push rule from one room to another for a specific user. @@ -827,21 +811,27 @@ class PushRuleStore(PushRulesWorkerStore): rule: A push rule. """ # Create new rule id - rule_id_scope = "/".join(rule["rule_id"].split("/")[:-1]) + rule_id_scope = "/".join(rule.rule_id.split("/")[:-1]) new_rule_id = rule_id_scope + "/" + new_room_id + new_conditions = [] + # Change room id in each condition - for condition in rule.get("conditions", []): + for condition in rule.conditions: + new_condition = condition if condition.get("key") == "room_id": - condition["pattern"] = new_room_id + new_condition = dict(condition) + new_condition["pattern"] = new_room_id + + new_conditions.append(new_condition) # Add the rule for the new room await self.add_push_rule( user_id=user_id, rule_id=new_rule_id, - priority_class=rule["priority_class"], - conditions=rule["conditions"], - actions=rule["actions"], + priority_class=rule.priority_class, + conditions=new_conditions, + actions=rule.actions, ) async def copy_push_rules_from_room_to_room_for_user( @@ -859,8 +849,11 @@ class PushRuleStore(PushRulesWorkerStore): user_push_rules = await self.get_push_rules_for_user(user_id) # Get rules relating to the old room and copy them to the new room - for rule in user_push_rules: - conditions = rule.get("conditions", []) + for rule, enabled in user_push_rules: + if not enabled: + continue + + conditions = rule.conditions if any( (c.get("key") == "room_id" and c.get("pattern") == old_room_id) for c in conditions diff --git a/tests/handlers/test_deactivate_account.py b/tests/handlers/test_deactivate_account.py index ff9f2e8edb..82baa8f154 100644 --- a/tests/handlers/test_deactivate_account.py +++ b/tests/handlers/test_deactivate_account.py @@ -11,11 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Dict from twisted.test.proto_helpers import MemoryReactor from synapse.api.constants import AccountDataTypes +from synapse.push.baserules import PushRule from synapse.push.rulekinds import PRIORITY_CLASS_MAP from synapse.rest import admin from synapse.rest.client import account, login @@ -130,12 +130,12 @@ class DeactivateAccountTestCase(HomeserverTestCase): ), ) - def _is_custom_rule(self, push_rule: Dict[str, Any]) -> bool: + def _is_custom_rule(self, push_rule: PushRule) -> bool: """ Default rules start with a dot: such as .m.rule and .im.vector. This function returns true iff a rule is custom (not default). """ - return "/." not in push_rule["rule_id"] + return "/." not in push_rule.rule_id def test_push_rules_deleted_upon_account_deactivation(self) -> None: """ @@ -157,22 +157,21 @@ class DeactivateAccountTestCase(HomeserverTestCase): ) # Test the rule exists - push_rules = self.get_success(self._store.get_push_rules_for_user(self.user)) + filtered_push_rules = self.get_success( + self._store.get_push_rules_for_user(self.user) + ) # Filter out default rules; we don't care - push_rules = list(filter(self._is_custom_rule, push_rules)) + push_rules = [r for r, _ in filtered_push_rules if self._is_custom_rule(r)] # Check our rule made it self.assertEqual( push_rules, [ - { - "user_name": "@user:test", - "rule_id": "personal.override.rule1", - "priority_class": 5, - "priority": 0, - "conditions": [], - "actions": [], - "default": False, - } + PushRule( + rule_id="personal.override.rule1", + priority_class=5, + conditions=[], + actions=[], + ) ], push_rules, ) @@ -180,9 +179,11 @@ class DeactivateAccountTestCase(HomeserverTestCase): # Request the deactivation of our account self._deactivate_my_account() - push_rules = self.get_success(self._store.get_push_rules_for_user(self.user)) + filtered_push_rules = self.get_success( + self._store.get_push_rules_for_user(self.user) + ) # Filter out default rules; we don't care - push_rules = list(filter(self._is_custom_rule, push_rules)) + push_rules = [r for r, _ in filtered_push_rules if self._is_custom_rule(r)] # Check our rule no longer exists self.assertEqual(push_rules, [], push_rules) -- cgit 1.5.1 From c3516e9decc355b75a297d72a13b98a43d312e66 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Tue, 16 Aug 2022 12:16:56 +0000 Subject: Faster room joins: make `/joined_members` block whilst the room is partial stated. (#13514) --- changelog.d/13514.bugfix | 1 + synapse/handlers/message.py | 6 +++++- synapse/storage/controllers/state.py | 13 +++++++++++++ synapse/storage/databases/main/roommember.py | 3 +++ 4 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13514.bugfix (limited to 'synapse/storage/databases') diff --git a/changelog.d/13514.bugfix b/changelog.d/13514.bugfix new file mode 100644 index 0000000000..7498af0e47 --- /dev/null +++ b/changelog.d/13514.bugfix @@ -0,0 +1 @@ +Faster room joins: make `/joined_members` block whilst the room is partial stated. \ No newline at end of file diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 6b03603598..8f29ee9a87 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -331,7 +331,11 @@ class MessageHandler: msg="Getting joined members while not being a current member of the room is forbidden.", ) - users_with_profile = await self.store.get_users_in_room_with_profiles(room_id) + users_with_profile = ( + await self._state_storage_controller.get_users_in_room_with_profiles( + room_id + ) + ) # If this is an AS, double check that they are allowed to see the members. # This can either be because the AS user is in the room or because there diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py index 0d480f1014..0c78eb735e 100644 --- a/synapse/storage/controllers/state.py +++ b/synapse/storage/controllers/state.py @@ -30,6 +30,7 @@ from typing import ( from synapse.api.constants import EventTypes from synapse.events import EventBase from synapse.logging.opentracing import trace +from synapse.storage.roommember import ProfileInfo from synapse.storage.state import StateFilter from synapse.storage.util.partial_state_events_tracker import ( PartialCurrentStateTracker, @@ -506,3 +507,15 @@ class StateStorageController: await self._partial_state_room_tracker.await_full_state(room_id) return await self.stores.main.get_current_hosts_in_room(room_id) + + async def get_users_in_room_with_profiles( + self, room_id: str + ) -> Dict[str, ProfileInfo]: + """ + Get the current users in the room with their profiles. + If the room is currently partial-stated, this will block until the room has + full state. + """ + await self._partial_state_room_tracker.await_full_state(room_id) + + return await self.stores.main.get_users_in_room_with_profiles(room_id) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 93ff4816c8..5e5f607a14 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -283,6 +283,9 @@ class RoomMemberWorkerStore(EventsWorkerStore): Returns: A mapping from user ID to ProfileInfo. + + Preconditions: + - There is full state available for the room (it is not partial-stated). """ def _get_users_in_room_with_profiles( -- cgit 1.5.1 From 0a4efbc1ddc3a58a6d75ad5d4d960b9ed367481e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 16 Aug 2022 12:39:40 -0500 Subject: Instrument the federation/backfill part of `/messages` (#13489) Instrument the federation/backfill part of `/messages` so it's easier to follow what's going on in Jaeger when viewing a trace. Split out from https://github.com/matrix-org/synapse/pull/13440 Follow-up from https://github.com/matrix-org/synapse/pull/13368 Part of https://github.com/matrix-org/synapse/issues/13356 --- changelog.d/13489.misc | 1 + synapse/federation/federation_client.py | 27 ++++- synapse/handlers/federation.py | 10 +- synapse/handlers/federation_event.py | 112 ++++++++++++++++++--- synapse/logging/opentracing.py | 19 +++- synapse/storage/controllers/persist_events.py | 30 ++++-- synapse/storage/controllers/state.py | 5 +- synapse/storage/databases/main/event_federation.py | 6 ++ synapse/storage/databases/main/events.py | 2 + synapse/storage/databases/main/events_worker.py | 38 +++++-- .../storage/util/partial_state_events_tracker.py | 3 + 11 files changed, 220 insertions(+), 33 deletions(-) create mode 100644 changelog.d/13489.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/13489.misc b/changelog.d/13489.misc new file mode 100644 index 0000000000..5e4853860e --- /dev/null +++ b/changelog.d/13489.misc @@ -0,0 +1 @@ +Instrument the federation/backfill part of `/messages` for understandable traces in Jaeger. diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 54ffbd8170..987f6dad46 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -61,7 +61,7 @@ from synapse.federation.federation_base import ( ) from synapse.federation.transport.client import SendJoinResponse from synapse.http.types import QueryParams -from synapse.logging.opentracing import trace +from synapse.logging.opentracing import SynapseTags, set_tag, tag_args, trace from synapse.types import JsonDict, UserID, get_domain_from_id from synapse.util.async_helpers import concurrently_execute from synapse.util.caches.expiringcache import ExpiringCache @@ -235,6 +235,7 @@ class FederationClient(FederationBase): ) @trace + @tag_args async def backfill( self, dest: str, room_id: str, limit: int, extremities: Collection[str] ) -> Optional[List[EventBase]]: @@ -337,6 +338,8 @@ class FederationClient(FederationBase): return None + @trace + @tag_args async def get_pdu( self, destinations: Iterable[str], @@ -448,6 +451,8 @@ class FederationClient(FederationBase): return event_copy + @trace + @tag_args async def get_room_state_ids( self, destination: str, room_id: str, event_id: str ) -> Tuple[List[str], List[str]]: @@ -467,6 +472,23 @@ class FederationClient(FederationBase): state_event_ids = result["pdu_ids"] auth_event_ids = result.get("auth_chain_ids", []) + set_tag( + SynapseTags.RESULT_PREFIX + "state_event_ids", + str(state_event_ids), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "state_event_ids.length", + str(len(state_event_ids)), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "auth_event_ids", + str(auth_event_ids), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "auth_event_ids.length", + str(len(auth_event_ids)), + ) + if not isinstance(state_event_ids, list) or not isinstance( auth_event_ids, list ): @@ -474,6 +496,8 @@ class FederationClient(FederationBase): return state_event_ids, auth_event_ids + @trace + @tag_args async def get_room_state( self, destination: str, @@ -533,6 +557,7 @@ class FederationClient(FederationBase): return valid_state_events, valid_auth_events + @trace async def _check_sigs_and_hash_and_fetch( self, origin: str, diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 6f5ab86ac4..d13011d138 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -59,7 +59,7 @@ from synapse.events.validator import EventValidator from synapse.federation.federation_client import InvalidResponseError from synapse.http.servlet import assert_params_in_dict from synapse.logging.context import nested_logging_context -from synapse.logging.opentracing import tag_args, trace +from synapse.logging.opentracing import SynapseTags, set_tag, tag_args, trace from synapse.metrics.background_process_metrics import run_as_background_process from synapse.module_api import NOT_SPAM from synapse.replication.http.federation import ( @@ -370,6 +370,14 @@ class FederationHandler: logger.debug( "_maybe_backfill_inner: extremities_to_request %s", extremities_to_request ) + set_tag( + SynapseTags.RESULT_PREFIX + "extremities_to_request", + str(extremities_to_request), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "extremities_to_request.length", + str(len(extremities_to_request)), + ) # Now we need to decide which hosts to hit first. diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 8968b705d4..dd0d610fe9 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -59,7 +59,13 @@ from synapse.events import EventBase from synapse.events.snapshot import EventContext from synapse.federation.federation_client import InvalidResponseError from synapse.logging.context import nested_logging_context -from synapse.logging.opentracing import trace +from synapse.logging.opentracing import ( + SynapseTags, + set_tag, + start_active_span, + tag_args, + trace, +) from synapse.metrics.background_process_metrics import run_as_background_process from synapse.replication.http.devices import ReplicationUserDevicesResyncRestServlet from synapse.replication.http.federation import ( @@ -410,6 +416,7 @@ class FederationEventHandler: prev_member_event, ) + @trace async def process_remote_join( self, origin: str, @@ -715,7 +722,7 @@ class FederationEventHandler: @trace async def _process_pulled_events( - self, origin: str, events: Iterable[EventBase], backfilled: bool + self, origin: str, events: Collection[EventBase], backfilled: bool ) -> None: """Process a batch of events we have pulled from a remote server @@ -730,6 +737,15 @@ class FederationEventHandler: backfilled: True if this is part of a historical batch of events (inhibits notification to clients, and validation of device keys.) """ + set_tag( + SynapseTags.FUNC_ARG_PREFIX + "event_ids", + str([event.event_id for event in events]), + ) + set_tag( + SynapseTags.FUNC_ARG_PREFIX + "event_ids.length", + str(len(events)), + ) + set_tag(SynapseTags.FUNC_ARG_PREFIX + "backfilled", str(backfilled)) logger.debug( "processing pulled backfilled=%s events=%s", backfilled, @@ -753,6 +769,7 @@ class FederationEventHandler: await self._process_pulled_event(origin, ev, backfilled=backfilled) @trace + @tag_args async def _process_pulled_event( self, origin: str, event: EventBase, backfilled: bool ) -> None: @@ -854,6 +871,7 @@ class FederationEventHandler: else: raise + @trace async def _compute_event_context_with_maybe_missing_prevs( self, dest: str, event: EventBase ) -> EventContext: @@ -970,6 +988,8 @@ class FederationEventHandler: event, state_ids_before_event=state_map, partial_state=partial_state ) + @trace + @tag_args async def _get_state_ids_after_missing_prev_event( self, destination: str, @@ -1009,10 +1029,10 @@ class FederationEventHandler: logger.debug("Fetching %i events from cache/store", len(desired_events)) have_events = await self._store.have_seen_events(room_id, desired_events) - missing_desired_events = desired_events - have_events + missing_desired_event_ids = desired_events - have_events logger.debug( "We are missing %i events (got %i)", - len(missing_desired_events), + len(missing_desired_event_ids), len(have_events), ) @@ -1024,13 +1044,30 @@ class FederationEventHandler: # already have a bunch of the state events. It would be nice if the # federation api gave us a way of finding out which we actually need. - missing_auth_events = set(auth_event_ids) - have_events - missing_auth_events.difference_update( - await self._store.have_seen_events(room_id, missing_auth_events) + missing_auth_event_ids = set(auth_event_ids) - have_events + missing_auth_event_ids.difference_update( + await self._store.have_seen_events(room_id, missing_auth_event_ids) ) - logger.debug("We are also missing %i auth events", len(missing_auth_events)) + logger.debug("We are also missing %i auth events", len(missing_auth_event_ids)) - missing_events = missing_desired_events | missing_auth_events + missing_event_ids = missing_desired_event_ids | missing_auth_event_ids + + set_tag( + SynapseTags.RESULT_PREFIX + "missing_auth_event_ids", + str(missing_auth_event_ids), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "missing_auth_event_ids.length", + str(len(missing_auth_event_ids)), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "missing_desired_event_ids", + str(missing_desired_event_ids), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "missing_desired_event_ids.length", + str(len(missing_desired_event_ids)), + ) # Making an individual request for each of 1000s of events has a lot of # overhead. On the other hand, we don't really want to fetch all of the events @@ -1041,13 +1078,13 @@ class FederationEventHandler: # # TODO: might it be better to have an API which lets us do an aggregate event # request - if (len(missing_events) * 10) >= len(auth_event_ids) + len(state_event_ids): + if (len(missing_event_ids) * 10) >= len(auth_event_ids) + len(state_event_ids): logger.debug("Requesting complete state from remote") await self._get_state_and_persist(destination, room_id, event_id) else: - logger.debug("Fetching %i events from remote", len(missing_events)) + logger.debug("Fetching %i events from remote", len(missing_event_ids)) await self._get_events_and_persist( - destination=destination, room_id=room_id, event_ids=missing_events + destination=destination, room_id=room_id, event_ids=missing_event_ids ) # We now need to fill out the state map, which involves fetching the @@ -1104,6 +1141,14 @@ class FederationEventHandler: event_id, failed_to_fetch, ) + set_tag( + SynapseTags.RESULT_PREFIX + "failed_to_fetch", + str(failed_to_fetch), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "failed_to_fetch.length", + str(len(failed_to_fetch)), + ) if remote_event.is_state() and remote_event.rejected_reason is None: state_map[ @@ -1112,6 +1157,8 @@ class FederationEventHandler: return state_map + @trace + @tag_args async def _get_state_and_persist( self, destination: str, room_id: str, event_id: str ) -> None: @@ -1133,6 +1180,7 @@ class FederationEventHandler: destination=destination, room_id=room_id, event_ids=(event_id,) ) + @trace async def _process_received_pdu( self, origin: str, @@ -1283,6 +1331,7 @@ class FederationEventHandler: except Exception: logger.exception("Failed to resync device for %s", sender) + @trace async def _handle_marker_event(self, origin: str, marker_event: EventBase) -> None: """Handles backfilling the insertion event when we receive a marker event that points to one. @@ -1414,6 +1463,8 @@ class FederationEventHandler: return event_from_response + @trace + @tag_args async def _get_events_and_persist( self, destination: str, room_id: str, event_ids: Collection[str] ) -> None: @@ -1459,6 +1510,7 @@ class FederationEventHandler: logger.info("Fetched %i events of %i requested", len(events), len(event_ids)) await self._auth_and_persist_outliers(room_id, events) + @trace async def _auth_and_persist_outliers( self, room_id: str, events: Iterable[EventBase] ) -> None: @@ -1477,6 +1529,16 @@ class FederationEventHandler: """ event_map = {event.event_id: event for event in events} + event_ids = event_map.keys() + set_tag( + SynapseTags.FUNC_ARG_PREFIX + "event_ids", + str(event_ids), + ) + set_tag( + SynapseTags.FUNC_ARG_PREFIX + "event_ids.length", + str(len(event_ids)), + ) + # filter out any events we have already seen. This might happen because # the events were eagerly pushed to us (eg, during a room join), or because # another thread has raced against us since we decided to request the event. @@ -1593,6 +1655,7 @@ class FederationEventHandler: backfilled=True, ) + @trace async def _check_event_auth( self, origin: Optional[str], event: EventBase, context: EventContext ) -> None: @@ -1631,6 +1694,14 @@ class FederationEventHandler: claimed_auth_events = await self._load_or_fetch_auth_events_for_event( origin, event ) + set_tag( + SynapseTags.RESULT_PREFIX + "claimed_auth_events", + str([ev.event_id for ev in claimed_auth_events]), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "claimed_auth_events.length", + str(len(claimed_auth_events)), + ) # ... and check that the event passes auth at those auth events. # https://spec.matrix.org/v1.3/server-server-api/#checks-performed-on-receipt-of-a-pdu: @@ -1728,6 +1799,7 @@ class FederationEventHandler: ) context.rejected = RejectedReason.AUTH_ERROR + @trace async def _maybe_kick_guest_users(self, event: EventBase) -> None: if event.type != EventTypes.GuestAccess: return @@ -1935,6 +2007,8 @@ class FederationEventHandler: # instead we raise an AuthError, which will make the caller ignore it. raise AuthError(code=HTTPStatus.FORBIDDEN, msg="Auth events could not be found") + @trace + @tag_args async def _get_remote_auth_chain_for_event( self, destination: str, room_id: str, event_id: str ) -> None: @@ -1963,6 +2037,7 @@ class FederationEventHandler: await self._auth_and_persist_outliers(room_id, remote_auth_events) + @trace async def _run_push_actions_and_persist_event( self, event: EventBase, context: EventContext, backfilled: bool = False ) -> None: @@ -2071,8 +2146,17 @@ class FederationEventHandler: self._message_handler.maybe_schedule_expiry(event) if not backfilled: # Never notify for backfilled events - for event in events: - await self._notify_persisted_event(event, max_stream_token) + with start_active_span("notify_persisted_events"): + set_tag( + SynapseTags.RESULT_PREFIX + "event_ids", + str([ev.event_id for ev in events]), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "event_ids.length", + str(len(events)), + ) + for event in events: + await self._notify_persisted_event(event, max_stream_token) return max_stream_token.stream diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index d1fa2cf8ae..482316a1ff 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -310,6 +310,19 @@ class SynapseTags: # The name of the external cache CACHE_NAME = "cache.name" + # Used to tag function arguments + # + # Tag a named arg. The name of the argument should be appended to this prefix. + FUNC_ARG_PREFIX = "ARG." + # Tag extra variadic number of positional arguments (`def foo(first, second, *extras)`) + FUNC_ARGS = "args" + # Tag keyword args + FUNC_KWARGS = "kwargs" + + # Some intermediate result that's interesting to the function. The label for + # the result should be appended to this prefix. + RESULT_PREFIX = "RESULT." + class SynapseBaggage: FORCE_TRACING = "synapse-force-tracing" @@ -967,9 +980,9 @@ def tag_args(func: Callable[P, R]) -> Callable[P, R]: # first argument only if it's named `self` or `cls`. This isn't fool-proof # but handles the idiomatic cases. for i, arg in enumerate(args[1:], start=1): # type: ignore[index] - set_tag("ARG_" + argspec.args[i], str(arg)) - set_tag("args", str(args[len(argspec.args) :])) # type: ignore[index] - set_tag("kwargs", str(kwargs)) + set_tag(SynapseTags.FUNC_ARG_PREFIX + argspec.args[i], str(arg)) + set_tag(SynapseTags.FUNC_ARGS, str(args[len(argspec.args) :])) # type: ignore[index] + set_tag(SynapseTags.FUNC_KWARGS, str(kwargs)) yield return _custom_sync_async_decorator(func, _wrapping_logic) diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index cf98b0ab48..dad3731b9b 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -45,8 +45,14 @@ from twisted.internet import defer from synapse.api.constants import EventTypes, Membership from synapse.events import EventBase from synapse.events.snapshot import EventContext -from synapse.logging import opentracing from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable +from synapse.logging.opentracing import ( + SynapseTags, + active_span, + set_tag, + start_active_span_follows_from, + trace, +) from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.controllers.state import StateStorageController from synapse.storage.databases import Databases @@ -223,7 +229,7 @@ class _EventPeristenceQueue(Generic[_PersistResult]): queue.append(end_item) # also add our active opentracing span to the item so that we get a link back - span = opentracing.active_span() + span = active_span() if span: end_item.parent_opentracing_span_contexts.append(span.context) @@ -234,7 +240,7 @@ class _EventPeristenceQueue(Generic[_PersistResult]): res = await make_deferred_yieldable(end_item.deferred.observe()) # add another opentracing span which links to the persist trace. - with opentracing.start_active_span_follows_from( + with start_active_span_follows_from( f"{task.name}_complete", (end_item.opentracing_span_context,) ): pass @@ -266,7 +272,7 @@ class _EventPeristenceQueue(Generic[_PersistResult]): queue = self._get_drainining_queue(room_id) for item in queue: try: - with opentracing.start_active_span_follows_from( + with start_active_span_follows_from( item.task.name, item.parent_opentracing_span_contexts, inherit_force_tracing=True, @@ -355,7 +361,7 @@ class EventsPersistenceStorageController: f"Found an unexpected task type in event persistence queue: {task}" ) - @opentracing.trace + @trace async def persist_events( self, events_and_contexts: Iterable[Tuple[EventBase, EventContext]], @@ -380,9 +386,21 @@ class EventsPersistenceStorageController: PartialStateConflictError: if attempting to persist a partial state event in a room that has been un-partial stated. """ + event_ids: List[str] = [] partitioned: Dict[str, List[Tuple[EventBase, EventContext]]] = {} for event, ctx in events_and_contexts: partitioned.setdefault(event.room_id, []).append((event, ctx)) + event_ids.append(event.event_id) + + set_tag( + SynapseTags.FUNC_ARG_PREFIX + "event_ids", + str(event_ids), + ) + set_tag( + SynapseTags.FUNC_ARG_PREFIX + "event_ids.length", + str(len(event_ids)), + ) + set_tag(SynapseTags.FUNC_ARG_PREFIX + "backfilled", str(backfilled)) async def enqueue( item: Tuple[str, List[Tuple[EventBase, EventContext]]] @@ -418,7 +436,7 @@ class EventsPersistenceStorageController: self.main_store.get_room_max_token(), ) - @opentracing.trace + @trace async def persist_event( self, event: EventBase, context: EventContext, backfilled: bool = False ) -> Tuple[EventBase, PersistedEventPosition, RoomStreamToken]: diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py index 0c78eb735e..1ad002f57b 100644 --- a/synapse/storage/controllers/state.py +++ b/synapse/storage/controllers/state.py @@ -29,7 +29,7 @@ from typing import ( from synapse.api.constants import EventTypes from synapse.events import EventBase -from synapse.logging.opentracing import trace +from synapse.logging.opentracing import tag_args, trace from synapse.storage.roommember import ProfileInfo from synapse.storage.state import StateFilter from synapse.storage.util.partial_state_events_tracker import ( @@ -229,6 +229,7 @@ class StateStorageController: return {event: event_to_state[event] for event in event_ids} @trace + @tag_args async def get_state_ids_for_events( self, event_ids: Collection[str], @@ -333,6 +334,7 @@ class StateStorageController: ) @trace + @tag_args async def get_state_group_for_events( self, event_ids: Collection[str], @@ -474,6 +476,7 @@ class StateStorageController: prev_stream_id, max_stream_id ) + @trace async def get_current_state( self, room_id: str, state_filter: Optional[StateFilter] = None ) -> StateMap[EventBase]: diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 0bc8401f2b..c836078da6 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -712,6 +712,8 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas # Return all events where not all sets can reach them. return {eid for eid, n in event_to_missing_sets.items() if n} + @trace + @tag_args async def get_oldest_event_ids_with_depth_in_room( self, room_id: str ) -> List[Tuple[str, int]]: @@ -770,6 +772,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas room_id, ) + @trace async def get_insertion_event_backward_extremities_in_room( self, room_id: str ) -> List[Tuple[str, int]]: @@ -1342,6 +1345,8 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas event_results.reverse() return event_results + @trace + @tag_args async def get_successor_events(self, event_id: str) -> List[str]: """Fetch all events that have the given event as a prev event @@ -1378,6 +1383,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas _delete_old_forward_extrem_cache_txn, ) + @trace async def insert_insertion_extremity(self, event_id: str, room_id: str) -> None: await self.db_pool.simple_upsert( table="insertion_event_extremities", diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 5560b38a48..a4010ee28d 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -40,6 +40,7 @@ from synapse.api.errors import Codes, SynapseError from synapse.api.room_versions import RoomVersions from synapse.events import EventBase, relation_from_event from synapse.events.snapshot import EventContext +from synapse.logging.opentracing import trace from synapse.storage._base import db_to_json, make_in_list_sql_clause from synapse.storage.database import ( DatabasePool, @@ -145,6 +146,7 @@ class PersistEventsStore: self._backfill_id_gen: AbstractStreamIdGenerator = self.store._backfill_id_gen self._stream_id_gen: AbstractStreamIdGenerator = self.store._stream_id_gen + @trace async def _persist_events_and_state_updates( self, events_and_contexts: List[Tuple[EventBase, EventContext]], diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index b07d812ae2..8a7cdb024d 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -54,6 +54,7 @@ from synapse.logging.context import ( current_context, make_deferred_yieldable, ) +from synapse.logging.opentracing import start_active_span, tag_args, trace from synapse.metrics.background_process_metrics import ( run_as_background_process, wrap_as_background_process, @@ -430,6 +431,8 @@ class EventsWorkerStore(SQLBaseStore): return {e.event_id: e for e in events} + @trace + @tag_args async def get_events_as_list( self, event_ids: Collection[str], @@ -1090,23 +1093,42 @@ class EventsWorkerStore(SQLBaseStore): """ fetched_event_ids: Set[str] = set() fetched_events: Dict[str, _EventRow] = {} - events_to_fetch = event_ids - while events_to_fetch: - row_map = await self._enqueue_events(events_to_fetch) + async def _fetch_event_ids_and_get_outstanding_redactions( + event_ids_to_fetch: Collection[str], + ) -> Collection[str]: + """ + Fetch all of the given event_ids and return any associated redaction event_ids + that we still need to fetch in the next iteration. + """ + row_map = await self._enqueue_events(event_ids_to_fetch) # we need to recursively fetch any redactions of those events redaction_ids: Set[str] = set() - for event_id in events_to_fetch: + for event_id in event_ids_to_fetch: row = row_map.get(event_id) fetched_event_ids.add(event_id) if row: fetched_events[event_id] = row redaction_ids.update(row.redactions) - events_to_fetch = redaction_ids.difference(fetched_event_ids) - if events_to_fetch: - logger.debug("Also fetching redaction events %s", events_to_fetch) + event_ids_to_fetch = redaction_ids.difference(fetched_event_ids) + return event_ids_to_fetch + + # Grab the initial list of events requested + event_ids_to_fetch = await _fetch_event_ids_and_get_outstanding_redactions( + event_ids + ) + # Then go and recursively find all of the associated redactions + with start_active_span("recursively fetching redactions"): + while event_ids_to_fetch: + logger.debug("Also fetching redaction events %s", event_ids_to_fetch) + + event_ids_to_fetch = ( + await _fetch_event_ids_and_get_outstanding_redactions( + event_ids_to_fetch + ) + ) # build a map from event_id to EventBase event_map: Dict[str, EventBase] = {} @@ -1424,6 +1446,8 @@ class EventsWorkerStore(SQLBaseStore): return {r["event_id"] for r in rows} + @trace + @tag_args async def have_seen_events( self, room_id: str, event_ids: Iterable[str] ) -> Set[str]: diff --git a/synapse/storage/util/partial_state_events_tracker.py b/synapse/storage/util/partial_state_events_tracker.py index 466e5137f2..b4bf49dace 100644 --- a/synapse/storage/util/partial_state_events_tracker.py +++ b/synapse/storage/util/partial_state_events_tracker.py @@ -20,6 +20,7 @@ from twisted.internet import defer from twisted.internet.defer import Deferred from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable +from synapse.logging.opentracing import trace_with_opname from synapse.storage.databases.main.events_worker import EventsWorkerStore from synapse.storage.databases.main.room import RoomWorkerStore from synapse.util import unwrapFirstError @@ -58,6 +59,7 @@ class PartialStateEventsTracker: for o in observers: o.callback(None) + @trace_with_opname("PartialStateEventsTracker.await_full_state") async def await_full_state(self, event_ids: Collection[str]) -> None: """Wait for all the given events to have full state. @@ -151,6 +153,7 @@ class PartialCurrentStateTracker: for o in observers: o.callback(None) + @trace_with_opname("PartialCurrentStateTracker.await_full_state") async def await_full_state(self, room_id: str) -> None: # We add the deferred immediately so that the DB call to check for # partial state doesn't race when we unpartial the room. -- cgit 1.5.1 From d75512d19ebea6c0f9e38e9f55474fdb6da02b46 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Wed, 17 Aug 2022 11:42:01 +0200 Subject: Add forgotten status to Room Details API (#13503) --- changelog.d/13503.feature | 1 + docs/admin_api/rooms.md | 5 +- synapse/rest/admin/rooms.py | 1 + synapse/storage/databases/main/roommember.py | 24 ++++++++++ tests/rest/admin/test_room.py | 1 + tests/storage/test_roommember.py | 70 ++++++++++++++++++++++++++++ 6 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13503.feature (limited to 'synapse/storage/databases') diff --git a/changelog.d/13503.feature b/changelog.d/13503.feature new file mode 100644 index 0000000000..4baabd1e32 --- /dev/null +++ b/changelog.d/13503.feature @@ -0,0 +1 @@ +Add forgotten status to Room Details API. \ No newline at end of file diff --git a/docs/admin_api/rooms.md b/docs/admin_api/rooms.md index 9aa489e4a3..ac7c54c20e 100644 --- a/docs/admin_api/rooms.md +++ b/docs/admin_api/rooms.md @@ -302,6 +302,8 @@ The following fields are possible in the JSON response body: * `state_events` - Total number of state_events of a room. Complexity of the room. * `room_type` - The type of the room taken from the room's creation event; for example "m.space" if the room is a space. If the room does not define a type, the value will be `null`. +* `forgotten` - Whether all local users have + [forgotten](https://spec.matrix.org/latest/client-server-api/#leaving-rooms) the room. The API is: @@ -330,7 +332,8 @@ A response body like the following is returned: "guest_access": null, "history_visibility": "shared", "state_events": 93534, - "room_type": "m.space" + "room_type": "m.space", + "forgotten": false } ``` diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py index 9d953d58de..68054ffc28 100644 --- a/synapse/rest/admin/rooms.py +++ b/synapse/rest/admin/rooms.py @@ -303,6 +303,7 @@ class RoomRestServlet(RestServlet): members = await self.store.get_users_in_room(room_id) ret["joined_local_devices"] = await self.store.count_devices_by_users(members) + ret["forgotten"] = await self.store.is_locally_forgotten_room(room_id) return HTTPStatus.OK, ret diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 5e5f607a14..827c1f1efd 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -1215,6 +1215,30 @@ class RoomMemberWorkerStore(EventsWorkerStore): "get_forgotten_rooms_for_user", _get_forgotten_rooms_for_user_txn ) + async def is_locally_forgotten_room(self, room_id: str) -> bool: + """Returns whether all local users have forgotten this room_id. + + Args: + room_id: The room ID to query. + + Returns: + Whether the room is forgotten. + """ + + sql = """ + SELECT count(*) > 0 FROM local_current_membership + INNER JOIN room_memberships USING (room_id, event_id) + WHERE + room_id = ? + AND forgotten = 0; + """ + + rows = await self.db_pool.execute("is_forgotten_room", None, sql, room_id) + + # `count(*)` returns always an integer + # If any rows still exist it means someone has not forgotten this room yet + return not rows[0][0] + async def get_rooms_user_has_been_in(self, user_id: str) -> Set[str]: """Get all rooms that the user has ever been in. diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py index dd5000679a..fd6da557c1 100644 --- a/tests/rest/admin/test_room.py +++ b/tests/rest/admin/test_room.py @@ -1633,6 +1633,7 @@ class RoomTestCase(unittest.HomeserverTestCase): self.assertIn("history_visibility", channel.json_body) self.assertIn("state_events", channel.json_body) self.assertIn("room_type", channel.json_body) + self.assertIn("forgotten", channel.json_body) self.assertEqual(room_id_1, channel.json_body["room_id"]) def test_single_room_devices(self) -> None: diff --git a/tests/storage/test_roommember.py b/tests/storage/test_roommember.py index 240b02cb9f..ceec690285 100644 --- a/tests/storage/test_roommember.py +++ b/tests/storage/test_roommember.py @@ -23,6 +23,7 @@ from synapse.util import Clock from tests import unittest from tests.server import TestHomeServer +from tests.test_utils import event_injection class RoomMemberStoreTestCase(unittest.HomeserverTestCase): @@ -157,6 +158,75 @@ class RoomMemberStoreTestCase(unittest.HomeserverTestCase): # Check that alice's display name is now None self.assertEqual(row[0]["display_name"], None) + def test_room_is_locally_forgotten(self): + """Test that when the last local user has forgotten a room it is known as forgotten.""" + # join two local and one remote user + self.room = self.helper.create_room_as(self.u_alice, tok=self.t_alice) + self.get_success( + event_injection.inject_member_event(self.hs, self.room, self.u_bob, "join") + ) + self.get_success( + event_injection.inject_member_event( + self.hs, self.room, self.u_charlie.to_string(), "join" + ) + ) + self.assertFalse( + self.get_success(self.store.is_locally_forgotten_room(self.room)) + ) + + # local users leave the room and the room is not forgotten + self.get_success( + event_injection.inject_member_event( + self.hs, self.room, self.u_alice, "leave" + ) + ) + self.get_success( + event_injection.inject_member_event(self.hs, self.room, self.u_bob, "leave") + ) + self.assertFalse( + self.get_success(self.store.is_locally_forgotten_room(self.room)) + ) + + # first user forgets the room, room is not forgotten + self.get_success(self.store.forget(self.u_alice, self.room)) + self.assertFalse( + self.get_success(self.store.is_locally_forgotten_room(self.room)) + ) + + # second (last local) user forgets the room and the room is forgotten + self.get_success(self.store.forget(self.u_bob, self.room)) + self.assertTrue( + self.get_success(self.store.is_locally_forgotten_room(self.room)) + ) + + def test_join_locally_forgotten_room(self): + """Tests if a user joins a forgotten room the room is not forgotten anymore.""" + self.room = self.helper.create_room_as(self.u_alice, tok=self.t_alice) + self.assertFalse( + self.get_success(self.store.is_locally_forgotten_room(self.room)) + ) + + # after leaving and forget the room, it is forgotten + self.get_success( + event_injection.inject_member_event( + self.hs, self.room, self.u_alice, "leave" + ) + ) + self.get_success(self.store.forget(self.u_alice, self.room)) + self.assertTrue( + self.get_success(self.store.is_locally_forgotten_room(self.room)) + ) + + # after rejoin the room is not forgotten anymore + self.get_success( + event_injection.inject_member_event( + self.hs, self.room, self.u_alice, "join" + ) + ) + self.assertFalse( + self.get_success(self.store.is_locally_forgotten_room(self.room)) + ) + class CurrentStateMembershipUpdateTestCase(unittest.HomeserverTestCase): def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: -- cgit 1.5.1 From 8bdf2bd31ef003f0e89a588d8977d4f689ef6856 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Wed, 17 Aug 2022 18:08:23 +0000 Subject: Fix a bug in the `/event_reports` Admin API which meant that the total count could be larger than the number of results you can actually query for. (#13525) Co-authored-by: Brendan Abolivier --- changelog.d/13525.bugfix | 1 + synapse/storage/databases/main/room.py | 6 ++++++ tests/rest/admin/test_event_reports.py | 27 +++++++++++++++++++++++++++ 3 files changed, 34 insertions(+) create mode 100644 changelog.d/13525.bugfix (limited to 'synapse/storage/databases') diff --git a/changelog.d/13525.bugfix b/changelog.d/13525.bugfix new file mode 100644 index 0000000000..dbd1adbc88 --- /dev/null +++ b/changelog.d/13525.bugfix @@ -0,0 +1 @@ +Fix a bug in the `/event_reports` Admin API which meant that the total count could be larger than the number of results you can actually query for. \ No newline at end of file diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 0f1f0d11ea..b7d4baa6bb 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -2001,9 +2001,15 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore): where_clause = "WHERE " + " AND ".join(filters) if len(filters) > 0 else "" + # We join on room_stats_state despite not using any columns from it + # because the join can influence the number of rows returned; + # e.g. a room that doesn't have state, maybe because it was deleted. + # The query returning the total count should be consistent with + # the query returning the results. sql = """ SELECT COUNT(*) as total_event_reports FROM event_reports AS er + JOIN room_stats_state ON room_stats_state.room_id = er.room_id {} """.format( where_clause diff --git a/tests/rest/admin/test_event_reports.py b/tests/rest/admin/test_event_reports.py index fbc490f46d..8a4e5c3f77 100644 --- a/tests/rest/admin/test_event_reports.py +++ b/tests/rest/admin/test_event_reports.py @@ -410,6 +410,33 @@ class EventReportsTestCase(unittest.HomeserverTestCase): self.assertIn("score", c) self.assertIn("reason", c) + def test_count_correct_despite_table_deletions(self) -> None: + """ + Tests that the count matches the number of rows, even if rows in joined tables + are missing. + """ + + # Delete rows from room_stats_state for one of our rooms. + self.get_success( + self.hs.get_datastores().main.db_pool.simple_delete( + "room_stats_state", {"room_id": self.room_id1}, desc="_" + ) + ) + + channel = self.make_request( + "GET", + self.url, + access_token=self.admin_user_tok, + ) + + self.assertEqual(200, channel.code, msg=channel.json_body) + # The 'total' field is 10 because only 10 reports will actually + # be retrievable since we deleted the rows in the room_stats_state + # table. + self.assertEqual(channel.json_body["total"], 10) + # This is consistent with the number of rows actually returned. + self.assertEqual(len(channel.json_body["event_reports"]), 10) + class EventReportDetailTestCase(unittest.HomeserverTestCase): servlets = [ -- cgit 1.5.1