From 126a15794c95002560709283640ad412636b29b8 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 14 Oct 2022 08:30:05 -0400 Subject: Do not allow a None-limit on PaginationConfig. (#14146) The callers either set a default limit or manually handle a None-limit later on (by setting a default value). Update the callers to always instantiate PaginationConfig with a default limit and then assume the limit is non-None. --- synapse/handlers/room.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/handlers/room.py') diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 57ab05ad25..4e1aacb408 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1646,7 +1646,7 @@ class RoomEventSource(EventSource[RoomStreamToken, EventBase]): self, user: UserID, from_key: RoomStreamToken, - limit: Optional[int], + limit: int, room_ids: Collection[str], is_guest: bool, explicit_room_id: Optional[str] = None, -- cgit 1.5.1 From 847e2393f3198b88809c9b99de5c681efbf1c92e Mon Sep 17 00:00:00 2001 From: Shay Date: Tue, 18 Oct 2022 09:58:47 -0700 Subject: Prepatory work for adding power level event to batched events (#14214) --- changelog.d/14214.misc | 1 + synapse/event_auth.py | 19 ++++++++++++++++++- synapse/handlers/event_auth.py | 18 +++++++++++++----- synapse/handlers/federation.py | 12 +++++------- synapse/handlers/message.py | 10 +++++++++- synapse/handlers/room.py | 4 +--- 6 files changed, 47 insertions(+), 17 deletions(-) create mode 100644 changelog.d/14214.misc (limited to 'synapse/handlers/room.py') diff --git a/changelog.d/14214.misc b/changelog.d/14214.misc new file mode 100644 index 0000000000..102928b575 --- /dev/null +++ b/changelog.d/14214.misc @@ -0,0 +1 @@ +When authenticating batched events, check for auth events in batch as well as DB. diff --git a/synapse/event_auth.py b/synapse/event_auth.py index c7d5ef92fc..bab31e33c5 100644 --- a/synapse/event_auth.py +++ b/synapse/event_auth.py @@ -15,7 +15,18 @@ import logging import typing -from typing import Any, Collection, Dict, Iterable, List, Optional, Set, Tuple, Union +from typing import ( + Any, + Collection, + Dict, + Iterable, + List, + Mapping, + Optional, + Set, + Tuple, + Union, +) from canonicaljson import encode_canonical_json from signedjson.key import decode_verify_key_bytes @@ -134,6 +145,7 @@ def validate_event_for_room_version(event: "EventBase") -> None: async def check_state_independent_auth_rules( store: _EventSourceStore, event: "EventBase", + batched_auth_events: Optional[Mapping[str, "EventBase"]] = None, ) -> None: """Check that an event complies with auth rules that are independent of room state @@ -143,6 +155,8 @@ async def check_state_independent_auth_rules( Args: store: the datastore; used to fetch the auth events for validation event: the event being checked. 
+ batched_auth_events: if the event being authed is part of a batch, any events + from the same batch that may be necessary to auth the current event Raises: AuthError if the checks fail @@ -162,6 +176,9 @@ async def check_state_independent_auth_rules( redact_behaviour=EventRedactBehaviour.as_is, allow_rejected=True, ) + if batched_auth_events: + auth_events.update(batched_auth_events) + room_id = event.room_id auth_dict: MutableStateMap[str] = {} expected_auth_types = auth_types_for_event(event.room_version, event) diff --git a/synapse/handlers/event_auth.py b/synapse/handlers/event_auth.py index 8249ca1ed2..3bbad0271b 100644 --- a/synapse/handlers/event_auth.py +++ b/synapse/handlers/event_auth.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import TYPE_CHECKING, Collection, List, Optional, Union +from typing import TYPE_CHECKING, Collection, List, Mapping, Optional, Union from synapse import event_auth from synapse.api.constants import ( @@ -29,7 +29,6 @@ from synapse.event_auth import ( ) from synapse.events import EventBase from synapse.events.builder import EventBuilder -from synapse.events.snapshot import EventContext from synapse.types import StateMap, get_domain_from_id if TYPE_CHECKING: @@ -51,12 +50,21 @@ class EventAuthHandler: async def check_auth_rules_from_context( self, event: EventBase, - context: EventContext, + batched_auth_events: Optional[Mapping[str, EventBase]] = None, ) -> None: - """Check an event passes the auth rules at its own auth events""" - await check_state_independent_auth_rules(self._store, event) + """Check an event passes the auth rules at its own auth events + Args: + event: event to be authed + batched_auth_events: if the event being authed is part of a batch, any events + from the same batch that may be necessary to auth the current event + """ + await check_state_independent_auth_rules( + self._store, event, batched_auth_events + ) auth_event_ids = event.auth_event_ids() auth_events_by_id = await self._store.get_events(auth_event_ids) + if batched_auth_events: + auth_events_by_id.update(batched_auth_events) check_state_dependent_auth_rules(event, auth_events_by_id.values()) def compute_auth_events( diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index ccc045d36f..275a37a575 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -942,7 +942,7 @@ class FederationHandler: # The remote hasn't signed it yet, obviously. We'll do the full checks # when we get the event back in `on_send_join_request` - await self._event_auth_handler.check_auth_rules_from_context(event, context) + await self._event_auth_handler.check_auth_rules_from_context(event) return event async def on_invite_request( @@ -1123,7 +1123,7 @@ class FederationHandler: try: # The remote hasn't signed it yet, obviously. We'll do the full checks # when we get the event back in `on_send_leave_request` - await self._event_auth_handler.check_auth_rules_from_context(event, context) + await self._event_auth_handler.check_auth_rules_from_context(event) except AuthError as e: logger.warning("Failed to create new leave %r because %s", event, e) raise e @@ -1182,7 +1182,7 @@ class FederationHandler: try: # The remote hasn't signed it yet, obviously. 
We'll do the full checks # when we get the event back in `on_send_knock_request` - await self._event_auth_handler.check_auth_rules_from_context(event, context) + await self._event_auth_handler.check_auth_rules_from_context(event) except AuthError as e: logger.warning("Failed to create new knock %r because %s", event, e) raise e @@ -1348,9 +1348,7 @@ class FederationHandler: try: validate_event_for_room_version(event) - await self._event_auth_handler.check_auth_rules_from_context( - event, context - ) + await self._event_auth_handler.check_auth_rules_from_context(event) except AuthError as e: logger.warning("Denying new third party invite %r because %s", event, e) raise e @@ -1400,7 +1398,7 @@ class FederationHandler: try: validate_event_for_room_version(event) - await self._event_auth_handler.check_auth_rules_from_context(event, context) + await self._event_auth_handler.check_auth_rules_from_context(event) except AuthError as e: logger.warning("Denying third party invite %r because %s", event, e) raise e diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 4e55ebba0b..15b828dd74 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1360,8 +1360,16 @@ class EventCreationHandler: else: try: validate_event_for_room_version(event) + # If we are persisting a batch of events the event(s) needed to auth the + # current event may be part of the batch and will not be in the DB yet + event_id_to_event = {e.event_id: e for e, _ in events_and_context} + batched_auth_events = {} + for event_id in event.auth_event_ids(): + auth_event = event_id_to_event.get(event_id) + if auth_event: + batched_auth_events[event_id] = auth_event await self._event_auth_handler.check_auth_rules_from_context( - event, context + event, batched_auth_events ) except AuthError as err: logger.warning("Denying new event %r because %s", event, err) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 4e1aacb408..638f54051a 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -229,9 +229,7 @@ class RoomCreationHandler: }, ) validate_event_for_room_version(tombstone_event) - await self._event_auth_handler.check_auth_rules_from_context( - tombstone_event, tombstone_context - ) + await self._event_auth_handler.check_auth_rules_from_context(tombstone_event) # Upgrade the room # -- cgit 1.5.1 From b7a7ff6ee39da4981dcfdce61bf8ac4735e3d047 Mon Sep 17 00:00:00 2001 From: Shay Date: Fri, 21 Oct 2022 10:46:22 -0700 Subject: Add initial power level event to batch of bulk persisted events when creating a new room. (#14228) --- changelog.d/14228.misc | 1 + synapse/handlers/federation.py | 4 +- synapse/handlers/federation_event.py | 4 +- synapse/handlers/message.py | 14 ++---- synapse/handlers/room.py | 39 ++++----------- synapse/push/bulk_push_rule_evaluator.py | 74 ++++++++++++++++++++++++----- tests/push/test_bulk_push_rule_evaluator.py | 2 +- tests/replication/_base.py | 2 +- 8 files changed, 82 insertions(+), 58 deletions(-) create mode 100644 changelog.d/14228.misc (limited to 'synapse/handlers/room.py') diff --git a/changelog.d/14228.misc b/changelog.d/14228.misc new file mode 100644 index 0000000000..14fe31a8bc --- /dev/null +++ b/changelog.d/14228.misc @@ -0,0 +1 @@ +Add initial power level event to batch of bulk persisted events when creating a new room. 
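
The patch above (#14214) and the one below (#14228) both lean on the same idea: when a batch of events is persisted together, an event's auth or power-level events may live in that in-flight batch rather than in the database, so lookups check the batch first and only then fall back to a DB fetch. A minimal, self-contained sketch of that lookup order follows; the plain dicts and the `fetch_from_db` helper are illustrative stand-ins, not Synapse APIs.

from typing import Dict, List, Mapping, Optional, Tuple

# "Event" stands in for Synapse's EventBase for the purposes of this sketch.
Event = Dict[str, str]

def fetch_from_db(event_id: str) -> Optional[Event]:
    # Stand-in for a database lookup; while a batch is still being persisted,
    # its events are not here yet.
    return None

def resolve_event(event_id: str, batch: Mapping[str, Event]) -> Optional[Event]:
    # Check the in-flight batch first, then fall back to the database.
    return batch.get(event_id) or fetch_from_db(event_id)

events_and_context: List[Tuple[Event, dict]] = [
    ({"event_id": "$power_levels", "type": "m.room.power_levels"}, {}),
    ({"event_id": "$creator_join", "type": "m.room.member"}, {}),
]
event_id_to_event = {e["event_id"]: e for e, _ in events_and_context}

print(resolve_event("$power_levels", event_id_to_event))
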
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 275a37a575..4fbc79a6cb 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1017,7 +1017,9 @@ class FederationHandler: context = EventContext.for_outlier(self._storage_controllers) - await self._bulk_push_rule_evaluator.action_for_event_by_user(event, context) + await self._bulk_push_rule_evaluator.action_for_events_by_user( + [(event, context)] + ) try: await self._federation_event_handler.persist_events_and_notify( event.room_id, [(event, context)] diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 06e41b5cc0..7da6316a82 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -2171,8 +2171,8 @@ class FederationEventHandler: min_depth, ) else: - await self._bulk_push_rule_evaluator.action_for_event_by_user( - event, context + await self._bulk_push_rule_evaluator.action_for_events_by_user( + [(event, context)] ) try: diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 15b828dd74..468900a07f 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1433,17 +1433,9 @@ class EventCreationHandler: a room that has been un-partial stated. """ - for event, context in events_and_context: - # Skip push notification actions for historical messages - # because we don't want to notify people about old history back in time. - # The historical messages also do not have the proper `context.current_state_ids` - # and `state_groups` because they have `prev_events` that aren't persisted yet - # (historical messages persisted in reverse-chronological order). - if not event.internal_metadata.is_historical(): - with opentracing.start_active_span("calculate_push_actions"): - await self._bulk_push_rule_evaluator.action_for_event_by_user( - event, context - ) + await self._bulk_push_rule_evaluator.action_for_events_by_user( + events_and_context + ) try: # If we're a worker we need to hit out to the master. diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 638f54051a..cc1e5c8f97 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1055,9 +1055,6 @@ class RoomCreationHandler: event_keys = {"room_id": room_id, "sender": creator_id, "state_key": ""} depth = 1 - # the last event sent/persisted to the db - last_sent_event_id: Optional[str] = None - # the most recently created event prev_event: List[str] = [] # a map of event types, state keys -> event_ids. 
We collect these mappings this as events are @@ -1102,26 +1099,6 @@ class RoomCreationHandler: return new_event, new_context - async def send( - event: EventBase, - context: synapse.events.snapshot.EventContext, - creator: Requester, - ) -> int: - nonlocal last_sent_event_id - - ev = await self.event_creation_handler.handle_new_client_event( - requester=creator, - events_and_context=[(event, context)], - ratelimit=False, - ignore_shadow_ban=True, - ) - - last_sent_event_id = ev.event_id - - # we know it was persisted, so must have a stream ordering - assert ev.internal_metadata.stream_ordering - return ev.internal_metadata.stream_ordering - try: config = self._presets_dict[preset_config] except KeyError: @@ -1135,10 +1112,14 @@ class RoomCreationHandler: ) logger.debug("Sending %s in new room", EventTypes.Member) - await send(creation_event, creation_context, creator) + ev = await self.event_creation_handler.handle_new_client_event( + requester=creator, + events_and_context=[(creation_event, creation_context)], + ratelimit=False, + ignore_shadow_ban=True, + ) + last_sent_event_id = ev.event_id - # Room create event must exist at this point - assert last_sent_event_id is not None member_event_id, _ = await self.room_member_handler.update_membership( creator, creator.user, @@ -1157,6 +1138,7 @@ class RoomCreationHandler: depth += 1 state_map[(EventTypes.Member, creator.user.to_string())] = member_event_id + events_to_send = [] # We treat the power levels override specially as this needs to be one # of the first events that get sent into a room. pl_content = initial_state.pop((EventTypes.PowerLevels, ""), None) @@ -1165,7 +1147,7 @@ class RoomCreationHandler: EventTypes.PowerLevels, pl_content, False ) current_state_group = power_context._state_group - await send(power_event, power_context, creator) + events_to_send.append((power_event, power_context)) else: power_level_content: JsonDict = { "users": {creator_id: 100}, @@ -1214,9 +1196,8 @@ class RoomCreationHandler: False, ) current_state_group = pl_context._state_group - await send(pl_event, pl_context, creator) + events_to_send.append((pl_event, pl_context)) - events_to_send = [] if room_alias and (EventTypes.CanonicalAlias, "") not in initial_state: room_alias_event, room_alias_context = await create_event( EventTypes.CanonicalAlias, {"alias": room_alias.to_string()}, True diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index a75386f6a0..d7795a9080 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -165,8 +165,21 @@ class BulkPushRuleEvaluator: return rules_by_user async def _get_power_levels_and_sender_level( - self, event: EventBase, context: EventContext + self, + event: EventBase, + context: EventContext, + event_id_to_event: Mapping[str, EventBase], ) -> Tuple[dict, Optional[int]]: + """ + Given an event and an event context, get the power level event relevant to the event + and the power level of the sender of the event. + Args: + event: event to check + context: context of event to check + event_id_to_event: a mapping of event_id to event for a set of events being + batch persisted. 
This is needed as the sought-after power level event may + be in this batch rather than the DB + """ # There are no power levels and sender levels possible to get from outlier if event.internal_metadata.is_outlier(): return {}, None @@ -177,15 +190,26 @@ class BulkPushRuleEvaluator: ) pl_event_id = prev_state_ids.get(POWER_KEY) + # fastpath: if there's a power level event, that's all we need, and + # not having a power level event is an extreme edge case if pl_event_id: - # fastpath: if there's a power level event, that's all we need, and - # not having a power level event is an extreme edge case - auth_events = {POWER_KEY: await self.store.get_event(pl_event_id)} + # Get the power level event from the batch, or fall back to the database. + pl_event = event_id_to_event.get(pl_event_id) + if pl_event: + auth_events = {POWER_KEY: pl_event} + else: + auth_events = {POWER_KEY: await self.store.get_event(pl_event_id)} else: auth_events_ids = self._event_auth_handler.compute_auth_events( event, prev_state_ids, for_verification=False ) auth_events_dict = await self.store.get_events(auth_events_ids) + # Some needed auth events might be in the batch, combine them with those + # fetched from the database. + for auth_event_id in auth_events_ids: + auth_event = event_id_to_event.get(auth_event_id) + if auth_event: + auth_events_dict[auth_event_id] = auth_event auth_events = {(e.type, e.state_key): e for e in auth_events_dict.values()} sender_level = get_user_power_level(event.sender, auth_events) @@ -194,16 +218,38 @@ class BulkPushRuleEvaluator: return pl_event.content if pl_event else {}, sender_level - @measure_func("action_for_event_by_user") - async def action_for_event_by_user( - self, event: EventBase, context: EventContext + async def action_for_events_by_user( + self, events_and_context: List[Tuple[EventBase, EventContext]] ) -> None: - """Given an event and context, evaluate the push rules, check if the message - should increment the unread count, and insert the results into the - event_push_actions_staging table. + """Given a list of events and their associated contexts, evaluate the push rules + for each event, check if the message should increment the unread count, and + insert the results into the event_push_actions_staging table. """ - if not event.internal_metadata.is_notifiable(): - # Push rules for events that aren't notifiable can't be processed by this + # For batched events the power level events may not have been persisted yet, + # so we pass in the batched events. Thus if the event cannot be found in the + # database we can check in the batch. + event_id_to_event = {e.event_id: e for e, _ in events_and_context} + for event, context in events_and_context: + await self._action_for_event_by_user(event, context, event_id_to_event) + + @measure_func("action_for_event_by_user") + async def _action_for_event_by_user( + self, + event: EventBase, + context: EventContext, + event_id_to_event: Mapping[str, EventBase], + ) -> None: + + if ( + not event.internal_metadata.is_notifiable() + or event.internal_metadata.is_historical() + ): + # Push rules for events that aren't notifiable can't be processed by this and + # we want to skip push notification actions for historical messages + # because we don't want to notify people about old history back in time. + # The historical messages also do not have the proper `context.current_state_ids` + # and `state_groups` because they have `prev_events` that aren't persisted yet + # (historical messages persisted in reverse-chronological order). 
return # Disable counting as unread unless the experimental configuration is @@ -223,7 +269,9 @@ class BulkPushRuleEvaluator: ( power_levels, sender_power_level, - ) = await self._get_power_levels_and_sender_level(event, context) + ) = await self._get_power_levels_and_sender_level( + event, context, event_id_to_event + ) # Find the event's thread ID. relation = relation_from_event(event) diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py index 675d7df2ac..594e7937a8 100644 --- a/tests/push/test_bulk_push_rule_evaluator.py +++ b/tests/push/test_bulk_push_rule_evaluator.py @@ -71,4 +71,4 @@ class TestBulkPushRuleEvaluator(unittest.HomeserverTestCase): bulk_evaluator = BulkPushRuleEvaluator(self.hs) # should not raise - self.get_success(bulk_evaluator.action_for_event_by_user(event, context)) + self.get_success(bulk_evaluator.action_for_events_by_user([(event, context)])) diff --git a/tests/replication/_base.py b/tests/replication/_base.py index ce53f808db..121f3d8d65 100644 --- a/tests/replication/_base.py +++ b/tests/replication/_base.py @@ -371,7 +371,7 @@ class BaseMultiWorkerStreamTestCase(unittest.HomeserverTestCase): config=worker_hs.config.server.listeners[0], resource=resource, server_version_string="1", - max_request_body_size=4096, + max_request_body_size=8192, reactor=self.reactor, ) -- cgit 1.5.1 From 40fa8294e3096132819287dd0c6d6bd71a408902 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 Oct 2022 16:10:55 -0500 Subject: Refactor MSC3030 `/timestamp_to_event` to move away from our snowflake pull from `destination` pattern (#14096) 1. `federation_client.timestamp_to_event(...)` now handles all `destination` looping and uses our generic `_try_destination_list(...)` helper. 2. Consistently handling `NotRetryingDestination` and `FederationDeniedError` across `get_pdu` , backfill, and the generic `_try_destination_list` which is used for many places we use this pattern. 3. `get_pdu(...)` now returns `PulledPduInfo` so we know which `destination` we ended up pulling the PDU from --- changelog.d/14096.misc | 1 + synapse/federation/federation_client.py | 130 ++++++++++++++++++++++++----- synapse/handlers/federation.py | 15 ++-- synapse/handlers/federation_event.py | 31 ++++--- synapse/handlers/room.py | 126 +++++++++++----------------- synapse/util/retryutils.py | 2 +- tests/federation/test_federation_client.py | 12 ++- 7 files changed, 191 insertions(+), 126 deletions(-) create mode 100644 changelog.d/14096.misc (limited to 'synapse/handlers/room.py') diff --git a/changelog.d/14096.misc b/changelog.d/14096.misc new file mode 100644 index 0000000000..2c07dc673b --- /dev/null +++ b/changelog.d/14096.misc @@ -0,0 +1 @@ +Refactor [MSC3030](https://github.com/matrix-org/matrix-spec-proposals/pull/3030) `/timestamp_to_event` endpoint to loop over federation destinations with standard pattern and error handling. 
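
The refactor in the following diff replaces per-endpoint destination loops with the generic `_try_destination_list(...)` helper, so that errors such as a destination not being retried or being denied by the federation whitelist are handled consistently. A rough, self-contained sketch of that pattern is below; the exception types and names are simplified stand-ins, not the real federation errors.

import asyncio
from typing import Awaitable, Callable, List, TypeVar

T = TypeVar("T")

async def try_destination_list(
    description: str,
    destinations: List[str],
    callback: Callable[[str], Awaitable[T]],
) -> T:
    # Ask each candidate homeserver in turn; expected per-destination failures
    # are logged and skipped, and the first successful response wins.
    for destination in destinations:
        try:
            return await callback(destination)
        except (ConnectionError, ValueError) as e:
            print(f"Failed to {description} via {destination}: {e}")
    raise RuntimeError(f"no destination succeeded for {description}")

async def main() -> None:
    async def ask(destination: str) -> str:
        if destination == "good.example.org":
            return f"event from {destination}"
        raise ConnectionError("unreachable")

    result = await try_destination_list(
        "timestamp_to_event", ["bad.example.org", "good.example.org"], ask
    )
    print(result)

asyncio.run(main())
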
diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index b220ab43fc..fa225182be 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -80,6 +80,18 @@ PDU_RETRY_TIME_MS = 1 * 60 * 1000 T = TypeVar("T") +@attr.s(frozen=True, slots=True, auto_attribs=True) +class PulledPduInfo: + """ + A result object that stores the PDU and info about it like which homeserver we + pulled it from (`pull_origin`) + """ + + pdu: EventBase + # Which homeserver we pulled the PDU from + pull_origin: str + + class InvalidResponseError(RuntimeError): """Helper for _try_destination_list: indicates that the server returned a response we couldn't parse @@ -114,7 +126,9 @@ class FederationClient(FederationBase): self.hostname = hs.hostname self.signing_key = hs.signing_key - self._get_pdu_cache: ExpiringCache[str, EventBase] = ExpiringCache( + # Cache mapping `event_id` to a tuple of the event itself and the `pull_origin` + # (which server we pulled the event from) + self._get_pdu_cache: ExpiringCache[str, Tuple[EventBase, str]] = ExpiringCache( cache_name="get_pdu_cache", clock=self._clock, max_len=1000, @@ -352,11 +366,11 @@ class FederationClient(FederationBase): @tag_args async def get_pdu( self, - destinations: Iterable[str], + destinations: Collection[str], event_id: str, room_version: RoomVersion, timeout: Optional[int] = None, - ) -> Optional[EventBase]: + ) -> Optional[PulledPduInfo]: """Requests the PDU with given origin and ID from the remote home servers. @@ -371,11 +385,11 @@ class FederationClient(FederationBase): moving to the next destination. None indicates no timeout. Returns: - The requested PDU, or None if we were unable to find it. + The requested PDU wrapped in `PulledPduInfo`, or None if we were unable to find it. """ logger.debug( - "get_pdu: event_id=%s from destinations=%s", event_id, destinations + "get_pdu(event_id=%s): from destinations=%s", event_id, destinations ) # TODO: Rate limit the number of times we try and get the same event. @@ -384,19 +398,25 @@ class FederationClient(FederationBase): # it gets persisted to the database), so we cache the results of the lookup. # Note that this is separate to the regular get_event cache which caches # events once they have been persisted. 
- event = self._get_pdu_cache.get(event_id) + get_pdu_cache_entry = self._get_pdu_cache.get(event_id) + event = None + pull_origin = None + if get_pdu_cache_entry: + event, pull_origin = get_pdu_cache_entry # If we don't see the event in the cache, go try to fetch it from the # provided remote federated destinations - if not event: + else: pdu_attempts = self.pdu_destination_tried.setdefault(event_id, {}) + # TODO: We can probably refactor this to use `_try_destination_list` for destination in destinations: now = self._clock.time_msec() last_attempt = pdu_attempts.get(destination, 0) if last_attempt + PDU_RETRY_TIME_MS > now: logger.debug( - "get_pdu: skipping destination=%s because we tried it recently last_attempt=%s and we only check every %s (now=%s)", + "get_pdu(event_id=%s): skipping destination=%s because we tried it recently last_attempt=%s and we only check every %s (now=%s)", + event_id, destination, last_attempt, PDU_RETRY_TIME_MS, @@ -411,43 +431,48 @@ class FederationClient(FederationBase): room_version=room_version, timeout=timeout, ) + pull_origin = destination pdu_attempts[destination] = now if event: # Prime the cache - self._get_pdu_cache[event.event_id] = event + self._get_pdu_cache[event.event_id] = (event, pull_origin) # Now that we have an event, we can break out of this # loop and stop asking other destinations. break + except NotRetryingDestination as e: + logger.info("get_pdu(event_id=%s): %s", event_id, e) + continue + except FederationDeniedError: + logger.info( + "get_pdu(event_id=%s): Not attempting to fetch PDU from %s because the homeserver is not on our federation whitelist", + event_id, + destination, + ) + continue except SynapseError as e: logger.info( - "Failed to get PDU %s from %s because %s", + "get_pdu(event_id=%s): Failed to get PDU from %s because %s", event_id, destination, e, ) continue - except NotRetryingDestination as e: - logger.info(str(e)) - continue - except FederationDeniedError as e: - logger.info(str(e)) - continue except Exception as e: pdu_attempts[destination] = now logger.info( - "Failed to get PDU %s from %s because %s", + "get_pdu(event_id=): Failed to get PDU from %s because %s", event_id, destination, e, ) continue - if not event: + if not event or not pull_origin: return None # `event` now refers to an object stored in `get_pdu_cache`. Our @@ -459,7 +484,7 @@ class FederationClient(FederationBase): event.room_version, ) - return event_copy + return PulledPduInfo(event_copy, pull_origin) @trace @tag_args @@ -699,12 +724,14 @@ class FederationClient(FederationBase): pdu_origin = get_domain_from_id(pdu.sender) if not res and pdu_origin != origin: try: - res = await self.get_pdu( + pulled_pdu_info = await self.get_pdu( destinations=[pdu_origin], event_id=pdu.event_id, room_version=room_version, timeout=10000, ) + if pulled_pdu_info is not None: + res = pulled_pdu_info.pdu except SynapseError: pass @@ -806,6 +833,7 @@ class FederationClient(FederationBase): ) for destination in destinations: + # We don't want to ask our own server for information we don't have if destination == self.server_name: continue @@ -814,9 +842,21 @@ class FederationClient(FederationBase): except ( RequestSendFailed, InvalidResponseError, - NotRetryingDestination, ) as e: logger.warning("Failed to %s via %s: %s", description, destination, e) + # Skip to the next homeserver in the list to try. 
+ continue + except NotRetryingDestination as e: + logger.info("%s: %s", description, e) + continue + except FederationDeniedError: + logger.info( + "%s: Not attempting to %s from %s because the homeserver is not on our federation whitelist", + description, + description, + destination, + ) + continue except UnsupportedRoomVersionError: raise except HttpResponseException as e: @@ -1609,6 +1649,54 @@ class FederationClient(FederationBase): return result async def timestamp_to_event( + self, *, destinations: List[str], room_id: str, timestamp: int, direction: str + ) -> Optional["TimestampToEventResponse"]: + """ + Calls each remote federating server from `destinations` asking for their closest + event to the given timestamp in the given direction until we get a response. + Also validates the response to always return the expected keys or raises an + error. + + Args: + destinations: The domains of homeservers to try fetching from + room_id: Room to fetch the event from + timestamp: The point in time (inclusive) we should navigate from in + the given direction to find the closest event. + direction: ["f"|"b"] to indicate whether we should navigate forward + or backward from the given timestamp to find the closest event. + + Returns: + A parsed TimestampToEventResponse including the closest event_id + and origin_server_ts or None if no destination has a response. + """ + + async def _timestamp_to_event_from_destination( + destination: str, + ) -> TimestampToEventResponse: + return await self._timestamp_to_event_from_destination( + destination, room_id, timestamp, direction + ) + + try: + # Loop through each homeserver candidate until we get a succesful response + timestamp_to_event_response = await self._try_destination_list( + "timestamp_to_event", + destinations, + # TODO: The requested timestamp may lie in a part of the + # event graph that the remote server *also* didn't have, + # in which case they will have returned another event + # which may be nowhere near the requested timestamp. In + # the future, we may need to reconcile that gap and ask + # other homeservers, and/or extend `/timestamp_to_event` + # to return events on *both* sides of the timestamp to + # help reconcile the gap faster. + _timestamp_to_event_from_destination, + ) + return timestamp_to_event_response + except SynapseError: + return None + + async def _timestamp_to_event_from_destination( self, destination: str, room_id: str, timestamp: int, direction: str ) -> "TimestampToEventResponse": """ diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 4fbc79a6cb..5fc3b8bc8c 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -442,6 +442,15 @@ class FederationHandler: # appropriate stuff. # TODO: We can probably do something more intelligent here. 
return True + except NotRetryingDestination as e: + logger.info("_maybe_backfill_inner: %s", e) + continue + except FederationDeniedError: + logger.info( + "_maybe_backfill_inner: Not attempting to backfill from %s because the homeserver is not on our federation whitelist", + dom, + ) + continue except (SynapseError, InvalidResponseError) as e: logger.info("Failed to backfill from %s because %s", dom, e) continue @@ -477,15 +486,9 @@ class FederationHandler: logger.info("Failed to backfill from %s because %s", dom, e) continue - except NotRetryingDestination as e: - logger.info(str(e)) - continue except RequestSendFailed as e: logger.info("Failed to get backfill from %s because %s", dom, e) continue - except FederationDeniedError as e: - logger.info(e) - continue except Exception as e: logger.exception("Failed to backfill from %s because %s", dom, e) continue diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 7da6316a82..9ca5df7c78 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -58,7 +58,7 @@ from synapse.event_auth import ( ) from synapse.events import EventBase from synapse.events.snapshot import EventContext -from synapse.federation.federation_client import InvalidResponseError +from synapse.federation.federation_client import InvalidResponseError, PulledPduInfo from synapse.logging.context import nested_logging_context from synapse.logging.opentracing import ( SynapseTags, @@ -1517,8 +1517,8 @@ class FederationEventHandler: ) async def backfill_event_id( - self, destination: str, room_id: str, event_id: str - ) -> EventBase: + self, destinations: List[str], room_id: str, event_id: str + ) -> PulledPduInfo: """Backfill a single event and persist it as a non-outlier which means we also pull in all of the state and auth events necessary for it. @@ -1530,24 +1530,21 @@ class FederationEventHandler: Raises: FederationError if we are unable to find the event from the destination """ - logger.info( - "backfill_event_id: event_id=%s from destination=%s", event_id, destination - ) + logger.info("backfill_event_id: event_id=%s", event_id) room_version = await self._store.get_room_version(room_id) - event_from_response = await self._federation_client.get_pdu( - [destination], + pulled_pdu_info = await self._federation_client.get_pdu( + destinations, event_id, room_version, ) - if not event_from_response: + if not pulled_pdu_info: raise FederationError( "ERROR", 404, - "Unable to find event_id=%s from destination=%s to backfill." - % (event_id, destination), + f"Unable to find event_id={event_id} from remote servers to backfill.", affected=event_id, ) @@ -1555,13 +1552,13 @@ class FederationEventHandler: # and auth events to de-outlier it. This also sets up the necessary # `state_groups` for the event. 
await self._process_pulled_events( - destination, - [event_from_response], + pulled_pdu_info.pull_origin, + [pulled_pdu_info.pdu], # Prevent notifications going to clients backfilled=True, ) - return event_from_response + return pulled_pdu_info @trace @tag_args @@ -1584,19 +1581,19 @@ class FederationEventHandler: async def get_event(event_id: str) -> None: with nested_logging_context(event_id): try: - event = await self._federation_client.get_pdu( + pulled_pdu_info = await self._federation_client.get_pdu( [destination], event_id, room_version, ) - if event is None: + if pulled_pdu_info is None: logger.warning( "Server %s didn't return event %s", destination, event_id, ) return - events.append(event) + events.append(pulled_pdu_info.pdu) except Exception as e: logger.warning( diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index cc1e5c8f97..de97886ea9 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -49,7 +49,6 @@ from synapse.api.constants import ( from synapse.api.errors import ( AuthError, Codes, - HttpResponseException, LimitExceededError, NotFoundError, StoreError, @@ -60,7 +59,6 @@ from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion from synapse.event_auth import validate_event_for_room_version from synapse.events import EventBase from synapse.events.utils import copy_and_fixup_power_levels_contents -from synapse.federation.federation_client import InvalidResponseError from synapse.handlers.relations import BundledAggregations from synapse.module_api import NOT_SPAM from synapse.rest.admin._base import assert_user_is_admin @@ -1472,7 +1470,12 @@ class TimestampLookupHandler: Raises: SynapseError if unable to find any event locally in the given direction """ - + logger.debug( + "get_event_for_timestamp(room_id=%s, timestamp=%s, direction=%s) Finding closest event...", + room_id, + timestamp, + direction, + ) local_event_id = await self.store.get_event_id_for_timestamp( room_id, timestamp, direction ) @@ -1524,85 +1527,54 @@ class TimestampLookupHandler: ) ) - # Loop through each homeserver candidate until we get a succesful response - for domain in likely_domains: - # We don't want to ask our own server for information we don't have - if domain == self.server_name: - continue + remote_response = await self.federation_client.timestamp_to_event( + destinations=likely_domains, + room_id=room_id, + timestamp=timestamp, + direction=direction, + ) + if remote_response is not None: + logger.debug( + "get_event_for_timestamp: remote_response=%s", + remote_response, + ) - try: - remote_response = await self.federation_client.timestamp_to_event( - domain, room_id, timestamp, direction - ) - logger.debug( - "get_event_for_timestamp: response from domain(%s)=%s", - domain, - remote_response, - ) + remote_event_id = remote_response.event_id + remote_origin_server_ts = remote_response.origin_server_ts - remote_event_id = remote_response.event_id - remote_origin_server_ts = remote_response.origin_server_ts - - # Backfill this event so we can get a pagination token for - # it with `/context` and paginate `/messages` from this - # point. - # - # TODO: The requested timestamp may lie in a part of the - # event graph that the remote server *also* didn't have, - # in which case they will have returned another event - # which may be nowhere near the requested timestamp. 
In - # the future, we may need to reconcile that gap and ask - # other homeservers, and/or extend `/timestamp_to_event` - # to return events on *both* sides of the timestamp to - # help reconcile the gap faster. - remote_event = ( - await self.federation_event_handler.backfill_event_id( - domain, room_id, remote_event_id - ) - ) + # Backfill this event so we can get a pagination token for + # it with `/context` and paginate `/messages` from this + # point. + pulled_pdu_info = await self.federation_event_handler.backfill_event_id( + likely_domains, room_id, remote_event_id + ) + remote_event = pulled_pdu_info.pdu - # XXX: When we see that the remote server is not trustworthy, - # maybe we should not ask them first in the future. - if remote_origin_server_ts != remote_event.origin_server_ts: - logger.info( - "get_event_for_timestamp: Remote server (%s) claimed that remote_event_id=%s occured at remote_origin_server_ts=%s but that isn't true (actually occured at %s). Their claims are dubious and we should consider not trusting them.", - domain, - remote_event_id, - remote_origin_server_ts, - remote_event.origin_server_ts, - ) - - # Only return the remote event if it's closer than the local event - if not local_event or ( - abs(remote_event.origin_server_ts - timestamp) - < abs(local_event.origin_server_ts - timestamp) - ): - logger.info( - "get_event_for_timestamp: returning remote_event_id=%s (%s) since it's closer to timestamp=%s than local_event=%s (%s)", - remote_event_id, - remote_event.origin_server_ts, - timestamp, - local_event.event_id if local_event else None, - local_event.origin_server_ts if local_event else None, - ) - return remote_event_id, remote_origin_server_ts - except (HttpResponseException, InvalidResponseError) as ex: - # Let's not put a high priority on some other homeserver - # failing to respond or giving a random response - logger.debug( - "get_event_for_timestamp: Failed to fetch /timestamp_to_event from %s because of exception(%s) %s args=%s", - domain, - type(ex).__name__, - ex, - ex.args, + # XXX: When we see that the remote server is not trustworthy, + # maybe we should not ask them first in the future. + if remote_origin_server_ts != remote_event.origin_server_ts: + logger.info( + "get_event_for_timestamp: Remote server (%s) claimed that remote_event_id=%s occured at remote_origin_server_ts=%s but that isn't true (actually occured at %s). Their claims are dubious and we should consider not trusting them.", + pulled_pdu_info.pull_origin, + remote_event_id, + remote_origin_server_ts, + remote_event.origin_server_ts, ) - except Exception: - # But we do want to see some exceptions in our code - logger.warning( - "get_event_for_timestamp: Failed to fetch /timestamp_to_event from %s because of exception", - domain, - exc_info=True, + + # Only return the remote event if it's closer than the local event + if not local_event or ( + abs(remote_event.origin_server_ts - timestamp) + < abs(local_event.origin_server_ts - timestamp) + ): + logger.info( + "get_event_for_timestamp: returning remote_event_id=%s (%s) since it's closer to timestamp=%s than local_event=%s (%s)", + remote_event_id, + remote_event.origin_server_ts, + timestamp, + local_event.event_id if local_event else None, + local_event.origin_server_ts if local_event else None, ) + return remote_event_id, remote_origin_server_ts # To appease mypy, we have to add both of these conditions to check for # `None`. 
We only expect `local_event` to be `None` when diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py index d0a69ff843..dcc037b982 100644 --- a/synapse/util/retryutils.py +++ b/synapse/util/retryutils.py @@ -51,7 +51,7 @@ class NotRetryingDestination(Exception): destination: the domain in question """ - msg = "Not retrying server %s." % (destination,) + msg = f"Not retrying server {destination} because we tried it recently retry_last_ts={retry_last_ts} and we won't check for another retry_interval={retry_interval}ms." super().__init__(msg) self.retry_last_ts = retry_last_ts diff --git a/tests/federation/test_federation_client.py b/tests/federation/test_federation_client.py index 51d3bb8fff..e67f405826 100644 --- a/tests/federation/test_federation_client.py +++ b/tests/federation/test_federation_client.py @@ -142,14 +142,14 @@ class FederationClientTest(FederatingHomeserverTestCase): def test_get_pdu_returns_nothing_when_event_does_not_exist(self): """No event should be returned when the event does not exist""" - remote_pdu = self.get_success( + pulled_pdu_info = self.get_success( self.hs.get_federation_client().get_pdu( ["yet.another.server"], "event_should_not_exist", RoomVersions.V9, ) ) - self.assertEqual(remote_pdu, None) + self.assertEqual(pulled_pdu_info, None) def test_get_pdu(self): """Test to make sure an event is returned by `get_pdu()`""" @@ -169,13 +169,15 @@ class FederationClientTest(FederatingHomeserverTestCase): remote_pdu.internal_metadata.outlier = True # Get the event again. This time it should read it from cache. - remote_pdu2 = self.get_success( + pulled_pdu_info2 = self.get_success( self.hs.get_federation_client().get_pdu( ["yet.another.server"], remote_pdu.event_id, RoomVersions.V9, ) ) + self.assertIsNotNone(pulled_pdu_info2) + remote_pdu2 = pulled_pdu_info2.pdu # Sanity check that we are working against the same event self.assertEqual(remote_pdu.event_id, remote_pdu2.event_id) @@ -215,13 +217,15 @@ class FederationClientTest(FederatingHomeserverTestCase): ) ) - remote_pdu = self.get_success( + pulled_pdu_info = self.get_success( self.hs.get_federation_client().get_pdu( ["yet.another.server"], "event_id", RoomVersions.V9, ) ) + self.assertIsNotNone(pulled_pdu_info) + remote_pdu = pulled_pdu_info.pdu # check the right call got made to the agent self._mock_agent.request.assert_called_once_with( -- cgit 1.5.1 From 6a6e1e8c0711939338f25d8d41d1e4d33d984949 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Fri, 28 Oct 2022 10:53:34 +0000 Subject: Fix room creation being rate limited too aggressively since Synapse v1.69.0. (#14314) * Introduce a test for the old behaviour which we want to restore * Reintroduce the old behaviour in a simpler way * Newsfile Signed-off-by: Olivier Wilkinson (reivilibre) * Use 1 credit instead of 2 for creating a room: be more lenient than before Notably, the UI in Element Web was still broken after restoring to prior behaviour. After discussion, we agreed that it would be sensible to increase the limit. 
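
A rough sketch of the `n_actions` idea this fix introduces: charge the rate limiter several credits in one up-front call, so room creation — which persists many events — is limited once in advance rather than once per event. The toy `Ratelimiter` below is an illustrative stand-in, not Synapse's implementation.

import time

class Ratelimiter:
    def __init__(self, rate_hz: float, burst_count: int) -> None:
        self.rate_hz = rate_hz
        self.burst_count = burst_count
        self.tokens = float(burst_count)
        self.last = time.monotonic()

    def ratelimit(self, n_actions: int = 1) -> None:
        # Refill the token bucket, then spend n_actions credits at once.
        now = time.monotonic()
        self.tokens = min(
            self.burst_count, self.tokens + (now - self.last) * self.rate_hz
        )
        self.last = now
        if self.tokens < n_actions:
            raise RuntimeError("rate limit exceeded")
        self.tokens -= n_actions

limiter = Ratelimiter(rate_hz=0.2, burst_count=10)
limiter.ratelimit(n_actions=1)  # one credit covers the whole room creation
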
Signed-off-by: Olivier Wilkinson (reivilibre) --- changelog.d/14314.bugfix | 1 + synapse/api/ratelimiting.py | 8 +++++- synapse/handlers/room.py | 16 ++++++++---- tests/rest/client/test_rooms.py | 54 ++++++++++++++++++++++++++++++++++++++--- 4 files changed, 70 insertions(+), 9 deletions(-) create mode 100644 changelog.d/14314.bugfix (limited to 'synapse/handlers/room.py') diff --git a/changelog.d/14314.bugfix b/changelog.d/14314.bugfix new file mode 100644 index 0000000000..8be47ee083 --- /dev/null +++ b/changelog.d/14314.bugfix @@ -0,0 +1 @@ +Fix room creation being rate limited too aggressively since Synapse v1.69.0. \ No newline at end of file diff --git a/synapse/api/ratelimiting.py b/synapse/api/ratelimiting.py index 044c7d4926..511790c7c5 100644 --- a/synapse/api/ratelimiting.py +++ b/synapse/api/ratelimiting.py @@ -343,6 +343,7 @@ class RequestRatelimiter: requester: Requester, update: bool = True, is_admin_redaction: bool = False, + n_actions: int = 1, ) -> None: """Ratelimits requests. @@ -355,6 +356,8 @@ class RequestRatelimiter: is_admin_redaction: Whether this is a room admin/moderator redacting an event. If so then we may apply different ratelimits depending on config. + n_actions: Multiplier for the number of actions to apply to the + rate limiter at once. Raises: LimitExceededError if the request should be ratelimited @@ -383,7 +386,9 @@ class RequestRatelimiter: if is_admin_redaction and self.admin_redaction_ratelimiter: # If we have separate config for admin redactions, use a separate # ratelimiter as to not have user_ids clash - await self.admin_redaction_ratelimiter.ratelimit(requester, update=update) + await self.admin_redaction_ratelimiter.ratelimit( + requester, update=update, n_actions=n_actions + ) else: # Override rate and burst count per-user await self.request_ratelimiter.ratelimit( @@ -391,4 +396,5 @@ class RequestRatelimiter: rate_hz=messages_per_second, burst_count=burst_count, update=update, + n_actions=n_actions, ) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 638f54051a..d74b675adc 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -559,7 +559,6 @@ class RoomCreationHandler: invite_list=[], initial_state=initial_state, creation_content=creation_content, - ratelimit=False, ) # Transfer membership events @@ -753,6 +752,10 @@ class RoomCreationHandler: ) if ratelimit: + # Rate limit once in advance, but don't rate limit the individual + # events in the room — room creation isn't atomic and it's very + # janky if half the events in the initial state don't make it because + # of rate limiting. await self.request_ratelimiter.ratelimit(requester) room_version_id = config.get( @@ -913,7 +916,6 @@ class RoomCreationHandler: room_alias=room_alias, power_level_content_override=power_level_content_override, creator_join_profile=creator_join_profile, - ratelimit=ratelimit, ) if "name" in config: @@ -1037,7 +1039,6 @@ class RoomCreationHandler: room_alias: Optional[RoomAlias] = None, power_level_content_override: Optional[JsonDict] = None, creator_join_profile: Optional[JsonDict] = None, - ratelimit: bool = True, ) -> Tuple[int, str, int]: """Sends the initial events into a new room. Sends the room creation, membership, and power level events into the room sequentially, then creates and batches up the @@ -1046,6 +1047,8 @@ class RoomCreationHandler: `power_level_content_override` doesn't apply when initial state has power level state event content. + Rate limiting should already have been applied by this point. 
+ Returns: A tuple containing the stream ID, event ID and depth of the last event sent to the room. @@ -1144,7 +1147,7 @@ class RoomCreationHandler: creator.user, room_id, "join", - ratelimit=ratelimit, + ratelimit=False, content=creator_join_profile, new_room=True, prev_event_ids=[last_sent_event_id], @@ -1269,7 +1272,10 @@ class RoomCreationHandler: events_to_send.append((encryption_event, encryption_context)) last_event = await self.event_creation_handler.handle_new_client_event( - creator, events_to_send, ignore_shadow_ban=True + creator, + events_to_send, + ignore_shadow_ban=True, + ratelimit=False, ) assert last_event.internal_metadata.stream_ordering is not None return last_event.internal_metadata.stream_ordering, last_event.event_id, depth diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index 716366eb90..1084d4ad9d 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -54,6 +54,7 @@ from tests.http.server._base import make_request_with_cancellation_test from tests.storage.test_stream import PaginationTestCase from tests.test_utils import make_awaitable from tests.test_utils.event_injection import create_event +from tests.unittest import override_config PATH_PREFIX = b"/_matrix/client/api/v1" @@ -871,6 +872,41 @@ class RoomsCreateTestCase(RoomBase): self.assertEqual(channel.code, HTTPStatus.OK, channel.json_body) self.assertEqual(join_mock.call_count, 0) + def _create_basic_room(self) -> Tuple[int, object]: + """ + Tries to create a basic room and returns the response code. + """ + channel = self.make_request( + "POST", + "/createRoom", + {}, + ) + return channel.code, channel.json_body + + @override_config( + { + "rc_message": {"per_second": 0.2, "burst_count": 10}, + } + ) + def test_room_creation_ratelimiting(self) -> None: + """ + Regression test for #14312, where ratelimiting was made too strict. + Clients should be able to create 10 rooms in a row + without hitting rate limits, using default rate limit config. + (We override rate limiting config back to its default value.) + + To ensure we don't make ratelimiting too generous accidentally, + also check that we can't create an 11th room. + """ + + for _ in range(10): + code, json_body = self._create_basic_room() + self.assertEqual(code, HTTPStatus.OK, json_body) + + # The 6th room hits the rate limit. + code, json_body = self._create_basic_room() + self.assertEqual(code, HTTPStatus.TOO_MANY_REQUESTS, json_body) + class RoomTopicTestCase(RoomBase): """Tests /rooms/$room_id/topic REST events.""" @@ -1390,10 +1426,22 @@ class RoomJoinRatelimitTestCase(RoomBase): ) def test_join_local_ratelimit(self) -> None: """Tests that local joins are actually rate-limited.""" - for _ in range(3): - self.helper.create_room_as(self.user_id) + # Create 4 rooms + room_ids = [ + self.helper.create_room_as(self.user_id, is_public=True) for _ in range(4) + ] + + joiner_user_id = self.register_user("joiner", "secret") + # Now make a new user try to join some of them. 
- self.helper.create_room_as(self.user_id, expect_code=429) + # The user can join 3 rooms + for room_id in room_ids[0:3]: + self.helper.join(room_id, joiner_user_id) + + # But the user cannot join a 4th room + self.helper.join( + room_ids[3], joiner_user_id, expect_code=HTTPStatus.TOO_MANY_REQUESTS + ) @unittest.override_config( {"rc_joins": {"local": {"per_second": 0.5, "burst_count": 3}}} -- cgit 1.5.1 From 7894251bcea7714b47e3849e509ea717bb18e9f5 Mon Sep 17 00:00:00 2001 From: Shay Date: Mon, 7 Nov 2022 13:38:50 -0800 Subject: Correctly create power level event during initial room creation (#14361) --- changelog.d/14361.bugfix | 1 + synapse/handlers/room.py | 25 +++++++++++++++++++++++-- tests/rest/client/test_rooms.py | 4 ++-- 3 files changed, 26 insertions(+), 4 deletions(-) create mode 100644 changelog.d/14361.bugfix (limited to 'synapse/handlers/room.py') diff --git a/changelog.d/14361.bugfix b/changelog.d/14361.bugfix new file mode 100644 index 0000000000..33ba1d92af --- /dev/null +++ b/changelog.d/14361.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in v1.71.0rc1 where the power level event was incorrectly created during initial room creation. \ No newline at end of file diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index f10cfca073..66a50bca6e 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1080,6 +1080,19 @@ class RoomCreationHandler: for_batch: bool, **kwargs: Any, ) -> Tuple[EventBase, synapse.events.snapshot.EventContext]: + """ + Creates an event and associated event context. + Args: + etype: the type of event to be created + content: content of the event + for_batch: whether the event is being created for batch persisting. If + bool for_batch is true, this will create an event using the prev_event_ids, + and will create an event context for the event using the parameters state_map + and current_state_group, thus these parameters must be provided in this + case if for_batch is True. The subsequently created event and context + are suitable for being batched up and bulk persisted to the database + with other similarly created events. + """ nonlocal depth nonlocal prev_event @@ -1139,13 +1152,21 @@ class RoomCreationHandler: depth += 1 state_map[(EventTypes.Member, creator.user.to_string())] = member_event_id + # we need the state group of the membership event as it is the current state group + event_to_state = ( + await self._storage_controllers.state.get_state_group_for_events( + [member_event_id] + ) + ) + current_state_group = event_to_state[member_event_id] + events_to_send = [] # We treat the power levels override specially as this needs to be one # of the first events that get sent into a room. 
pl_content = initial_state.pop((EventTypes.PowerLevels, ""), None) if pl_content is not None: power_event, power_context = await create_event( - EventTypes.PowerLevels, pl_content, False + EventTypes.PowerLevels, pl_content, True ) current_state_group = power_context._state_group events_to_send.append((power_event, power_context)) @@ -1194,7 +1215,7 @@ class RoomCreationHandler: pl_event, pl_context = await create_event( EventTypes.PowerLevels, power_level_content, - False, + True, ) current_state_group = pl_context._state_group events_to_send.append((pl_event, pl_context)) diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index 1084d4ad9d..e919e089cb 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -715,7 +715,7 @@ class RoomsCreateTestCase(RoomBase): self.assertEqual(HTTPStatus.OK, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) assert channel.resource_usage is not None - self.assertEqual(34, channel.resource_usage.db_txn_count) + self.assertEqual(33, channel.resource_usage.db_txn_count) def test_post_room_initial_state(self) -> None: # POST with initial_state config key, expect new room id @@ -728,7 +728,7 @@ class RoomsCreateTestCase(RoomBase): self.assertEqual(HTTPStatus.OK, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) assert channel.resource_usage is not None - self.assertEqual(37, channel.resource_usage.db_txn_count) + self.assertEqual(36, channel.resource_usage.db_txn_count) def test_post_room_visibility_key(self) -> None: # POST with visibility config key, expect new room id -- cgit 1.5.1 From 945a0928c793c0bd8573e179583d983187e5f392 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 16 Nov 2022 12:09:33 +0000 Subject: Don't filter state in /context response (#14461) We don't filter state usually, so doing so here is a waste of time. This is not much of an issue for clients that enable lazy loading of members, since there will be fewer state events. --- changelog.d/14461.misc | 1 + synapse/handlers/room.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/14461.misc (limited to 'synapse/handlers/room.py') diff --git a/changelog.d/14461.misc b/changelog.d/14461.misc new file mode 100644 index 0000000000..cdfa577a4c --- /dev/null +++ b/changelog.d/14461.misc @@ -0,0 +1 @@ +Improve performance of `/context` in large rooms. 
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 66a50bca6e..6dcfd86fdf 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1451,7 +1451,7 @@ class RoomContextHandler: events_before=events_before, event=event, events_after=events_after, - state=await filter_evts(state_events), + state=state_events, aggregations=aggregations, start=await token.copy_and_replace( StreamKeyType.ROOM, results.start -- cgit 1.5.1 From b5b5f6608462a988b05502a3b70b6a57ca3846d2 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Mon, 12 Dec 2022 16:19:30 +0000 Subject: Move `StateFilter` to `synapse.types` (#14668) * Move `StateFilter` to `synapse.types` * Changelog --- changelog.d/14668.misc | 1 + synapse/events/builder.py | 2 +- synapse/events/snapshot.py | 2 +- synapse/handlers/federation.py | 2 +- synapse/handlers/federation_event.py | 2 +- synapse/handlers/message.py | 2 +- synapse/handlers/pagination.py | 2 +- synapse/handlers/register.py | 2 +- synapse/handlers/room.py | 2 +- synapse/handlers/room_member.py | 2 +- synapse/handlers/search.py | 2 +- synapse/handlers/sync.py | 2 +- synapse/module_api/__init__.py | 2 +- synapse/push/bulk_push_rule_evaluator.py | 2 +- synapse/push/mailer.py | 2 +- synapse/rest/admin/rooms.py | 2 +- synapse/rest/client/room.py | 2 +- synapse/state/__init__.py | 2 +- synapse/storage/controllers/persist_events.py | 2 +- synapse/storage/controllers/state.py | 2 +- synapse/storage/databases/main/state.py | 2 +- synapse/storage/databases/state/bg_updates.py | 2 +- synapse/storage/databases/state/store.py | 2 +- synapse/storage/state.py | 567 ---------------- synapse/types.py | 928 -------------------------- synapse/types/__init__.py | 928 ++++++++++++++++++++++++++ synapse/types/state.py | 567 ++++++++++++++++ synapse/visibility.py | 2 +- tests/storage/test_state.py | 2 +- 29 files changed, 1520 insertions(+), 1519 deletions(-) create mode 100644 changelog.d/14668.misc delete mode 100644 synapse/storage/state.py delete mode 100644 synapse/types.py create mode 100644 synapse/types/__init__.py create mode 100644 synapse/types/state.py (limited to 'synapse/handlers/room.py') diff --git a/changelog.d/14668.misc b/changelog.d/14668.misc new file mode 100644 index 0000000000..5269d8a97d --- /dev/null +++ b/changelog.d/14668.misc @@ -0,0 +1 @@ +Move `StateFilter` to `synapse.types`. 
diff --git a/synapse/events/builder.py b/synapse/events/builder.py index d62906043f..94dd1298e1 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -28,8 +28,8 @@ from synapse.event_auth import auth_types_for_event from synapse.events import EventBase, _EventInternalMetadata, make_event_from_dict from synapse.state import StateHandler from synapse.storage.databases.main import DataStore -from synapse.storage.state import StateFilter from synapse.types import EventID, JsonDict +from synapse.types.state import StateFilter from synapse.util import Clock from synapse.util.stringutils import random_string diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index 1c0e96bec7..6eaef8b57a 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -23,7 +23,7 @@ from synapse.types import JsonDict, StateMap if TYPE_CHECKING: from synapse.storage.controllers import StorageControllers from synapse.storage.databases.main import DataStore - from synapse.storage.state import StateFilter + from synapse.types.state import StateFilter @attr.s(slots=True, auto_attribs=True) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 3398fcaf7d..b2784d7333 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -70,8 +70,8 @@ from synapse.replication.http.federation import ( ) from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.events_worker import EventRedactBehaviour -from synapse.storage.state import StateFilter from synapse.types import JsonDict, get_domain_from_id +from synapse.types.state import StateFilter from synapse.util.async_helpers import Linearizer from synapse.util.retryutils import NotRetryingDestination from synapse.visibility import filter_events_for_server diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index f7223b03c3..d2facdab60 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -75,7 +75,6 @@ from synapse.replication.http.federation import ( from synapse.state import StateResolutionStore from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.events_worker import EventRedactBehaviour -from synapse.storage.state import StateFilter from synapse.types import ( PersistedEventPosition, RoomStreamToken, @@ -83,6 +82,7 @@ from synapse.types import ( UserID, get_domain_from_id, ) +from synapse.types.state import StateFilter from synapse.util.async_helpers import Linearizer, concurrently_execute from synapse.util.iterutils import batch_iter from synapse.util.retryutils import NotRetryingDestination diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 5cbe89f4fd..d6e90ef259 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -59,7 +59,6 @@ from synapse.replication.http.send_event import ReplicationSendEventRestServlet from synapse.replication.http.send_events import ReplicationSendEventsRestServlet from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.events_worker import EventRedactBehaviour -from synapse.storage.state import StateFilter from synapse.types import ( MutableStateMap, PersistedEventPosition, @@ -70,6 +69,7 @@ from synapse.types import ( UserID, create_requester, ) +from synapse.types.state import StateFilter from synapse.util import json_decoder, json_encoder, log_failure, 
unwrapFirstError from synapse.util.async_helpers import Linearizer, gather_results from synapse.util.caches.expiringcache import ExpiringCache diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index c572508a02..8c8ff18a1a 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -27,9 +27,9 @@ from synapse.handlers.room import ShutdownRoomResponse from synapse.logging.opentracing import trace from synapse.metrics.background_process_metrics import run_as_background_process from synapse.rest.admin._base import assert_user_is_admin -from synapse.storage.state import StateFilter from synapse.streams.config import PaginationConfig from synapse.types import JsonDict, Requester, StreamKeyType +from synapse.types.state import StateFilter from synapse.util.async_helpers import ReadWriteLock from synapse.util.stringutils import random_string from synapse.visibility import filter_events_for_client diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 6307fa9c5d..c611efb760 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -46,8 +46,8 @@ from synapse.replication.http.register import ( ReplicationRegisterServlet, ) from synapse.spam_checker_api import RegistrationBehaviour -from synapse.storage.state import StateFilter from synapse.types import RoomAlias, UserID, create_requester +from synapse.types.state import StateFilter if TYPE_CHECKING: from synapse.server import HomeServer diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 6dcfd86fdf..f81241c2b3 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -62,7 +62,6 @@ from synapse.events.utils import copy_and_fixup_power_levels_contents from synapse.handlers.relations import BundledAggregations from synapse.module_api import NOT_SPAM from synapse.rest.admin._base import assert_user_is_admin -from synapse.storage.state import StateFilter from synapse.streams import EventSource from synapse.types import ( JsonDict, @@ -77,6 +76,7 @@ from synapse.types import ( UserID, create_requester, ) +from synapse.types.state import StateFilter from synapse.util import stringutils from synapse.util.caches.response_cache import ResponseCache from synapse.util.stringutils import parse_and_validate_server_name diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 6ad2b38b8f..0c39e852a1 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -34,7 +34,6 @@ from synapse.events.snapshot import EventContext from synapse.handlers.profile import MAX_AVATAR_URL_LEN, MAX_DISPLAYNAME_LEN from synapse.logging import opentracing from synapse.module_api import NOT_SPAM -from synapse.storage.state import StateFilter from synapse.types import ( JsonDict, Requester, @@ -45,6 +44,7 @@ from synapse.types import ( create_requester, get_domain_from_id, ) +from synapse.types.state import StateFilter from synapse.util.async_helpers import Linearizer from synapse.util.distributor import user_left_room diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index bcab98c6d5..33115ce488 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -23,8 +23,8 @@ from synapse.api.constants import EventTypes, Membership from synapse.api.errors import NotFoundError, SynapseError from synapse.api.filtering import Filter from synapse.events import EventBase -from synapse.storage.state import StateFilter from synapse.types import JsonDict, StreamKeyType, UserID +from 
synapse.types.state import StateFilter from synapse.visibility import filter_events_for_client if TYPE_CHECKING: diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index dace9b606f..7d6a653747 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -49,7 +49,6 @@ from synapse.push.clientformat import format_push_rules_for_user from synapse.storage.databases.main.event_push_actions import RoomNotifCounts from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary from synapse.storage.roommember import MemberSummary -from synapse.storage.state import StateFilter from synapse.types import ( DeviceListUpdates, JsonDict, @@ -61,6 +60,7 @@ from synapse.types import ( StreamToken, UserID, ) +from synapse.types.state import StateFilter from synapse.util.async_helpers import concurrently_execute from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.caches.lrucache import LruCache diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index 96a661177a..0092a03c59 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -111,7 +111,6 @@ from synapse.storage.background_updates import ( ) from synapse.storage.database import DatabasePool, LoggingTransaction from synapse.storage.databases.main.roommember import ProfileInfo -from synapse.storage.state import StateFilter from synapse.types import ( DomainSpecificString, JsonDict, @@ -124,6 +123,7 @@ from synapse.types import ( UserProfile, create_requester, ) +from synapse.types.state import StateFilter from synapse.util import Clock from synapse.util.async_helpers import maybe_awaitable from synapse.util.caches.descriptors import CachedFunction, cached diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 9ed35d8461..36e5b327ef 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -35,8 +35,8 @@ from synapse.events import EventBase, relation_from_event from synapse.events.snapshot import EventContext from synapse.state import POWER_KEY from synapse.storage.databases.main.roommember import EventIdMembership -from synapse.storage.state import StateFilter from synapse.synapse_rust.push import FilteredPushRules, PushRuleEvaluator +from synapse.types.state import StateFilter from synapse.util.caches import register_cache from synapse.util.metrics import measure_func from synapse.visibility import filter_event_for_clients_with_state diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py index c2575ba3d9..93b255ced5 100644 --- a/synapse/push/mailer.py +++ b/synapse/push/mailer.py @@ -37,8 +37,8 @@ from synapse.push.push_types import ( TemplateVars, ) from synapse.storage.databases.main.event_push_actions import EmailPushAction -from synapse.storage.state import StateFilter from synapse.types import StateMap, UserID +from synapse.types.state import StateFilter from synapse.util.async_helpers import concurrently_execute from synapse.visibility import filter_events_for_client diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py index 747e6fda83..e957aa28ca 100644 --- a/synapse/rest/admin/rooms.py +++ b/synapse/rest/admin/rooms.py @@ -34,9 +34,9 @@ from synapse.rest.admin._base import ( assert_user_is_admin, ) from synapse.storage.databases.main.room import RoomSortOrder -from synapse.storage.state import StateFilter from synapse.streams.config import PaginationConfig from synapse.types import JsonDict, RoomID, 
UserID, create_requester +from synapse.types.state import StateFilter from synapse.util import json_decoder if TYPE_CHECKING: diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 514eb6afc8..790614d721 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -55,9 +55,9 @@ from synapse.logging.opentracing import set_tag from synapse.metrics.background_process_metrics import run_as_background_process from synapse.rest.client._base import client_patterns from synapse.rest.client.transactions import HttpTransactionCache -from synapse.storage.state import StateFilter from synapse.streams.config import PaginationConfig from synapse.types import JsonDict, StreamToken, ThirdPartyInstanceID, UserID +from synapse.types.state import StateFilter from synapse.util import json_decoder from synapse.util.cancellation import cancellable from synapse.util.stringutils import parse_and_validate_server_name, random_string diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 833ffec3de..ee5469d5a8 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -44,8 +44,8 @@ from synapse.logging.context import ContextResourceUsage from synapse.replication.http.state import ReplicationUpdateCurrentStateRestServlet from synapse.state import v1, v2 from synapse.storage.databases.main.events_worker import EventRedactBehaviour -from synapse.storage.state import StateFilter from synapse.types import StateMap +from synapse.types.state import StateFilter from synapse.util.async_helpers import Linearizer from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.metrics import Measure, measure_func diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index 33ffef521b..f1d2c71c91 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -58,13 +58,13 @@ from synapse.storage.controllers.state import StateStorageController from synapse.storage.databases import Databases from synapse.storage.databases.main.events import DeltaState from synapse.storage.databases.main.events_worker import EventRedactBehaviour -from synapse.storage.state import StateFilter from synapse.types import ( PersistedEventPosition, RoomStreamToken, StateMap, get_domain_from_id, ) +from synapse.types.state import StateFilter from synapse.util.async_helpers import ObservableDeferred, yieldable_gather_results from synapse.util.metrics import Measure diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py index 2b31ce54bb..26d79c6e62 100644 --- a/synapse/storage/controllers/state.py +++ b/synapse/storage/controllers/state.py @@ -31,12 +31,12 @@ from synapse.api.constants import EventTypes from synapse.events import EventBase from synapse.logging.opentracing import tag_args, trace from synapse.storage.roommember import ProfileInfo -from synapse.storage.state import StateFilter from synapse.storage.util.partial_state_events_tracker import ( PartialCurrentStateTracker, PartialStateEventsTracker, ) from synapse.types import MutableStateMap, StateMap +from synapse.types.state import StateFilter from synapse.util.cancellation import cancellable if TYPE_CHECKING: diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index af7bebee80..c801a93b5b 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -33,8 +33,8 @@ from synapse.storage.database 
import ( ) from synapse.storage.databases.main.events_worker import EventsWorkerStore from synapse.storage.databases.main.roommember import RoomMemberWorkerStore -from synapse.storage.state import StateFilter from synapse.types import JsonDict, JsonMapping, StateMap +from synapse.types.state import StateFilter from synapse.util.caches import intern_string from synapse.util.caches.descriptors import cached, cachedList from synapse.util.cancellation import cancellable diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py index 4a4ad0f492..d743282f13 100644 --- a/synapse/storage/databases/state/bg_updates.py +++ b/synapse/storage/databases/state/bg_updates.py @@ -22,8 +22,8 @@ from synapse.storage.database import ( LoggingTransaction, ) from synapse.storage.engines import PostgresEngine -from synapse.storage.state import StateFilter from synapse.types import MutableStateMap, StateMap +from synapse.types.state import StateFilter from synapse.util.caches import intern_string if TYPE_CHECKING: diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index f8cfcaca83..1a7232b276 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -25,10 +25,10 @@ from synapse.storage.database import ( LoggingTransaction, ) from synapse.storage.databases.state.bg_updates import StateBackgroundUpdateStore -from synapse.storage.state import StateFilter from synapse.storage.types import Cursor from synapse.storage.util.sequence import build_sequence_generator from synapse.types import MutableStateMap, StateKey, StateMap +from synapse.types.state import StateFilter from synapse.util.caches.descriptors import cached from synapse.util.caches.dictionary_cache import DictionaryCache from synapse.util.cancellation import cancellable diff --git a/synapse/storage/state.py b/synapse/storage/state.py deleted file mode 100644 index 0004d955b4..0000000000 --- a/synapse/storage/state.py +++ /dev/null @@ -1,567 +0,0 @@ -# Copyright 2014-2016 OpenMarket Ltd -# Copyright 2022 The Matrix.org Foundation C.I.C. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import logging -from typing import ( - TYPE_CHECKING, - Callable, - Collection, - Dict, - Iterable, - List, - Mapping, - Optional, - Set, - Tuple, - TypeVar, -) - -import attr -from frozendict import frozendict - -from synapse.api.constants import EventTypes -from synapse.types import MutableStateMap, StateKey, StateMap - -if TYPE_CHECKING: - from typing import FrozenSet # noqa: used within quoted type hint; flake8 sad - - -logger = logging.getLogger(__name__) - -# Used for generic functions below -T = TypeVar("T") - - -@attr.s(slots=True, frozen=True, auto_attribs=True) -class StateFilter: - """A filter used when querying for state. - - Attributes: - types: Map from type to set of state keys (or None). This specifies - which state_keys for the given type to fetch from the DB. If None - then all events with that type are fetched. 
If the set is empty - then no events with that type are fetched. - include_others: Whether to fetch events with types that do not - appear in `types`. - """ - - types: "frozendict[str, Optional[FrozenSet[str]]]" - include_others: bool = False - - def __attrs_post_init__(self) -> None: - # If `include_others` is set we canonicalise the filter by removing - # wildcards from the types dictionary - if self.include_others: - # this is needed to work around the fact that StateFilter is frozen - object.__setattr__( - self, - "types", - frozendict({k: v for k, v in self.types.items() if v is not None}), - ) - - @staticmethod - def all() -> "StateFilter": - """Returns a filter that fetches everything. - - Returns: - The state filter. - """ - return _ALL_STATE_FILTER - - @staticmethod - def none() -> "StateFilter": - """Returns a filter that fetches nothing. - - Returns: - The new state filter. - """ - return _NONE_STATE_FILTER - - @staticmethod - def from_types(types: Iterable[Tuple[str, Optional[str]]]) -> "StateFilter": - """Creates a filter that only fetches the given types - - Args: - types: A list of type and state keys to fetch. A state_key of None - fetches everything for that type - - Returns: - The new state filter. - """ - type_dict: Dict[str, Optional[Set[str]]] = {} - for typ, s in types: - if typ in type_dict: - if type_dict[typ] is None: - continue - - if s is None: - type_dict[typ] = None - continue - - type_dict.setdefault(typ, set()).add(s) # type: ignore - - return StateFilter( - types=frozendict( - (k, frozenset(v) if v is not None else None) - for k, v in type_dict.items() - ) - ) - - @staticmethod - def from_lazy_load_member_list(members: Iterable[str]) -> "StateFilter": - """Creates a filter that returns all non-member events, plus the member - events for the given users - - Args: - members: Set of user IDs - - Returns: - The new state filter - """ - return StateFilter( - types=frozendict({EventTypes.Member: frozenset(members)}), - include_others=True, - ) - - @staticmethod - def freeze( - types: Mapping[str, Optional[Collection[str]]], include_others: bool - ) -> "StateFilter": - """ - Returns a (frozen) StateFilter with the same contents as the parameters - specified here, which can be made of mutable types. - """ - types_with_frozen_values: Dict[str, Optional[FrozenSet[str]]] = {} - for state_types, state_keys in types.items(): - if state_keys is not None: - types_with_frozen_values[state_types] = frozenset(state_keys) - else: - types_with_frozen_values[state_types] = None - - return StateFilter( - frozendict(types_with_frozen_values), include_others=include_others - ) - - def return_expanded(self) -> "StateFilter": - """Creates a new StateFilter where type wild cards have been removed - (except for memberships). The returned filter is a superset of the - current one, i.e. anything that passes the current filter will pass - the returned filter. - - This helps the caching as the DictionaryCache knows if it has *all* the - state, but does not know if it has all of the keys of a particular type, - which makes wildcard lookups expensive unless we have a complete cache. - Hence, if we are doing a wildcard lookup, populate the cache fully so - that we can do an efficient lookup next time. - - Note that since we have two caches, one for membership events and one for - other events, we can be a bit more clever than simply returning - `StateFilter.all()` if `has_wildcards()` is True. - - We return a StateFilter where: - 1. the list of membership events to return is the same - 2. 
if there is a wildcard that matches non-member events we - return all non-member events - - Returns: - The new state filter. - """ - - if self.is_full(): - # If we're going to return everything then there's nothing to do - return self - - if not self.has_wildcards(): - # If there are no wild cards, there's nothing to do - return self - - if EventTypes.Member in self.types: - get_all_members = self.types[EventTypes.Member] is None - else: - get_all_members = self.include_others - - has_non_member_wildcard = self.include_others or any( - state_keys is None - for t, state_keys in self.types.items() - if t != EventTypes.Member - ) - - if not has_non_member_wildcard: - # If there are no non-member wild cards we can just return ourselves - return self - - if get_all_members: - # We want to return everything. - return StateFilter.all() - elif EventTypes.Member in self.types: - # We want to return all non-members, but only particular - # memberships - return StateFilter( - types=frozendict({EventTypes.Member: self.types[EventTypes.Member]}), - include_others=True, - ) - else: - # We want to return all non-members - return _ALL_NON_MEMBER_STATE_FILTER - - def make_sql_filter_clause(self) -> Tuple[str, List[str]]: - """Converts the filter to an SQL clause. - - For example: - - f = StateFilter.from_types([("m.room.create", "")]) - clause, args = f.make_sql_filter_clause() - clause == "(type = ? AND state_key = ?)" - args == ['m.room.create', ''] - - - Returns: - The SQL string (may be empty) and arguments. An empty SQL string is - returned when the filter matches everything (i.e. is "full"). - """ - - where_clause = "" - where_args: List[str] = [] - - if self.is_full(): - return where_clause, where_args - - if not self.include_others and not self.types: - # i.e. this is an empty filter, so we need to return a clause that - # will match nothing - return "1 = 2", [] - - # First we build up a lost of clauses for each type/state_key combo - clauses = [] - for etype, state_keys in self.types.items(): - if state_keys is None: - clauses.append("(type = ?)") - where_args.append(etype) - continue - - for state_key in state_keys: - clauses.append("(type = ? AND state_key = ?)") - where_args.extend((etype, state_key)) - - # This will match anything that appears in `self.types` - where_clause = " OR ".join(clauses) - - # If we want to include stuff that's not in the types dict then we add - # a `OR type NOT IN (...)` clause to the end. - if self.include_others: - if where_clause: - where_clause += " OR " - - where_clause += "type NOT IN (%s)" % (",".join(["?"] * len(self.types)),) - where_args.extend(self.types) - - return where_clause, where_args - - def max_entries_returned(self) -> Optional[int]: - """Returns the maximum number of entries this filter will return if - known, otherwise returns None. - - For example a simple state filter asking for `("m.room.create", "")` - will return 1, whereas the default state filter will return None. - - This is used to bail out early if the right number of entries have been - fetched. - """ - if self.has_wildcards(): - return None - - return len(self.concrete_types()) - - def filter_state(self, state_dict: StateMap[T]) -> MutableStateMap[T]: - """Returns the state filtered with by this StateFilter. - - Args: - state: The state map to filter - - Returns: - The filtered state map. - This is a copy, so it's safe to mutate. 
- """ - if self.is_full(): - return dict(state_dict) - - filtered_state = {} - for k, v in state_dict.items(): - typ, state_key = k - if typ in self.types: - state_keys = self.types[typ] - if state_keys is None or state_key in state_keys: - filtered_state[k] = v - elif self.include_others: - filtered_state[k] = v - - return filtered_state - - def is_full(self) -> bool: - """Whether this filter fetches everything or not - - Returns: - True if the filter fetches everything. - """ - return self.include_others and not self.types - - def has_wildcards(self) -> bool: - """Whether the filter includes wildcards or is attempting to fetch - specific state. - - Returns: - True if the filter includes wildcards. - """ - - return self.include_others or any( - state_keys is None for state_keys in self.types.values() - ) - - def concrete_types(self) -> List[Tuple[str, str]]: - """Returns a list of concrete type/state_keys (i.e. not None) that - will be fetched. This will be a complete list if `has_wildcards` - returns False, but otherwise will be a subset (or even empty). - - Returns: - A list of type/state_keys tuples. - """ - return [ - (t, s) - for t, state_keys in self.types.items() - if state_keys is not None - for s in state_keys - ] - - def get_member_split(self) -> Tuple["StateFilter", "StateFilter"]: - """Return the filter split into two: one which assumes it's exclusively - matching against member state, and one which assumes it's matching - against non member state. - - This is useful due to the returned filters giving correct results for - `is_full()`, `has_wildcards()`, etc, when operating against maps that - either exclusively contain member events or only contain non-member - events. (Which is the case when dealing with the member vs non-member - state caches). - - Returns: - The member and non member filters - """ - - if EventTypes.Member in self.types: - state_keys = self.types[EventTypes.Member] - if state_keys is None: - member_filter = StateFilter.all() - else: - member_filter = StateFilter(frozendict({EventTypes.Member: state_keys})) - elif self.include_others: - member_filter = StateFilter.all() - else: - member_filter = StateFilter.none() - - non_member_filter = StateFilter( - types=frozendict( - {k: v for k, v in self.types.items() if k != EventTypes.Member} - ), - include_others=self.include_others, - ) - - return member_filter, non_member_filter - - def _decompose_into_four_parts( - self, - ) -> Tuple[Tuple[bool, Set[str]], Tuple[Set[str], Set[StateKey]]]: - """ - Decomposes this state filter into 4 constituent parts, which can be - thought of as this: - all? - minus_wildcards + plus_wildcards + plus_state_keys - - where - * all represents ALL state - * minus_wildcards represents entire state types to remove - * plus_wildcards represents entire state types to add - * plus_state_keys represents individual state keys to add - - See `recompose_from_four_parts` for the other direction of this - correspondence. - """ - is_all = self.include_others - excluded_types: Set[str] = {t for t in self.types if is_all} - wildcard_types: Set[str] = {t for t, s in self.types.items() if s is None} - concrete_keys: Set[StateKey] = set(self.concrete_types()) - - return (is_all, excluded_types), (wildcard_types, concrete_keys) - - @staticmethod - def _recompose_from_four_parts( - all_part: bool, - minus_wildcards: Set[str], - plus_wildcards: Set[str], - plus_state_keys: Set[StateKey], - ) -> "StateFilter": - """ - Recomposes a state filter from 4 parts. 
- - See `decompose_into_four_parts` (the other direction of this - correspondence) for descriptions on each of the parts. - """ - - # {state type -> set of state keys OR None for wildcard} - # (The same structure as that of a StateFilter.) - new_types: Dict[str, Optional[Set[str]]] = {} - - # if we start with all, insert the excluded statetypes as empty sets - # to prevent them from being included - if all_part: - new_types.update({state_type: set() for state_type in minus_wildcards}) - - # insert the plus wildcards - new_types.update({state_type: None for state_type in plus_wildcards}) - - # insert the specific state keys - for state_type, state_key in plus_state_keys: - if state_type in new_types: - entry = new_types[state_type] - if entry is not None: - entry.add(state_key) - elif not all_part: - # don't insert if the entire type is already included by - # include_others as this would actually shrink the state allowed - # by this filter. - new_types[state_type] = {state_key} - - return StateFilter.freeze(new_types, include_others=all_part) - - def approx_difference(self, other: "StateFilter") -> "StateFilter": - """ - Returns a state filter which represents `self - other`. - - This is useful for determining what state remains to be pulled out of the - database if we want the state included by `self` but already have the state - included by `other`. - - The returned state filter - - MUST include all state events that are included by this filter (`self`) - unless they are included by `other`; - - MUST NOT include state events not included by this filter (`self`); and - - MAY be an over-approximation: the returned state filter - MAY additionally include some state events from `other`. - - This implementation attempts to return the narrowest such state filter. - In the case that `self` contains wildcards for state types where - `other` contains specific state keys, an approximation must be made: - the returned state filter keeps the wildcard, as state filters are not - able to express 'all state keys except some given examples'. - e.g. - StateFilter(m.room.member -> None (wildcard)) - minus - StateFilter(m.room.member -> {'@wombat:example.org'}) - is approximated as - StateFilter(m.room.member -> None (wildcard)) - """ - - # We first transform self and other into an alternative representation: - # - whether or not they include all events to begin with ('all') - # - if so, which event types are excluded? ('excludes') - # - which entire event types to include ('wildcards') - # - which concrete state keys to include ('concrete state keys') - (self_all, self_excludes), ( - self_wildcards, - self_concrete_keys, - ) = self._decompose_into_four_parts() - (other_all, other_excludes), ( - other_wildcards, - other_concrete_keys, - ) = other._decompose_into_four_parts() - - # Start with an estimate of the difference based on self - new_all = self_all - # Wildcards from the other can be added to the exclusion filter - new_excludes = self_excludes | other_wildcards - # We remove wildcards that appeared as wildcards in the other - new_wildcards = self_wildcards - other_wildcards - # We filter out the concrete state keys that appear in the other - # as wildcards or concrete state keys. 
- new_concrete_keys = { - (state_type, state_key) - for (state_type, state_key) in self_concrete_keys - if state_type not in other_wildcards - } - other_concrete_keys - - if other_all: - if self_all: - # If self starts with all, then we add as wildcards any - # types which appear in the other's exclusion filter (but - # aren't in the self exclusion filter). This is as the other - # filter will return everything BUT the types in its exclusion, so - # we need to add those excluded types that also match the self - # filter as wildcard types in the new filter. - new_wildcards |= other_excludes.difference(self_excludes) - - # If other is an `include_others` then the difference isn't. - new_all = False - # (We have no need for excludes when we don't start with all, as there - # is nothing to exclude.) - new_excludes = set() - - # We also filter out all state types that aren't in the exclusion - # list of the other. - new_wildcards &= other_excludes - new_concrete_keys = { - (state_type, state_key) - for (state_type, state_key) in new_concrete_keys - if state_type in other_excludes - } - - # Transform our newly-constructed state filter from the alternative - # representation back into the normal StateFilter representation. - return StateFilter._recompose_from_four_parts( - new_all, new_excludes, new_wildcards, new_concrete_keys - ) - - def must_await_full_state(self, is_mine_id: Callable[[str], bool]) -> bool: - """Check if we need to wait for full state to complete to calculate this state - - If we have a state filter which is completely satisfied even with partial - state, then we don't need to await_full_state before we can return it. - - Args: - is_mine_id: a callable which confirms if a given state_key matches a mxid - of a local user - """ - # if we haven't requested membership events, then it depends on the value of - # 'include_others' - if EventTypes.Member not in self.types: - return self.include_others - - # if we're looking for *all* membership events, then we have to wait - member_state_keys = self.types[EventTypes.Member] - if member_state_keys is None: - return True - - # otherwise, consider whose membership we are looking for. If it's entirely - # local users, then we don't need to wait. - for state_key in member_state_keys: - if not is_mine_id(state_key): - # remote user - return True - - # local users only - return False - - -_ALL_STATE_FILTER = StateFilter(types=frozendict(), include_others=True) -_ALL_NON_MEMBER_STATE_FILTER = StateFilter( - types=frozendict({EventTypes.Member: frozenset()}), include_others=True -) -_NONE_STATE_FILTER = StateFilter(types=frozendict(), include_others=False) diff --git a/synapse/types.py b/synapse/types.py deleted file mode 100644 index f2d436ddc3..0000000000 --- a/synapse/types.py +++ /dev/null @@ -1,928 +0,0 @@ -# Copyright 2014-2016 OpenMarket Ltd -# Copyright 2019 The Matrix.org Foundation C.I.C. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import abc -import re -import string -from typing import ( - TYPE_CHECKING, - Any, - ClassVar, - Dict, - List, - Mapping, - Match, - MutableMapping, - NoReturn, - Optional, - Set, - Tuple, - Type, - TypeVar, - Union, -) - -import attr -from frozendict import frozendict -from signedjson.key import decode_verify_key_bytes -from signedjson.types import VerifyKey -from typing_extensions import Final, TypedDict -from unpaddedbase64 import decode_base64 -from zope.interface import Interface - -from twisted.internet.defer import CancelledError -from twisted.internet.interfaces import ( - IReactorCore, - IReactorPluggableNameResolver, - IReactorSSL, - IReactorTCP, - IReactorThreads, - IReactorTime, -) - -from synapse.api.errors import Codes, SynapseError -from synapse.util.cancellation import cancellable -from synapse.util.stringutils import parse_and_validate_server_name - -if TYPE_CHECKING: - from synapse.appservice.api import ApplicationService - from synapse.storage.databases.main import DataStore, PurgeEventsStore - from synapse.storage.databases.main.appservice import ApplicationServiceWorkerStore - -# Define a state map type from type/state_key to T (usually an event ID or -# event) -T = TypeVar("T") -StateKey = Tuple[str, str] -StateMap = Mapping[StateKey, T] -MutableStateMap = MutableMapping[StateKey, T] - -# JSON types. These could be made stronger, but will do for now. -# A JSON-serialisable dict. -JsonDict = Dict[str, Any] -# A JSON-serialisable mapping; roughly speaking an immutable JSONDict. -# Useful when you have a TypedDict which isn't going to be mutated and you don't want -# to cast to JsonDict everywhere. -JsonMapping = Mapping[str, Any] -# A JSON-serialisable object. -JsonSerializable = object - - -# Note that this seems to require inheriting *directly* from Interface in order -# for mypy-zope to realize it is an interface. -class ISynapseReactor( - IReactorTCP, - IReactorSSL, - IReactorPluggableNameResolver, - IReactorTime, - IReactorCore, - IReactorThreads, - Interface, -): - """The interfaces necessary for Synapse to function.""" - - -@attr.s(frozen=True, slots=True, auto_attribs=True) -class Requester: - """ - Represents the user making a request - - Attributes: - user: id of the user making the request - access_token_id: *ID* of the access token used for this - request, or None if it came via the appservice API or similar - is_guest: True if the user making this request is a guest user - shadow_banned: True if the user making this request has been shadow-banned. - device_id: device_id which was set at authentication time - app_service: the AS requesting on behalf of the user - authenticated_entity: The entity that authenticated when making the request. - This is different to the user_id when an admin user or the server is - "puppeting" the user. 
- """ - - user: "UserID" - access_token_id: Optional[int] - is_guest: bool - shadow_banned: bool - device_id: Optional[str] - app_service: Optional["ApplicationService"] - authenticated_entity: str - - def serialize(self) -> Dict[str, Any]: - """Converts self to a type that can be serialized as JSON, and then - deserialized by `deserialize` - - Returns: - dict - """ - return { - "user_id": self.user.to_string(), - "access_token_id": self.access_token_id, - "is_guest": self.is_guest, - "shadow_banned": self.shadow_banned, - "device_id": self.device_id, - "app_server_id": self.app_service.id if self.app_service else None, - "authenticated_entity": self.authenticated_entity, - } - - @staticmethod - def deserialize( - store: "ApplicationServiceWorkerStore", input: Dict[str, Any] - ) -> "Requester": - """Converts a dict that was produced by `serialize` back into a - Requester. - - Args: - store: Used to convert AS ID to AS object - input: A dict produced by `serialize` - - Returns: - Requester - """ - appservice = None - if input["app_server_id"]: - appservice = store.get_app_service_by_id(input["app_server_id"]) - - return Requester( - user=UserID.from_string(input["user_id"]), - access_token_id=input["access_token_id"], - is_guest=input["is_guest"], - shadow_banned=input["shadow_banned"], - device_id=input["device_id"], - app_service=appservice, - authenticated_entity=input["authenticated_entity"], - ) - - -def create_requester( - user_id: Union[str, "UserID"], - access_token_id: Optional[int] = None, - is_guest: bool = False, - shadow_banned: bool = False, - device_id: Optional[str] = None, - app_service: Optional["ApplicationService"] = None, - authenticated_entity: Optional[str] = None, -) -> Requester: - """ - Create a new ``Requester`` object - - Args: - user_id: id of the user making the request - access_token_id: *ID* of the access token used for this - request, or None if it came via the appservice API or similar - is_guest: True if the user making this request is a guest user - shadow_banned: True if the user making this request is shadow-banned. - device_id: device_id which was set at authentication time - app_service: the AS requesting on behalf of the user - authenticated_entity: The entity that authenticated when making the request. - This is different to the user_id when an admin user or the server is - "puppeting" the user. - - Returns: - Requester - """ - if not isinstance(user_id, UserID): - user_id = UserID.from_string(user_id) - - if authenticated_entity is None: - authenticated_entity = user_id.to_string() - - return Requester( - user_id, - access_token_id, - is_guest, - shadow_banned, - device_id, - app_service, - authenticated_entity, - ) - - -def get_domain_from_id(string: str) -> str: - idx = string.find(":") - if idx == -1: - raise SynapseError(400, "Invalid ID: %r" % (string,)) - return string[idx + 1 :] - - -def get_localpart_from_id(string: str) -> str: - idx = string.find(":") - if idx == -1: - raise SynapseError(400, "Invalid ID: %r" % (string,)) - return string[1:idx] - - -DS = TypeVar("DS", bound="DomainSpecificString") - - -@attr.s(slots=True, frozen=True, repr=False, auto_attribs=True) -class DomainSpecificString(metaclass=abc.ABCMeta): - """Common base class among ID/name strings that have a local part and a - domain name, prefixed with a sigil. 
- - Has the fields: - - 'localpart' : The local part of the name (without the leading sigil) - 'domain' : The domain part of the name - """ - - SIGIL: ClassVar[str] = abc.abstractproperty() # type: ignore - - localpart: str - domain: str - - # Because this is a frozen class, it is deeply immutable. - def __copy__(self: DS) -> DS: - return self - - def __deepcopy__(self: DS, memo: Dict[str, object]) -> DS: - return self - - @classmethod - def from_string(cls: Type[DS], s: str) -> DS: - """Parse the string given by 's' into a structure object.""" - if len(s) < 1 or s[0:1] != cls.SIGIL: - raise SynapseError( - 400, - "Expected %s string to start with '%s'" % (cls.__name__, cls.SIGIL), - Codes.INVALID_PARAM, - ) - - parts = s[1:].split(":", 1) - if len(parts) != 2: - raise SynapseError( - 400, - "Expected %s of the form '%slocalname:domain'" - % (cls.__name__, cls.SIGIL), - Codes.INVALID_PARAM, - ) - - domain = parts[1] - # This code will need changing if we want to support multiple domain - # names on one HS - return cls(localpart=parts[0], domain=domain) - - def to_string(self) -> str: - """Return a string encoding the fields of the structure object.""" - return "%s%s:%s" % (self.SIGIL, self.localpart, self.domain) - - @classmethod - def is_valid(cls: Type[DS], s: str) -> bool: - """Parses the input string and attempts to ensure it is valid.""" - # TODO: this does not reject an empty localpart or an overly-long string. - # See https://spec.matrix.org/v1.2/appendices/#identifier-grammar - try: - obj = cls.from_string(s) - # Apply additional validation to the domain. This is only done - # during is_valid (and not part of from_string) since it is - # possible for invalid data to exist in room-state, etc. - parse_and_validate_server_name(obj.domain) - return True - except Exception: - return False - - __repr__ = to_string - - -@attr.s(slots=True, frozen=True, repr=False) -class UserID(DomainSpecificString): - """Structure representing a user ID.""" - - SIGIL = "@" - - -@attr.s(slots=True, frozen=True, repr=False) -class RoomAlias(DomainSpecificString): - """Structure representing a room name.""" - - SIGIL = "#" - - -@attr.s(slots=True, frozen=True, repr=False) -class RoomID(DomainSpecificString): - """Structure representing a room id.""" - - SIGIL = "!" - - -@attr.s(slots=True, frozen=True, repr=False) -class EventID(DomainSpecificString): - """Structure representing an event id.""" - - SIGIL = "$" - - -mxid_localpart_allowed_characters = set( - "_-./=" + string.ascii_lowercase + string.digits -) - - -def contains_invalid_mxid_characters(localpart: str) -> bool: - """Check for characters not allowed in an mxid or groupid localpart - - Args: - localpart: the localpart to be checked - - Returns: - True if there are any naughty characters - """ - return any(c not in mxid_localpart_allowed_characters for c in localpart) - - -UPPER_CASE_PATTERN = re.compile(b"[A-Z_]") - -# the following is a pattern which matches '=', and bytes which are not allowed in a mxid -# localpart. 
-# -# It works by: -# * building a string containing the allowed characters (excluding '=') -# * escaping every special character with a backslash (to stop '-' being interpreted as a -# range operator) -# * wrapping it in a '[^...]' regex -# * converting the whole lot to a 'bytes' sequence, so that we can use it to match -# bytes rather than strings -# -NON_MXID_CHARACTER_PATTERN = re.compile( - ("[^%s]" % (re.escape("".join(mxid_localpart_allowed_characters - {"="})),)).encode( - "ascii" - ) -) - - -def map_username_to_mxid_localpart( - username: Union[str, bytes], case_sensitive: bool = False -) -> str: - """Map a username onto a string suitable for a MXID - - This follows the algorithm laid out at - https://matrix.org/docs/spec/appendices.html#mapping-from-other-character-sets. - - Args: - username: username to be mapped - case_sensitive: true if TEST and test should be mapped - onto different mxids - - Returns: - string suitable for a mxid localpart - """ - if not isinstance(username, bytes): - username = username.encode("utf-8") - - # first we sort out upper-case characters - if case_sensitive: - - def f1(m: Match[bytes]) -> bytes: - return b"_" + m.group().lower() - - username = UPPER_CASE_PATTERN.sub(f1, username) - else: - username = username.lower() - - # then we sort out non-ascii characters by converting to the hex equivalent. - def f2(m: Match[bytes]) -> bytes: - return b"=%02x" % (m.group()[0],) - - username = NON_MXID_CHARACTER_PATTERN.sub(f2, username) - - # we also do the =-escaping to mxids starting with an underscore. - username = re.sub(b"^_", b"=5f", username) - - # we should now only have ascii bytes left, so can decode back to a string. - return username.decode("ascii") - - -@attr.s(frozen=True, slots=True, order=False) -class RoomStreamToken: - """Tokens are positions between events. The token "s1" comes after event 1. - - s0 s1 - | | - [0] ▼ [1] ▼ [2] - - Tokens can either be a point in the live event stream or a cursor going - through historic events. - - When traversing the live event stream, events are ordered by - `stream_ordering` (when they arrived at the homeserver). - - When traversing historic events, events are first ordered by their `depth` - (`topological_ordering` in the event graph) and tie-broken by - `stream_ordering` (when the event arrived at the homeserver). - - If you're looking for more info about what a token with all of the - underscores means, ex. - `s2633508_17_338_6732159_1082514_541479_274711_265584_1`, see the docstring - for `StreamToken` below. - - --- - - Live tokens start with an "s" followed by the `stream_ordering` of the event - that comes before the position of the token. Said another way: - `stream_ordering` uniquely identifies a persisted event. The live token - means "the position just after the event identified by `stream_ordering`". - An example token is: - - s2633508 - - --- - - Historic tokens start with a "t" followed by the `depth` - (`topological_ordering` in the event graph) of the event that comes before - the position of the token, followed by "-", followed by the - `stream_ordering` of the event that comes before the position of the token. - An example token is: - - t426-2633508 - - --- - - There is also a third mode for live tokens where the token starts with "m", - which is sometimes used when using sharded event persisters. In this case - the events stream is considered to be a set of streams (one for each writer) - and the token encodes the vector clock of positions of each writer in their - respective streams. 
- - The format of the token in such case is an initial integer min position, - followed by the mapping of instance ID to position separated by '.' and '~': - - m{min_pos}~{writer1}.{pos1}~{writer2}.{pos2}. ... - - The `min_pos` corresponds to the minimum position all writers have persisted - up to, and then only writers that are ahead of that position need to be - encoded. An example token is: - - m56~2.58~3.59 - - Which corresponds to a set of three (or more writers) where instances 2 and - 3 (these are instance IDs that can be looked up in the DB to fetch the more - commonly used instance names) are at positions 58 and 59 respectively, and - all other instances are at position 56. - - Note: The `RoomStreamToken` cannot have both a topological part and an - instance map. - - --- - - For caching purposes, `RoomStreamToken`s and by extension, all their - attributes, must be hashable. - """ - - topological: Optional[int] = attr.ib( - validator=attr.validators.optional(attr.validators.instance_of(int)), - ) - stream: int = attr.ib(validator=attr.validators.instance_of(int)) - - instance_map: "frozendict[str, int]" = attr.ib( - factory=frozendict, - validator=attr.validators.deep_mapping( - key_validator=attr.validators.instance_of(str), - value_validator=attr.validators.instance_of(int), - mapping_validator=attr.validators.instance_of(frozendict), - ), - ) - - def __attrs_post_init__(self) -> None: - """Validates that both `topological` and `instance_map` aren't set.""" - - if self.instance_map and self.topological: - raise ValueError( - "Cannot set both 'topological' and 'instance_map' on 'RoomStreamToken'." - ) - - @classmethod - async def parse(cls, store: "PurgeEventsStore", string: str) -> "RoomStreamToken": - try: - if string[0] == "s": - return cls(topological=None, stream=int(string[1:])) - if string[0] == "t": - parts = string[1:].split("-", 1) - return cls(topological=int(parts[0]), stream=int(parts[1])) - if string[0] == "m": - parts = string[1:].split("~") - stream = int(parts[0]) - - instance_map = {} - for part in parts[1:]: - key, value = part.split(".") - instance_id = int(key) - pos = int(value) - - instance_name = await store.get_name_from_instance_id(instance_id) # type: ignore[attr-defined] - instance_map[instance_name] = pos - - return cls( - topological=None, - stream=stream, - instance_map=frozendict(instance_map), - ) - except CancelledError: - raise - except Exception: - pass - raise SynapseError(400, "Invalid room stream token %r" % (string,)) - - @classmethod - def parse_stream_token(cls, string: str) -> "RoomStreamToken": - try: - if string[0] == "s": - return cls(topological=None, stream=int(string[1:])) - except Exception: - pass - raise SynapseError(400, "Invalid room stream token %r" % (string,)) - - def copy_and_advance(self, other: "RoomStreamToken") -> "RoomStreamToken": - """Return a new token such that if an event is after both this token and - the other token, then its after the returned token too. - """ - - if self.topological or other.topological: - raise Exception("Can't advance topological tokens") - - max_stream = max(self.stream, other.stream) - - instance_map = { - instance: max( - self.instance_map.get(instance, self.stream), - other.instance_map.get(instance, other.stream), - ) - for instance in set(self.instance_map).union(other.instance_map) - } - - return RoomStreamToken(None, max_stream, frozendict(instance_map)) - - def as_historical_tuple(self) -> Tuple[int, int]: - """Returns a tuple of `(topological, stream)` for historical tokens. 
- - Raises if not an historical token (i.e. doesn't have a topological part). - """ - if self.topological is None: - raise Exception( - "Cannot call `RoomStreamToken.as_historical_tuple` on live token" - ) - - return self.topological, self.stream - - def get_stream_pos_for_instance(self, instance_name: str) -> int: - """Get the stream position that the given writer was at at this token. - - This only makes sense for "live" tokens that may have a vector clock - component, and so asserts that this is a "live" token. - """ - assert self.topological is None - - # If we don't have an entry for the instance we can assume that it was - # at `self.stream`. - return self.instance_map.get(instance_name, self.stream) - - def get_max_stream_pos(self) -> int: - """Get the maximum stream position referenced in this token. - - The corresponding "min" position is, by definition just `self.stream`. - - This is used to handle tokens that have non-empty `instance_map`, and so - reference stream positions after the `self.stream` position. - """ - return max(self.instance_map.values(), default=self.stream) - - async def to_string(self, store: "DataStore") -> str: - if self.topological is not None: - return "t%d-%d" % (self.topological, self.stream) - elif self.instance_map: - entries = [] - for name, pos in self.instance_map.items(): - instance_id = await store.get_id_for_instance(name) - entries.append(f"{instance_id}.{pos}") - - encoded_map = "~".join(entries) - return f"m{self.stream}~{encoded_map}" - else: - return "s%d" % (self.stream,) - - -class StreamKeyType: - """Known stream types. - - A stream is a list of entities ordered by an incrementing "stream token". - """ - - ROOM: Final = "room_key" - PRESENCE: Final = "presence_key" - TYPING: Final = "typing_key" - RECEIPT: Final = "receipt_key" - ACCOUNT_DATA: Final = "account_data_key" - PUSH_RULES: Final = "push_rules_key" - TO_DEVICE: Final = "to_device_key" - DEVICE_LIST: Final = "device_list_key" - - -@attr.s(slots=True, frozen=True, auto_attribs=True) -class StreamToken: - """A collection of keys joined together by underscores in the following - order and which represent the position in their respective streams. - - ex. `s2633508_17_338_6732159_1082514_541479_274711_265584_1` - 1. `room_key`: `s2633508` which is a `RoomStreamToken` - - `RoomStreamToken`'s can also look like `t426-2633508` or `m56~2.58~3.59` - - See the docstring for `RoomStreamToken` for more details. - 2. `presence_key`: `17` - 3. `typing_key`: `338` - 4. `receipt_key`: `6732159` - 5. `account_data_key`: `1082514` - 6. `push_rules_key`: `541479` - 7. `to_device_key`: `274711` - 8. `device_list_key`: `265584` - 9. `groups_key`: `1` (note that this key is now unused) - - You can see how many of these keys correspond to the various - fields in a "/sync" response: - ```json - { - "next_batch": "s12_4_0_1_1_1_1_4_1", - "presence": { - "events": [] - }, - "device_lists": { - "changed": [] - }, - "rooms": { - "join": { - "!QrZlfIDQLNLdZHqTnt:hs1": { - "timeline": { - "events": [], - "prev_batch": "s10_4_0_1_1_1_1_4_1", - "limited": false - }, - "state": { - "events": [] - }, - "account_data": { - "events": [] - }, - "ephemeral": { - "events": [] - } - } - } - } - } - ``` - - --- - - For caching purposes, `StreamToken`s and by extension, all their attributes, - must be hashable. 
- """ - - room_key: RoomStreamToken = attr.ib( - validator=attr.validators.instance_of(RoomStreamToken) - ) - presence_key: int - typing_key: int - receipt_key: int - account_data_key: int - push_rules_key: int - to_device_key: int - device_list_key: int - # Note that the groups key is no longer used and may have bogus values. - groups_key: int - - _SEPARATOR = "_" - START: ClassVar["StreamToken"] - - @classmethod - @cancellable - async def from_string(cls, store: "DataStore", string: str) -> "StreamToken": - """ - Creates a RoomStreamToken from its textual representation. - """ - try: - keys = string.split(cls._SEPARATOR) - while len(keys) < len(attr.fields(cls)): - # i.e. old token from before receipt_key - keys.append("0") - return cls( - await RoomStreamToken.parse(store, keys[0]), *(int(k) for k in keys[1:]) - ) - except CancelledError: - raise - except Exception: - raise SynapseError(400, "Invalid stream token") - - async def to_string(self, store: "DataStore") -> str: - return self._SEPARATOR.join( - [ - await self.room_key.to_string(store), - str(self.presence_key), - str(self.typing_key), - str(self.receipt_key), - str(self.account_data_key), - str(self.push_rules_key), - str(self.to_device_key), - str(self.device_list_key), - # Note that the groups key is no longer used, but it is still - # serialized so that there will not be confusion in the future - # if additional tokens are added. - str(self.groups_key), - ] - ) - - @property - def room_stream_id(self) -> int: - return self.room_key.stream - - def copy_and_advance(self, key: str, new_value: Any) -> "StreamToken": - """Advance the given key in the token to a new value if and only if the - new value is after the old value. - - :raises TypeError: if `key` is not the one of the keys tracked by a StreamToken. - """ - if key == StreamKeyType.ROOM: - new_token = self.copy_and_replace( - StreamKeyType.ROOM, self.room_key.copy_and_advance(new_value) - ) - return new_token - - new_token = self.copy_and_replace(key, new_value) - new_id = int(getattr(new_token, key)) - old_id = int(getattr(self, key)) - - if old_id < new_id: - return new_token - else: - return self - - def copy_and_replace(self, key: str, new_value: Any) -> "StreamToken": - return attr.evolve(self, **{key: new_value}) - - -StreamToken.START = StreamToken(RoomStreamToken(None, 0), 0, 0, 0, 0, 0, 0, 0, 0) - - -@attr.s(slots=True, frozen=True, auto_attribs=True) -class PersistedEventPosition: - """Position of a newly persisted event with instance that persisted it. - - This can be used to test whether the event is persisted before or after a - RoomStreamToken. - """ - - instance_name: str - stream: int - - def persisted_after(self, token: RoomStreamToken) -> bool: - return token.get_stream_pos_for_instance(self.instance_name) < self.stream - - def to_room_stream_token(self) -> RoomStreamToken: - """Converts the position to a room stream token such that events - persisted in the same room after this position will be after the - returned `RoomStreamToken`. - - Note: no guarantees are made about ordering w.r.t. events in other - rooms. - """ - # Doing the naive thing satisfies the desired properties described in - # the docstring. 
- return RoomStreamToken(None, self.stream) - - -@attr.s(slots=True, frozen=True, auto_attribs=True) -class ThirdPartyInstanceID: - appservice_id: Optional[str] - network_id: Optional[str] - - # Deny iteration because it will bite you if you try to create a singleton - # set by: - # users = set(user) - def __iter__(self) -> NoReturn: - raise ValueError("Attempted to iterate a %s" % (type(self).__name__,)) - - # Because this class is a frozen class, it is deeply immutable. - def __copy__(self) -> "ThirdPartyInstanceID": - return self - - def __deepcopy__(self, memo: Dict[str, object]) -> "ThirdPartyInstanceID": - return self - - @classmethod - def from_string(cls, s: str) -> "ThirdPartyInstanceID": - bits = s.split("|", 2) - if len(bits) != 2: - raise SynapseError(400, "Invalid ID %r" % (s,)) - - return cls(appservice_id=bits[0], network_id=bits[1]) - - def to_string(self) -> str: - return "%s|%s" % (self.appservice_id, self.network_id) - - __str__ = to_string - - -@attr.s(slots=True, frozen=True, auto_attribs=True) -class ReadReceipt: - """Information about a read-receipt""" - - room_id: str - receipt_type: str - user_id: str - event_ids: List[str] - thread_id: Optional[str] - data: JsonDict - - -@attr.s(slots=True, frozen=True, auto_attribs=True) -class DeviceListUpdates: - """ - An object containing a diff of information regarding other users' device lists, intended for - a recipient to carry out device list tracking. - - Attributes: - changed: A set of users whose device lists have changed recently. - left: A set of users who the recipient no longer needs to track the device lists of. - Typically when those users no longer share any end-to-end encryption enabled rooms. - """ - - # We need to use a factory here, otherwise `set` is not evaluated at - # object instantiation, but instead at class definition instantiation. - # The latter happening only once, thus always giving you the same sets - # across multiple DeviceListUpdates instances. - # Also see: don't define mutable default arguments. - changed: Set[str] = attr.ib(factory=set) - left: Set[str] = attr.ib(factory=set) - - def __bool__(self) -> bool: - return bool(self.changed or self.left) - - -def get_verify_key_from_cross_signing_key( - key_info: Mapping[str, Any] -) -> Tuple[str, VerifyKey]: - """Get the key ID and signedjson verify key from a cross-signing key dict - - Args: - key_info: a cross-signing key dict, which must have a "keys" - property that has exactly one item in it - - Returns: - the key ID and verify key for the cross-signing key - """ - # make sure that a `keys` field is provided - if "keys" not in key_info: - raise ValueError("Invalid key") - keys = key_info["keys"] - # and that it contains exactly one key - if len(keys) == 1: - key_id, key_data = next(iter(keys.items())) - return key_id, decode_verify_key_bytes(key_id, decode_base64(key_data)) - else: - raise ValueError("Invalid key") - - -@attr.s(auto_attribs=True, frozen=True, slots=True) -class UserInfo: - """Holds information about a user. Result of get_userinfo_by_id. - - Attributes: - user_id: ID of the user. - appservice_id: Application service ID that created this user. - consent_server_notice_sent: Version of policy documents the user has been sent. - consent_version: Version of policy documents the user has consented to. - creation_ts: Creation timestamp of the user. - is_admin: True if the user is an admin. - is_deactivated: True if the user has been deactivated. - is_guest: True if the user is a guest user. 
- is_shadow_banned: True if the user has been shadow-banned. - user_type: User type (None for normal user, 'support' and 'bot' other options). - """ - - user_id: UserID - appservice_id: Optional[int] - consent_server_notice_sent: Optional[str] - consent_version: Optional[str] - user_type: Optional[str] - creation_ts: int - is_admin: bool - is_deactivated: bool - is_guest: bool - is_shadow_banned: bool - - -class UserProfile(TypedDict): - user_id: str - display_name: Optional[str] - avatar_url: Optional[str] - - -@attr.s(auto_attribs=True, frozen=True, slots=True) -class RetentionPolicy: - min_lifetime: Optional[int] = None - max_lifetime: Optional[int] = None diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py new file mode 100644 index 0000000000..f2d436ddc3 --- /dev/null +++ b/synapse/types/__init__.py @@ -0,0 +1,928 @@ +# Copyright 2014-2016 OpenMarket Ltd +# Copyright 2019 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import abc +import re +import string +from typing import ( + TYPE_CHECKING, + Any, + ClassVar, + Dict, + List, + Mapping, + Match, + MutableMapping, + NoReturn, + Optional, + Set, + Tuple, + Type, + TypeVar, + Union, +) + +import attr +from frozendict import frozendict +from signedjson.key import decode_verify_key_bytes +from signedjson.types import VerifyKey +from typing_extensions import Final, TypedDict +from unpaddedbase64 import decode_base64 +from zope.interface import Interface + +from twisted.internet.defer import CancelledError +from twisted.internet.interfaces import ( + IReactorCore, + IReactorPluggableNameResolver, + IReactorSSL, + IReactorTCP, + IReactorThreads, + IReactorTime, +) + +from synapse.api.errors import Codes, SynapseError +from synapse.util.cancellation import cancellable +from synapse.util.stringutils import parse_and_validate_server_name + +if TYPE_CHECKING: + from synapse.appservice.api import ApplicationService + from synapse.storage.databases.main import DataStore, PurgeEventsStore + from synapse.storage.databases.main.appservice import ApplicationServiceWorkerStore + +# Define a state map type from type/state_key to T (usually an event ID or +# event) +T = TypeVar("T") +StateKey = Tuple[str, str] +StateMap = Mapping[StateKey, T] +MutableStateMap = MutableMapping[StateKey, T] + +# JSON types. These could be made stronger, but will do for now. +# A JSON-serialisable dict. +JsonDict = Dict[str, Any] +# A JSON-serialisable mapping; roughly speaking an immutable JSONDict. +# Useful when you have a TypedDict which isn't going to be mutated and you don't want +# to cast to JsonDict everywhere. +JsonMapping = Mapping[str, Any] +# A JSON-serialisable object. +JsonSerializable = object + + +# Note that this seems to require inheriting *directly* from Interface in order +# for mypy-zope to realize it is an interface. 
+class ISynapseReactor( + IReactorTCP, + IReactorSSL, + IReactorPluggableNameResolver, + IReactorTime, + IReactorCore, + IReactorThreads, + Interface, +): + """The interfaces necessary for Synapse to function.""" + + +@attr.s(frozen=True, slots=True, auto_attribs=True) +class Requester: + """ + Represents the user making a request + + Attributes: + user: id of the user making the request + access_token_id: *ID* of the access token used for this + request, or None if it came via the appservice API or similar + is_guest: True if the user making this request is a guest user + shadow_banned: True if the user making this request has been shadow-banned. + device_id: device_id which was set at authentication time + app_service: the AS requesting on behalf of the user + authenticated_entity: The entity that authenticated when making the request. + This is different to the user_id when an admin user or the server is + "puppeting" the user. + """ + + user: "UserID" + access_token_id: Optional[int] + is_guest: bool + shadow_banned: bool + device_id: Optional[str] + app_service: Optional["ApplicationService"] + authenticated_entity: str + + def serialize(self) -> Dict[str, Any]: + """Converts self to a type that can be serialized as JSON, and then + deserialized by `deserialize` + + Returns: + dict + """ + return { + "user_id": self.user.to_string(), + "access_token_id": self.access_token_id, + "is_guest": self.is_guest, + "shadow_banned": self.shadow_banned, + "device_id": self.device_id, + "app_server_id": self.app_service.id if self.app_service else None, + "authenticated_entity": self.authenticated_entity, + } + + @staticmethod + def deserialize( + store: "ApplicationServiceWorkerStore", input: Dict[str, Any] + ) -> "Requester": + """Converts a dict that was produced by `serialize` back into a + Requester. + + Args: + store: Used to convert AS ID to AS object + input: A dict produced by `serialize` + + Returns: + Requester + """ + appservice = None + if input["app_server_id"]: + appservice = store.get_app_service_by_id(input["app_server_id"]) + + return Requester( + user=UserID.from_string(input["user_id"]), + access_token_id=input["access_token_id"], + is_guest=input["is_guest"], + shadow_banned=input["shadow_banned"], + device_id=input["device_id"], + app_service=appservice, + authenticated_entity=input["authenticated_entity"], + ) + + +def create_requester( + user_id: Union[str, "UserID"], + access_token_id: Optional[int] = None, + is_guest: bool = False, + shadow_banned: bool = False, + device_id: Optional[str] = None, + app_service: Optional["ApplicationService"] = None, + authenticated_entity: Optional[str] = None, +) -> Requester: + """ + Create a new ``Requester`` object + + Args: + user_id: id of the user making the request + access_token_id: *ID* of the access token used for this + request, or None if it came via the appservice API or similar + is_guest: True if the user making this request is a guest user + shadow_banned: True if the user making this request is shadow-banned. + device_id: device_id which was set at authentication time + app_service: the AS requesting on behalf of the user + authenticated_entity: The entity that authenticated when making the request. + This is different to the user_id when an admin user or the server is + "puppeting" the user. 
+ + Returns: + Requester + """ + if not isinstance(user_id, UserID): + user_id = UserID.from_string(user_id) + + if authenticated_entity is None: + authenticated_entity = user_id.to_string() + + return Requester( + user_id, + access_token_id, + is_guest, + shadow_banned, + device_id, + app_service, + authenticated_entity, + ) + + +def get_domain_from_id(string: str) -> str: + idx = string.find(":") + if idx == -1: + raise SynapseError(400, "Invalid ID: %r" % (string,)) + return string[idx + 1 :] + + +def get_localpart_from_id(string: str) -> str: + idx = string.find(":") + if idx == -1: + raise SynapseError(400, "Invalid ID: %r" % (string,)) + return string[1:idx] + + +DS = TypeVar("DS", bound="DomainSpecificString") + + +@attr.s(slots=True, frozen=True, repr=False, auto_attribs=True) +class DomainSpecificString(metaclass=abc.ABCMeta): + """Common base class among ID/name strings that have a local part and a + domain name, prefixed with a sigil. + + Has the fields: + + 'localpart' : The local part of the name (without the leading sigil) + 'domain' : The domain part of the name + """ + + SIGIL: ClassVar[str] = abc.abstractproperty() # type: ignore + + localpart: str + domain: str + + # Because this is a frozen class, it is deeply immutable. + def __copy__(self: DS) -> DS: + return self + + def __deepcopy__(self: DS, memo: Dict[str, object]) -> DS: + return self + + @classmethod + def from_string(cls: Type[DS], s: str) -> DS: + """Parse the string given by 's' into a structure object.""" + if len(s) < 1 or s[0:1] != cls.SIGIL: + raise SynapseError( + 400, + "Expected %s string to start with '%s'" % (cls.__name__, cls.SIGIL), + Codes.INVALID_PARAM, + ) + + parts = s[1:].split(":", 1) + if len(parts) != 2: + raise SynapseError( + 400, + "Expected %s of the form '%slocalname:domain'" + % (cls.__name__, cls.SIGIL), + Codes.INVALID_PARAM, + ) + + domain = parts[1] + # This code will need changing if we want to support multiple domain + # names on one HS + return cls(localpart=parts[0], domain=domain) + + def to_string(self) -> str: + """Return a string encoding the fields of the structure object.""" + return "%s%s:%s" % (self.SIGIL, self.localpart, self.domain) + + @classmethod + def is_valid(cls: Type[DS], s: str) -> bool: + """Parses the input string and attempts to ensure it is valid.""" + # TODO: this does not reject an empty localpart or an overly-long string. + # See https://spec.matrix.org/v1.2/appendices/#identifier-grammar + try: + obj = cls.from_string(s) + # Apply additional validation to the domain. This is only done + # during is_valid (and not part of from_string) since it is + # possible for invalid data to exist in room-state, etc. + parse_and_validate_server_name(obj.domain) + return True + except Exception: + return False + + __repr__ = to_string + + +@attr.s(slots=True, frozen=True, repr=False) +class UserID(DomainSpecificString): + """Structure representing a user ID.""" + + SIGIL = "@" + + +@attr.s(slots=True, frozen=True, repr=False) +class RoomAlias(DomainSpecificString): + """Structure representing a room name.""" + + SIGIL = "#" + + +@attr.s(slots=True, frozen=True, repr=False) +class RoomID(DomainSpecificString): + """Structure representing a room id.""" + + SIGIL = "!" 
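# [Editor's note -- illustrative sketch only, not part of the patch.] The
# DomainSpecificString subclasses above (UserID, RoomAlias, RoomID) all parse
# strings of the form "<sigil>localpart:domain". A minimal usage example,
# assuming `synapse.types` is importable with the layout introduced by this
# patch; the MXIDs used here are made up for illustration:

from synapse.types import RoomAlias, UserID

user = UserID.from_string("@alice:example.com")
assert (user.localpart, user.domain) == ("alice", "example.com")
assert user.to_string() == "@alice:example.com"

# is_valid() additionally validates the domain as a server name and never raises.
assert not UserID.is_valid("alice:example.com")  # missing the "@" sigil
assert RoomAlias.from_string("#matrix:example.com").localpart == "matrix"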
+ + +@attr.s(slots=True, frozen=True, repr=False) +class EventID(DomainSpecificString): + """Structure representing an event id.""" + + SIGIL = "$" + + +mxid_localpart_allowed_characters = set( + "_-./=" + string.ascii_lowercase + string.digits +) + + +def contains_invalid_mxid_characters(localpart: str) -> bool: + """Check for characters not allowed in an mxid or groupid localpart + + Args: + localpart: the localpart to be checked + + Returns: + True if there are any naughty characters + """ + return any(c not in mxid_localpart_allowed_characters for c in localpart) + + +UPPER_CASE_PATTERN = re.compile(b"[A-Z_]") + +# the following is a pattern which matches '=', and bytes which are not allowed in a mxid +# localpart. +# +# It works by: +# * building a string containing the allowed characters (excluding '=') +# * escaping every special character with a backslash (to stop '-' being interpreted as a +# range operator) +# * wrapping it in a '[^...]' regex +# * converting the whole lot to a 'bytes' sequence, so that we can use it to match +# bytes rather than strings +# +NON_MXID_CHARACTER_PATTERN = re.compile( + ("[^%s]" % (re.escape("".join(mxid_localpart_allowed_characters - {"="})),)).encode( + "ascii" + ) +) + + +def map_username_to_mxid_localpart( + username: Union[str, bytes], case_sensitive: bool = False +) -> str: + """Map a username onto a string suitable for a MXID + + This follows the algorithm laid out at + https://matrix.org/docs/spec/appendices.html#mapping-from-other-character-sets. + + Args: + username: username to be mapped + case_sensitive: true if TEST and test should be mapped + onto different mxids + + Returns: + string suitable for a mxid localpart + """ + if not isinstance(username, bytes): + username = username.encode("utf-8") + + # first we sort out upper-case characters + if case_sensitive: + + def f1(m: Match[bytes]) -> bytes: + return b"_" + m.group().lower() + + username = UPPER_CASE_PATTERN.sub(f1, username) + else: + username = username.lower() + + # then we sort out non-ascii characters by converting to the hex equivalent. + def f2(m: Match[bytes]) -> bytes: + return b"=%02x" % (m.group()[0],) + + username = NON_MXID_CHARACTER_PATTERN.sub(f2, username) + + # we also do the =-escaping to mxids starting with an underscore. + username = re.sub(b"^_", b"=5f", username) + + # we should now only have ascii bytes left, so can decode back to a string. + return username.decode("ascii") + + +@attr.s(frozen=True, slots=True, order=False) +class RoomStreamToken: + """Tokens are positions between events. The token "s1" comes after event 1. + + s0 s1 + | | + [0] ▼ [1] ▼ [2] + + Tokens can either be a point in the live event stream or a cursor going + through historic events. + + When traversing the live event stream, events are ordered by + `stream_ordering` (when they arrived at the homeserver). + + When traversing historic events, events are first ordered by their `depth` + (`topological_ordering` in the event graph) and tie-broken by + `stream_ordering` (when the event arrived at the homeserver). + + If you're looking for more info about what a token with all of the + underscores means, ex. + `s2633508_17_338_6732159_1082514_541479_274711_265584_1`, see the docstring + for `StreamToken` below. + + --- + + Live tokens start with an "s" followed by the `stream_ordering` of the event + that comes before the position of the token. Said another way: + `stream_ordering` uniquely identifies a persisted event. 
The live token + means "the position just after the event identified by `stream_ordering`". + An example token is: + + s2633508 + + --- + + Historic tokens start with a "t" followed by the `depth` + (`topological_ordering` in the event graph) of the event that comes before + the position of the token, followed by "-", followed by the + `stream_ordering` of the event that comes before the position of the token. + An example token is: + + t426-2633508 + + --- + + There is also a third mode for live tokens where the token starts with "m", + which is sometimes used when using sharded event persisters. In this case + the events stream is considered to be a set of streams (one for each writer) + and the token encodes the vector clock of positions of each writer in their + respective streams. + + The format of the token in such case is an initial integer min position, + followed by the mapping of instance ID to position separated by '.' and '~': + + m{min_pos}~{writer1}.{pos1}~{writer2}.{pos2}. ... + + The `min_pos` corresponds to the minimum position all writers have persisted + up to, and then only writers that are ahead of that position need to be + encoded. An example token is: + + m56~2.58~3.59 + + Which corresponds to a set of three (or more writers) where instances 2 and + 3 (these are instance IDs that can be looked up in the DB to fetch the more + commonly used instance names) are at positions 58 and 59 respectively, and + all other instances are at position 56. + + Note: The `RoomStreamToken` cannot have both a topological part and an + instance map. + + --- + + For caching purposes, `RoomStreamToken`s and by extension, all their + attributes, must be hashable. + """ + + topological: Optional[int] = attr.ib( + validator=attr.validators.optional(attr.validators.instance_of(int)), + ) + stream: int = attr.ib(validator=attr.validators.instance_of(int)) + + instance_map: "frozendict[str, int]" = attr.ib( + factory=frozendict, + validator=attr.validators.deep_mapping( + key_validator=attr.validators.instance_of(str), + value_validator=attr.validators.instance_of(int), + mapping_validator=attr.validators.instance_of(frozendict), + ), + ) + + def __attrs_post_init__(self) -> None: + """Validates that both `topological` and `instance_map` aren't set.""" + + if self.instance_map and self.topological: + raise ValueError( + "Cannot set both 'topological' and 'instance_map' on 'RoomStreamToken'." 
+ ) + + @classmethod + async def parse(cls, store: "PurgeEventsStore", string: str) -> "RoomStreamToken": + try: + if string[0] == "s": + return cls(topological=None, stream=int(string[1:])) + if string[0] == "t": + parts = string[1:].split("-", 1) + return cls(topological=int(parts[0]), stream=int(parts[1])) + if string[0] == "m": + parts = string[1:].split("~") + stream = int(parts[0]) + + instance_map = {} + for part in parts[1:]: + key, value = part.split(".") + instance_id = int(key) + pos = int(value) + + instance_name = await store.get_name_from_instance_id(instance_id) # type: ignore[attr-defined] + instance_map[instance_name] = pos + + return cls( + topological=None, + stream=stream, + instance_map=frozendict(instance_map), + ) + except CancelledError: + raise + except Exception: + pass + raise SynapseError(400, "Invalid room stream token %r" % (string,)) + + @classmethod + def parse_stream_token(cls, string: str) -> "RoomStreamToken": + try: + if string[0] == "s": + return cls(topological=None, stream=int(string[1:])) + except Exception: + pass + raise SynapseError(400, "Invalid room stream token %r" % (string,)) + + def copy_and_advance(self, other: "RoomStreamToken") -> "RoomStreamToken": + """Return a new token such that if an event is after both this token and + the other token, then its after the returned token too. + """ + + if self.topological or other.topological: + raise Exception("Can't advance topological tokens") + + max_stream = max(self.stream, other.stream) + + instance_map = { + instance: max( + self.instance_map.get(instance, self.stream), + other.instance_map.get(instance, other.stream), + ) + for instance in set(self.instance_map).union(other.instance_map) + } + + return RoomStreamToken(None, max_stream, frozendict(instance_map)) + + def as_historical_tuple(self) -> Tuple[int, int]: + """Returns a tuple of `(topological, stream)` for historical tokens. + + Raises if not an historical token (i.e. doesn't have a topological part). + """ + if self.topological is None: + raise Exception( + "Cannot call `RoomStreamToken.as_historical_tuple` on live token" + ) + + return self.topological, self.stream + + def get_stream_pos_for_instance(self, instance_name: str) -> int: + """Get the stream position that the given writer was at at this token. + + This only makes sense for "live" tokens that may have a vector clock + component, and so asserts that this is a "live" token. + """ + assert self.topological is None + + # If we don't have an entry for the instance we can assume that it was + # at `self.stream`. + return self.instance_map.get(instance_name, self.stream) + + def get_max_stream_pos(self) -> int: + """Get the maximum stream position referenced in this token. + + The corresponding "min" position is, by definition just `self.stream`. + + This is used to handle tokens that have non-empty `instance_map`, and so + reference stream positions after the `self.stream` position. + """ + return max(self.instance_map.values(), default=self.stream) + + async def to_string(self, store: "DataStore") -> str: + if self.topological is not None: + return "t%d-%d" % (self.topological, self.stream) + elif self.instance_map: + entries = [] + for name, pos in self.instance_map.items(): + instance_id = await store.get_id_for_instance(name) + entries.append(f"{instance_id}.{pos}") + + encoded_map = "~".join(entries) + return f"m{self.stream}~{encoded_map}" + else: + return "s%d" % (self.stream,) + + +class StreamKeyType: + """Known stream types. 
+ + A stream is a list of entities ordered by an incrementing "stream token". + """ + + ROOM: Final = "room_key" + PRESENCE: Final = "presence_key" + TYPING: Final = "typing_key" + RECEIPT: Final = "receipt_key" + ACCOUNT_DATA: Final = "account_data_key" + PUSH_RULES: Final = "push_rules_key" + TO_DEVICE: Final = "to_device_key" + DEVICE_LIST: Final = "device_list_key" + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class StreamToken: + """A collection of keys joined together by underscores in the following + order and which represent the position in their respective streams. + + ex. `s2633508_17_338_6732159_1082514_541479_274711_265584_1` + 1. `room_key`: `s2633508` which is a `RoomStreamToken` + - `RoomStreamToken`'s can also look like `t426-2633508` or `m56~2.58~3.59` + - See the docstring for `RoomStreamToken` for more details. + 2. `presence_key`: `17` + 3. `typing_key`: `338` + 4. `receipt_key`: `6732159` + 5. `account_data_key`: `1082514` + 6. `push_rules_key`: `541479` + 7. `to_device_key`: `274711` + 8. `device_list_key`: `265584` + 9. `groups_key`: `1` (note that this key is now unused) + + You can see how many of these keys correspond to the various + fields in a "/sync" response: + ```json + { + "next_batch": "s12_4_0_1_1_1_1_4_1", + "presence": { + "events": [] + }, + "device_lists": { + "changed": [] + }, + "rooms": { + "join": { + "!QrZlfIDQLNLdZHqTnt:hs1": { + "timeline": { + "events": [], + "prev_batch": "s10_4_0_1_1_1_1_4_1", + "limited": false + }, + "state": { + "events": [] + }, + "account_data": { + "events": [] + }, + "ephemeral": { + "events": [] + } + } + } + } + } + ``` + + --- + + For caching purposes, `StreamToken`s and by extension, all their attributes, + must be hashable. + """ + + room_key: RoomStreamToken = attr.ib( + validator=attr.validators.instance_of(RoomStreamToken) + ) + presence_key: int + typing_key: int + receipt_key: int + account_data_key: int + push_rules_key: int + to_device_key: int + device_list_key: int + # Note that the groups key is no longer used and may have bogus values. + groups_key: int + + _SEPARATOR = "_" + START: ClassVar["StreamToken"] + + @classmethod + @cancellable + async def from_string(cls, store: "DataStore", string: str) -> "StreamToken": + """ + Creates a RoomStreamToken from its textual representation. + """ + try: + keys = string.split(cls._SEPARATOR) + while len(keys) < len(attr.fields(cls)): + # i.e. old token from before receipt_key + keys.append("0") + return cls( + await RoomStreamToken.parse(store, keys[0]), *(int(k) for k in keys[1:]) + ) + except CancelledError: + raise + except Exception: + raise SynapseError(400, "Invalid stream token") + + async def to_string(self, store: "DataStore") -> str: + return self._SEPARATOR.join( + [ + await self.room_key.to_string(store), + str(self.presence_key), + str(self.typing_key), + str(self.receipt_key), + str(self.account_data_key), + str(self.push_rules_key), + str(self.to_device_key), + str(self.device_list_key), + # Note that the groups key is no longer used, but it is still + # serialized so that there will not be confusion in the future + # if additional tokens are added. + str(self.groups_key), + ] + ) + + @property + def room_stream_id(self) -> int: + return self.room_key.stream + + def copy_and_advance(self, key: str, new_value: Any) -> "StreamToken": + """Advance the given key in the token to a new value if and only if the + new value is after the old value. + + :raises TypeError: if `key` is not the one of the keys tracked by a StreamToken. 
+ """ + if key == StreamKeyType.ROOM: + new_token = self.copy_and_replace( + StreamKeyType.ROOM, self.room_key.copy_and_advance(new_value) + ) + return new_token + + new_token = self.copy_and_replace(key, new_value) + new_id = int(getattr(new_token, key)) + old_id = int(getattr(self, key)) + + if old_id < new_id: + return new_token + else: + return self + + def copy_and_replace(self, key: str, new_value: Any) -> "StreamToken": + return attr.evolve(self, **{key: new_value}) + + +StreamToken.START = StreamToken(RoomStreamToken(None, 0), 0, 0, 0, 0, 0, 0, 0, 0) + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class PersistedEventPosition: + """Position of a newly persisted event with instance that persisted it. + + This can be used to test whether the event is persisted before or after a + RoomStreamToken. + """ + + instance_name: str + stream: int + + def persisted_after(self, token: RoomStreamToken) -> bool: + return token.get_stream_pos_for_instance(self.instance_name) < self.stream + + def to_room_stream_token(self) -> RoomStreamToken: + """Converts the position to a room stream token such that events + persisted in the same room after this position will be after the + returned `RoomStreamToken`. + + Note: no guarantees are made about ordering w.r.t. events in other + rooms. + """ + # Doing the naive thing satisfies the desired properties described in + # the docstring. + return RoomStreamToken(None, self.stream) + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class ThirdPartyInstanceID: + appservice_id: Optional[str] + network_id: Optional[str] + + # Deny iteration because it will bite you if you try to create a singleton + # set by: + # users = set(user) + def __iter__(self) -> NoReturn: + raise ValueError("Attempted to iterate a %s" % (type(self).__name__,)) + + # Because this class is a frozen class, it is deeply immutable. + def __copy__(self) -> "ThirdPartyInstanceID": + return self + + def __deepcopy__(self, memo: Dict[str, object]) -> "ThirdPartyInstanceID": + return self + + @classmethod + def from_string(cls, s: str) -> "ThirdPartyInstanceID": + bits = s.split("|", 2) + if len(bits) != 2: + raise SynapseError(400, "Invalid ID %r" % (s,)) + + return cls(appservice_id=bits[0], network_id=bits[1]) + + def to_string(self) -> str: + return "%s|%s" % (self.appservice_id, self.network_id) + + __str__ = to_string + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class ReadReceipt: + """Information about a read-receipt""" + + room_id: str + receipt_type: str + user_id: str + event_ids: List[str] + thread_id: Optional[str] + data: JsonDict + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class DeviceListUpdates: + """ + An object containing a diff of information regarding other users' device lists, intended for + a recipient to carry out device list tracking. + + Attributes: + changed: A set of users whose device lists have changed recently. + left: A set of users who the recipient no longer needs to track the device lists of. + Typically when those users no longer share any end-to-end encryption enabled rooms. + """ + + # We need to use a factory here, otherwise `set` is not evaluated at + # object instantiation, but instead at class definition instantiation. + # The latter happening only once, thus always giving you the same sets + # across multiple DeviceListUpdates instances. + # Also see: don't define mutable default arguments. 
+ changed: Set[str] = attr.ib(factory=set) + left: Set[str] = attr.ib(factory=set) + + def __bool__(self) -> bool: + return bool(self.changed or self.left) + + +def get_verify_key_from_cross_signing_key( + key_info: Mapping[str, Any] +) -> Tuple[str, VerifyKey]: + """Get the key ID and signedjson verify key from a cross-signing key dict + + Args: + key_info: a cross-signing key dict, which must have a "keys" + property that has exactly one item in it + + Returns: + the key ID and verify key for the cross-signing key + """ + # make sure that a `keys` field is provided + if "keys" not in key_info: + raise ValueError("Invalid key") + keys = key_info["keys"] + # and that it contains exactly one key + if len(keys) == 1: + key_id, key_data = next(iter(keys.items())) + return key_id, decode_verify_key_bytes(key_id, decode_base64(key_data)) + else: + raise ValueError("Invalid key") + + +@attr.s(auto_attribs=True, frozen=True, slots=True) +class UserInfo: + """Holds information about a user. Result of get_userinfo_by_id. + + Attributes: + user_id: ID of the user. + appservice_id: Application service ID that created this user. + consent_server_notice_sent: Version of policy documents the user has been sent. + consent_version: Version of policy documents the user has consented to. + creation_ts: Creation timestamp of the user. + is_admin: True if the user is an admin. + is_deactivated: True if the user has been deactivated. + is_guest: True if the user is a guest user. + is_shadow_banned: True if the user has been shadow-banned. + user_type: User type (None for normal user, 'support' and 'bot' other options). + """ + + user_id: UserID + appservice_id: Optional[int] + consent_server_notice_sent: Optional[str] + consent_version: Optional[str] + user_type: Optional[str] + creation_ts: int + is_admin: bool + is_deactivated: bool + is_guest: bool + is_shadow_banned: bool + + +class UserProfile(TypedDict): + user_id: str + display_name: Optional[str] + avatar_url: Optional[str] + + +@attr.s(auto_attribs=True, frozen=True, slots=True) +class RetentionPolicy: + min_lifetime: Optional[int] = None + max_lifetime: Optional[int] = None diff --git a/synapse/types/state.py b/synapse/types/state.py new file mode 100644 index 0000000000..0004d955b4 --- /dev/null +++ b/synapse/types/state.py @@ -0,0 +1,567 @@ +# Copyright 2014-2016 OpenMarket Ltd +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import logging +from typing import ( + TYPE_CHECKING, + Callable, + Collection, + Dict, + Iterable, + List, + Mapping, + Optional, + Set, + Tuple, + TypeVar, +) + +import attr +from frozendict import frozendict + +from synapse.api.constants import EventTypes +from synapse.types import MutableStateMap, StateKey, StateMap + +if TYPE_CHECKING: + from typing import FrozenSet # noqa: used within quoted type hint; flake8 sad + + +logger = logging.getLogger(__name__) + +# Used for generic functions below +T = TypeVar("T") + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class StateFilter: + """A filter used when querying for state. + + Attributes: + types: Map from type to set of state keys (or None). This specifies + which state_keys for the given type to fetch from the DB. If None + then all events with that type are fetched. If the set is empty + then no events with that type are fetched. + include_others: Whether to fetch events with types that do not + appear in `types`. + """ + + types: "frozendict[str, Optional[FrozenSet[str]]]" + include_others: bool = False + + def __attrs_post_init__(self) -> None: + # If `include_others` is set we canonicalise the filter by removing + # wildcards from the types dictionary + if self.include_others: + # this is needed to work around the fact that StateFilter is frozen + object.__setattr__( + self, + "types", + frozendict({k: v for k, v in self.types.items() if v is not None}), + ) + + @staticmethod + def all() -> "StateFilter": + """Returns a filter that fetches everything. + + Returns: + The state filter. + """ + return _ALL_STATE_FILTER + + @staticmethod + def none() -> "StateFilter": + """Returns a filter that fetches nothing. + + Returns: + The new state filter. + """ + return _NONE_STATE_FILTER + + @staticmethod + def from_types(types: Iterable[Tuple[str, Optional[str]]]) -> "StateFilter": + """Creates a filter that only fetches the given types + + Args: + types: A list of type and state keys to fetch. A state_key of None + fetches everything for that type + + Returns: + The new state filter. + """ + type_dict: Dict[str, Optional[Set[str]]] = {} + for typ, s in types: + if typ in type_dict: + if type_dict[typ] is None: + continue + + if s is None: + type_dict[typ] = None + continue + + type_dict.setdefault(typ, set()).add(s) # type: ignore + + return StateFilter( + types=frozendict( + (k, frozenset(v) if v is not None else None) + for k, v in type_dict.items() + ) + ) + + @staticmethod + def from_lazy_load_member_list(members: Iterable[str]) -> "StateFilter": + """Creates a filter that returns all non-member events, plus the member + events for the given users + + Args: + members: Set of user IDs + + Returns: + The new state filter + """ + return StateFilter( + types=frozendict({EventTypes.Member: frozenset(members)}), + include_others=True, + ) + + @staticmethod + def freeze( + types: Mapping[str, Optional[Collection[str]]], include_others: bool + ) -> "StateFilter": + """ + Returns a (frozen) StateFilter with the same contents as the parameters + specified here, which can be made of mutable types. 
+ """ + types_with_frozen_values: Dict[str, Optional[FrozenSet[str]]] = {} + for state_types, state_keys in types.items(): + if state_keys is not None: + types_with_frozen_values[state_types] = frozenset(state_keys) + else: + types_with_frozen_values[state_types] = None + + return StateFilter( + frozendict(types_with_frozen_values), include_others=include_others + ) + + def return_expanded(self) -> "StateFilter": + """Creates a new StateFilter where type wild cards have been removed + (except for memberships). The returned filter is a superset of the + current one, i.e. anything that passes the current filter will pass + the returned filter. + + This helps the caching as the DictionaryCache knows if it has *all* the + state, but does not know if it has all of the keys of a particular type, + which makes wildcard lookups expensive unless we have a complete cache. + Hence, if we are doing a wildcard lookup, populate the cache fully so + that we can do an efficient lookup next time. + + Note that since we have two caches, one for membership events and one for + other events, we can be a bit more clever than simply returning + `StateFilter.all()` if `has_wildcards()` is True. + + We return a StateFilter where: + 1. the list of membership events to return is the same + 2. if there is a wildcard that matches non-member events we + return all non-member events + + Returns: + The new state filter. + """ + + if self.is_full(): + # If we're going to return everything then there's nothing to do + return self + + if not self.has_wildcards(): + # If there are no wild cards, there's nothing to do + return self + + if EventTypes.Member in self.types: + get_all_members = self.types[EventTypes.Member] is None + else: + get_all_members = self.include_others + + has_non_member_wildcard = self.include_others or any( + state_keys is None + for t, state_keys in self.types.items() + if t != EventTypes.Member + ) + + if not has_non_member_wildcard: + # If there are no non-member wild cards we can just return ourselves + return self + + if get_all_members: + # We want to return everything. + return StateFilter.all() + elif EventTypes.Member in self.types: + # We want to return all non-members, but only particular + # memberships + return StateFilter( + types=frozendict({EventTypes.Member: self.types[EventTypes.Member]}), + include_others=True, + ) + else: + # We want to return all non-members + return _ALL_NON_MEMBER_STATE_FILTER + + def make_sql_filter_clause(self) -> Tuple[str, List[str]]: + """Converts the filter to an SQL clause. + + For example: + + f = StateFilter.from_types([("m.room.create", "")]) + clause, args = f.make_sql_filter_clause() + clause == "(type = ? AND state_key = ?)" + args == ['m.room.create', ''] + + + Returns: + The SQL string (may be empty) and arguments. An empty SQL string is + returned when the filter matches everything (i.e. is "full"). + """ + + where_clause = "" + where_args: List[str] = [] + + if self.is_full(): + return where_clause, where_args + + if not self.include_others and not self.types: + # i.e. this is an empty filter, so we need to return a clause that + # will match nothing + return "1 = 2", [] + + # First we build up a lost of clauses for each type/state_key combo + clauses = [] + for etype, state_keys in self.types.items(): + if state_keys is None: + clauses.append("(type = ?)") + where_args.append(etype) + continue + + for state_key in state_keys: + clauses.append("(type = ? 
AND state_key = ?)") + where_args.extend((etype, state_key)) + + # This will match anything that appears in `self.types` + where_clause = " OR ".join(clauses) + + # If we want to include stuff that's not in the types dict then we add + # a `OR type NOT IN (...)` clause to the end. + if self.include_others: + if where_clause: + where_clause += " OR " + + where_clause += "type NOT IN (%s)" % (",".join(["?"] * len(self.types)),) + where_args.extend(self.types) + + return where_clause, where_args + + def max_entries_returned(self) -> Optional[int]: + """Returns the maximum number of entries this filter will return if + known, otherwise returns None. + + For example a simple state filter asking for `("m.room.create", "")` + will return 1, whereas the default state filter will return None. + + This is used to bail out early if the right number of entries have been + fetched. + """ + if self.has_wildcards(): + return None + + return len(self.concrete_types()) + + def filter_state(self, state_dict: StateMap[T]) -> MutableStateMap[T]: + """Returns the state filtered with by this StateFilter. + + Args: + state: The state map to filter + + Returns: + The filtered state map. + This is a copy, so it's safe to mutate. + """ + if self.is_full(): + return dict(state_dict) + + filtered_state = {} + for k, v in state_dict.items(): + typ, state_key = k + if typ in self.types: + state_keys = self.types[typ] + if state_keys is None or state_key in state_keys: + filtered_state[k] = v + elif self.include_others: + filtered_state[k] = v + + return filtered_state + + def is_full(self) -> bool: + """Whether this filter fetches everything or not + + Returns: + True if the filter fetches everything. + """ + return self.include_others and not self.types + + def has_wildcards(self) -> bool: + """Whether the filter includes wildcards or is attempting to fetch + specific state. + + Returns: + True if the filter includes wildcards. + """ + + return self.include_others or any( + state_keys is None for state_keys in self.types.values() + ) + + def concrete_types(self) -> List[Tuple[str, str]]: + """Returns a list of concrete type/state_keys (i.e. not None) that + will be fetched. This will be a complete list if `has_wildcards` + returns False, but otherwise will be a subset (or even empty). + + Returns: + A list of type/state_keys tuples. + """ + return [ + (t, s) + for t, state_keys in self.types.items() + if state_keys is not None + for s in state_keys + ] + + def get_member_split(self) -> Tuple["StateFilter", "StateFilter"]: + """Return the filter split into two: one which assumes it's exclusively + matching against member state, and one which assumes it's matching + against non member state. + + This is useful due to the returned filters giving correct results for + `is_full()`, `has_wildcards()`, etc, when operating against maps that + either exclusively contain member events or only contain non-member + events. (Which is the case when dealing with the member vs non-member + state caches). 
+ + Returns: + The member and non member filters + """ + + if EventTypes.Member in self.types: + state_keys = self.types[EventTypes.Member] + if state_keys is None: + member_filter = StateFilter.all() + else: + member_filter = StateFilter(frozendict({EventTypes.Member: state_keys})) + elif self.include_others: + member_filter = StateFilter.all() + else: + member_filter = StateFilter.none() + + non_member_filter = StateFilter( + types=frozendict( + {k: v for k, v in self.types.items() if k != EventTypes.Member} + ), + include_others=self.include_others, + ) + + return member_filter, non_member_filter + + def _decompose_into_four_parts( + self, + ) -> Tuple[Tuple[bool, Set[str]], Tuple[Set[str], Set[StateKey]]]: + """ + Decomposes this state filter into 4 constituent parts, which can be + thought of as this: + all? - minus_wildcards + plus_wildcards + plus_state_keys + + where + * all represents ALL state + * minus_wildcards represents entire state types to remove + * plus_wildcards represents entire state types to add + * plus_state_keys represents individual state keys to add + + See `recompose_from_four_parts` for the other direction of this + correspondence. + """ + is_all = self.include_others + excluded_types: Set[str] = {t for t in self.types if is_all} + wildcard_types: Set[str] = {t for t, s in self.types.items() if s is None} + concrete_keys: Set[StateKey] = set(self.concrete_types()) + + return (is_all, excluded_types), (wildcard_types, concrete_keys) + + @staticmethod + def _recompose_from_four_parts( + all_part: bool, + minus_wildcards: Set[str], + plus_wildcards: Set[str], + plus_state_keys: Set[StateKey], + ) -> "StateFilter": + """ + Recomposes a state filter from 4 parts. + + See `decompose_into_four_parts` (the other direction of this + correspondence) for descriptions on each of the parts. + """ + + # {state type -> set of state keys OR None for wildcard} + # (The same structure as that of a StateFilter.) + new_types: Dict[str, Optional[Set[str]]] = {} + + # if we start with all, insert the excluded statetypes as empty sets + # to prevent them from being included + if all_part: + new_types.update({state_type: set() for state_type in minus_wildcards}) + + # insert the plus wildcards + new_types.update({state_type: None for state_type in plus_wildcards}) + + # insert the specific state keys + for state_type, state_key in plus_state_keys: + if state_type in new_types: + entry = new_types[state_type] + if entry is not None: + entry.add(state_key) + elif not all_part: + # don't insert if the entire type is already included by + # include_others as this would actually shrink the state allowed + # by this filter. + new_types[state_type] = {state_key} + + return StateFilter.freeze(new_types, include_others=all_part) + + def approx_difference(self, other: "StateFilter") -> "StateFilter": + """ + Returns a state filter which represents `self - other`. + + This is useful for determining what state remains to be pulled out of the + database if we want the state included by `self` but already have the state + included by `other`. + + The returned state filter + - MUST include all state events that are included by this filter (`self`) + unless they are included by `other`; + - MUST NOT include state events not included by this filter (`self`); and + - MAY be an over-approximation: the returned state filter + MAY additionally include some state events from `other`. + + This implementation attempts to return the narrowest such state filter. 
+ In the case that `self` contains wildcards for state types where + `other` contains specific state keys, an approximation must be made: + the returned state filter keeps the wildcard, as state filters are not + able to express 'all state keys except some given examples'. + e.g. + StateFilter(m.room.member -> None (wildcard)) + minus + StateFilter(m.room.member -> {'@wombat:example.org'}) + is approximated as + StateFilter(m.room.member -> None (wildcard)) + """ + + # We first transform self and other into an alternative representation: + # - whether or not they include all events to begin with ('all') + # - if so, which event types are excluded? ('excludes') + # - which entire event types to include ('wildcards') + # - which concrete state keys to include ('concrete state keys') + (self_all, self_excludes), ( + self_wildcards, + self_concrete_keys, + ) = self._decompose_into_four_parts() + (other_all, other_excludes), ( + other_wildcards, + other_concrete_keys, + ) = other._decompose_into_four_parts() + + # Start with an estimate of the difference based on self + new_all = self_all + # Wildcards from the other can be added to the exclusion filter + new_excludes = self_excludes | other_wildcards + # We remove wildcards that appeared as wildcards in the other + new_wildcards = self_wildcards - other_wildcards + # We filter out the concrete state keys that appear in the other + # as wildcards or concrete state keys. + new_concrete_keys = { + (state_type, state_key) + for (state_type, state_key) in self_concrete_keys + if state_type not in other_wildcards + } - other_concrete_keys + + if other_all: + if self_all: + # If self starts with all, then we add as wildcards any + # types which appear in the other's exclusion filter (but + # aren't in the self exclusion filter). This is as the other + # filter will return everything BUT the types in its exclusion, so + # we need to add those excluded types that also match the self + # filter as wildcard types in the new filter. + new_wildcards |= other_excludes.difference(self_excludes) + + # If other is an `include_others` then the difference isn't. + new_all = False + # (We have no need for excludes when we don't start with all, as there + # is nothing to exclude.) + new_excludes = set() + + # We also filter out all state types that aren't in the exclusion + # list of the other. + new_wildcards &= other_excludes + new_concrete_keys = { + (state_type, state_key) + for (state_type, state_key) in new_concrete_keys + if state_type in other_excludes + } + + # Transform our newly-constructed state filter from the alternative + # representation back into the normal StateFilter representation. + return StateFilter._recompose_from_four_parts( + new_all, new_excludes, new_wildcards, new_concrete_keys + ) + + def must_await_full_state(self, is_mine_id: Callable[[str], bool]) -> bool: + """Check if we need to wait for full state to complete to calculate this state + + If we have a state filter which is completely satisfied even with partial + state, then we don't need to await_full_state before we can return it. 
+ + Args: + is_mine_id: a callable which confirms if a given state_key matches a mxid + of a local user + """ + # if we haven't requested membership events, then it depends on the value of + # 'include_others' + if EventTypes.Member not in self.types: + return self.include_others + + # if we're looking for *all* membership events, then we have to wait + member_state_keys = self.types[EventTypes.Member] + if member_state_keys is None: + return True + + # otherwise, consider whose membership we are looking for. If it's entirely + # local users, then we don't need to wait. + for state_key in member_state_keys: + if not is_mine_id(state_key): + # remote user + return True + + # local users only + return False + + +_ALL_STATE_FILTER = StateFilter(types=frozendict(), include_others=True) +_ALL_NON_MEMBER_STATE_FILTER = StateFilter( + types=frozendict({EventTypes.Member: frozenset()}), include_others=True +) +_NONE_STATE_FILTER = StateFilter(types=frozendict(), include_others=False) diff --git a/synapse/visibility.py b/synapse/visibility.py index b443857571..e442de3173 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -26,8 +26,8 @@ from synapse.events.utils import prune_event from synapse.logging.opentracing import trace from synapse.storage.controllers import StorageControllers from synapse.storage.databases.main import DataStore -from synapse.storage.state import StateFilter from synapse.types import RetentionPolicy, StateMap, get_domain_from_id +from synapse.types.state import StateFilter from synapse.util import Clock logger = logging.getLogger(__name__) diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py index d4e6d4236c..a433e70870 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -22,8 +22,8 @@ from synapse.api.constants import EventTypes, Membership from synapse.api.room_versions import RoomVersions from synapse.events import EventBase from synapse.server import HomeServer -from synapse.storage.state import StateFilter from synapse.types import JsonDict, RoomID, StateMap, UserID +from synapse.types.state import StateFilter from synapse.util import Clock from tests.unittest import HomeserverTestCase, TestCase -- cgit 1.5.1
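[Editor's note] As a rough illustration of the API surface this patch reorganises
(hypothetical usage, not part of the commit): the `StateFilter` calls below follow
the docstrings added in the new `synapse/types/state.py`, and the token examples
follow `RoomStreamToken` in `synapse/types/__init__.py`. Import paths assume the
package layout introduced above; the state keys and token values are made up.

    from synapse.types import RoomStreamToken
    from synapse.types.state import StateFilter

    # Live tokens ("s<stream>") can be parsed without a datastore.
    live = RoomStreamToken.parse_stream_token("s2633508")
    assert (live.topological, live.stream) == (None, 2633508)

    # Historical tokens ("t<depth>-<stream>") also carry a topological part.
    historical = RoomStreamToken(426, 2633508)
    assert historical.as_historical_tuple() == (426, 2633508)

    # A filter for a single concrete event yields the SQL clause shown in
    # make_sql_filter_clause()'s docstring.
    create_only = StateFilter.from_types([("m.room.create", "")])
    assert create_only.make_sql_filter_clause() == (
        "(type = ? AND state_key = ?)",
        ["m.room.create", ""],
    )
    assert create_only.max_entries_returned() == 1

    # approx_difference() may over-approximate: a member wildcard minus one
    # specific member stays a wildcard, per the docstring example.
    wildcard = StateFilter.from_types([("m.room.member", None)])
    one_member = StateFilter.from_types([("m.room.member", "@wombat:example.org")])
    assert wildcard.approx_difference(one_member) == wildcard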