From 14662d3c18217ba9e865b56203829e88d2ed4532 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 25 Mar 2022 09:21:06 -0500 Subject: Refactor `create_new_client_event` to use a new parameter, `state_event_ids`, which accurately describes the usage with MSC2716 instead of abusing `auth_event_ids` (#12083) Spawned from https://github.com/matrix-org/synapse/pull/10975#discussion_r813183430 Part of [MSC2716](https://github.com/matrix-org/matrix-spec-proposals/pull/2716) --- synapse/handlers/message.py | 63 +++++++++++++++------ synapse/handlers/room_batch.py | 112 ++++++++++++++++++++++++-------------- synapse/handlers/room_member.py | 31 +++++++++++ synapse/rest/client/room_batch.py | 23 +++++--- 4 files changed, 164 insertions(+), 65 deletions(-) (limited to 'synapse') diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index f9544fe7fb..1c4fb4360a 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -493,6 +493,7 @@ class EventCreationHandler: allow_no_prev_events: bool = False, prev_event_ids: Optional[List[str]] = None, auth_event_ids: Optional[List[str]] = None, + state_event_ids: Optional[List[str]] = None, require_consent: bool = True, outlier: bool = False, historical: bool = False, @@ -527,6 +528,15 @@ class EventCreationHandler: If non-None, prev_event_ids must also be provided. + state_event_ids: + The full state at a given event. This is used particularly by the MSC2716 + /batch_send endpoint. One use case is with insertion events which float at + the beginning of a historical batch and don't have any `prev_events` to + derive from; we add all of these state events as the explicit state so the + rest of the historical batch can inherit the same state and state_group. + This should normally be left as None, which will cause the auth_event_ids + to be calculated based on the room state at the prev_events. + require_consent: Whether to check if the requester has consented to the privacy policy. @@ -612,6 +622,7 @@ class EventCreationHandler: allow_no_prev_events=allow_no_prev_events, prev_event_ids=prev_event_ids, auth_event_ids=auth_event_ids, + state_event_ids=state_event_ids, depth=depth, ) @@ -772,6 +783,7 @@ class EventCreationHandler: allow_no_prev_events: bool = False, prev_event_ids: Optional[List[str]] = None, auth_event_ids: Optional[List[str]] = None, + state_event_ids: Optional[List[str]] = None, ratelimit: bool = True, txn_id: Optional[str] = None, ignore_shadow_ban: bool = False, @@ -801,6 +813,14 @@ class EventCreationHandler: based on the room state at the prev_events. If non-None, prev_event_ids must also be provided. + state_event_ids: + The full state at a given event. This is used particularly by the MSC2716 + /batch_send endpoint. One use case is with insertion events which float at + the beginning of a historical batch and don't have any `prev_events` to + derive from; we add all of these state events as the explicit state so the + rest of the historical batch can inherit the same state and state_group. + This should normally be left as None, which will cause the auth_event_ids + to be calculated based on the room state at the prev_events. ratelimit: Whether to rate limit this send. txn_id: The transaction ID. ignore_shadow_ban: True if shadow-banned users should be allowed to @@ -856,8 +876,10 @@ class EventCreationHandler: requester, event_dict, txn_id=txn_id, + allow_no_prev_events=allow_no_prev_events, prev_event_ids=prev_event_ids, auth_event_ids=auth_event_ids, + state_event_ids=state_event_ids, outlier=outlier, historical=historical, depth=depth, @@ -893,6 +915,7 @@ class EventCreationHandler: allow_no_prev_events: bool = False, prev_event_ids: Optional[List[str]] = None, auth_event_ids: Optional[List[str]] = None, + state_event_ids: Optional[List[str]] = None, depth: Optional[int] = None, ) -> Tuple[EventBase, EventContext]: """Create a new event for a local client @@ -915,6 +938,15 @@ class EventCreationHandler: Should normally be left as None, which will cause them to be calculated based on the room state at the prev_events. + state_event_ids: + The full state at a given event. This is used particularly by the MSC2716 + /batch_send endpoint. One use case is with insertion events which float at + the beginning of a historical batch and don't have any `prev_events` to + derive from; we add all of these state events as the explicit state so the + rest of the historical batch can inherit the same state and state_group. + This should normally be left as None, which will cause the auth_event_ids + to be calculated based on the room state at the prev_events. + depth: Override the depth used to order the event in the DAG. Should normally be set to None, which will cause the depth to be calculated based on the prev_events. @@ -922,31 +954,26 @@ class EventCreationHandler: Returns: Tuple of created event, context """ - # Strip down the auth_event_ids to only what we need to auth the event. + # Strip down the state_event_ids to only what we need to auth the event. # For example, we don't need extra m.room.member that don't match event.sender - full_state_ids_at_event = None - if auth_event_ids is not None: - # If auth events are provided, prev events must be also. + if state_event_ids is not None: + # Do a quick check to make sure that prev_event_ids is present to + # make the type-checking around `builder.build` happy. # prev_event_ids could be an empty array though. assert prev_event_ids is not None - # Copy the full auth state before it stripped down - full_state_ids_at_event = auth_event_ids.copy() - temp_event = await builder.build( prev_event_ids=prev_event_ids, - auth_event_ids=auth_event_ids, + auth_event_ids=state_event_ids, depth=depth, ) - auth_events = await self.store.get_events_as_list(auth_event_ids) + state_events = await self.store.get_events_as_list(state_event_ids) # Create a StateMap[str] - auth_event_state_map = { - (e.type, e.state_key): e.event_id for e in auth_events - } - # Actually strip down and use the necessary auth events + state_map = {(e.type, e.state_key): e.event_id for e in state_events} + # Actually strip down and only use the necessary auth events auth_event_ids = self._event_auth_handler.compute_auth_events( event=temp_event, - current_state_ids=auth_event_state_map, + current_state_ids=state_map, for_verification=False, ) @@ -989,12 +1016,16 @@ class EventCreationHandler: context = EventContext.for_outlier() elif ( event.type == EventTypes.MSC2716_INSERTION - and full_state_ids_at_event + and state_event_ids and builder.internal_metadata.is_historical() ): + # Add explicit state to the insertion event so it has state to derive + # from even though it's floating with no `prev_events`. The rest of + # the batch can derive from this state and state_group. + # # TODO(faster_joins): figure out how this works, and make sure that the # old state is complete. - old_state = await self.store.get_events_as_list(full_state_ids_at_event) + old_state = await self.store.get_events_as_list(state_event_ids) context = await self.state.compute_event_context(event, old_state=old_state) else: context = await self.state.compute_event_context(event) diff --git a/synapse/handlers/room_batch.py b/synapse/handlers/room_batch.py index abbf7b7b27..a0255bd143 100644 --- a/synapse/handlers/room_batch.py +++ b/synapse/handlers/room_batch.py @@ -121,12 +121,11 @@ class RoomBatchHandler: return create_requester(user_id, app_service=app_service) - async def get_most_recent_auth_event_ids_from_event_id_list( + async def get_most_recent_full_state_ids_from_event_id_list( self, event_ids: List[str] ) -> List[str]: - """Find the most recent auth event ids (derived from state events) that - allowed that message to be sent. We will use this as a base - to auth our historical messages against. + """Find the most recent event_id and grab the full state at that event. + We will use this as a base to auth our historical messages against. Args: event_ids: List of event ID's to look at @@ -136,38 +135,37 @@ class RoomBatchHandler: """ ( - most_recent_prev_event_id, + most_recent_event_id, _, ) = await self.store.get_max_depth_of(event_ids) # mapping from (type, state_key) -> state_event_id prev_state_map = await self.state_store.get_state_ids_for_event( - most_recent_prev_event_id + most_recent_event_id ) # List of state event ID's - prev_state_ids = list(prev_state_map.values()) - auth_event_ids = prev_state_ids + full_state_ids = list(prev_state_map.values()) - return auth_event_ids + return full_state_ids async def persist_state_events_at_start( self, state_events_at_start: List[JsonDict], room_id: str, - initial_auth_event_ids: List[str], + initial_state_event_ids: List[str], app_service_requester: Requester, ) -> List[str]: """Takes all `state_events_at_start` event dictionaries and creates/persists - them as floating state events which don't resolve into the current room state. - They are floating because they reference a fake prev_event which doesn't connect - to the normal DAG at all. + them in a floating state event chain which don't resolve into the current room + state. They are floating because they reference no prev_events and are marked + as outliers which disconnects them from the normal DAG. Args: state_events_at_start: room_id: Room where you want the events persisted in. - initial_auth_event_ids: These will be the auth_events for the first - state event created. Each event created afterwards will be - added to the list of auth events for the next state event - created. + initial_state_event_ids: + The base set of state for the historical batch which the floating + state chain will derive from. This should probably be the state + from the `prev_event` defined by `/batch_send?prev_event_id=$abc`. app_service_requester: The requester of an application service. Returns: @@ -176,7 +174,7 @@ class RoomBatchHandler: assert app_service_requester.app_service state_event_ids_at_start = [] - auth_event_ids = initial_auth_event_ids.copy() + state_event_ids = initial_state_event_ids.copy() # Make the state events float off on their own by specifying no # prev_events for the first one in the chain so we don't have a bunch of @@ -189,9 +187,7 @@ class RoomBatchHandler: ) logger.debug( - "RoomBatchSendEventRestServlet inserting state_event=%s, auth_event_ids=%s", - state_event, - auth_event_ids, + "RoomBatchSendEventRestServlet inserting state_event=%s", state_event ) event_dict = { @@ -217,16 +213,26 @@ class RoomBatchHandler: room_id=room_id, action=membership, content=event_dict["content"], + # Mark as an outlier to disconnect it from the normal DAG + # and not show up between batches of history. outlier=True, historical=True, - # Only the first event in the chain should be floating. + # Only the first event in the state chain should be floating. # The rest should hang off each other in a chain. allow_no_prev_events=index == 0, prev_event_ids=prev_event_ids_for_state_chain, + # Since each state event is marked as an outlier, the + # `EventContext.for_outlier()` won't have any `state_ids` + # set and therefore can't derive any state even though the + # prev_events are set. Also since the first event in the + # state chain is floating with no `prev_events`, it can't + # derive state from anywhere automatically. So we need to + # set some state explicitly. + # # Make sure to use a copy of this list because we modify it # later in the loop here. Otherwise it will be the same # reference and also update in the event when we append later. - auth_event_ids=auth_event_ids.copy(), + state_event_ids=state_event_ids.copy(), ) else: # TODO: Add some complement tests that adds state that is not member joins @@ -240,21 +246,31 @@ class RoomBatchHandler: state_event["sender"], app_service_requester.app_service ), event_dict, + # Mark as an outlier to disconnect it from the normal DAG + # and not show up between batches of history. outlier=True, historical=True, - # Only the first event in the chain should be floating. + # Only the first event in the state chain should be floating. # The rest should hang off each other in a chain. allow_no_prev_events=index == 0, prev_event_ids=prev_event_ids_for_state_chain, + # Since each state event is marked as an outlier, the + # `EventContext.for_outlier()` won't have any `state_ids` + # set and therefore can't derive any state even though the + # prev_events are set. Also since the first event in the + # state chain is floating with no `prev_events`, it can't + # derive state from anywhere automatically. So we need to + # set some state explicitly. + # # Make sure to use a copy of this list because we modify it # later in the loop here. Otherwise it will be the same # reference and also update in the event when we append later. - auth_event_ids=auth_event_ids.copy(), + state_event_ids=state_event_ids.copy(), ) event_id = event.event_id state_event_ids_at_start.append(event_id) - auth_event_ids.append(event_id) + state_event_ids.append(event_id) # Connect all the state in a floating chain prev_event_ids_for_state_chain = [event_id] @@ -265,7 +281,7 @@ class RoomBatchHandler: events_to_create: List[JsonDict], room_id: str, inherited_depth: int, - auth_event_ids: List[str], + initial_state_event_ids: List[str], app_service_requester: Requester, ) -> List[str]: """Create and persists all events provided sequentially. Handles the @@ -281,8 +297,10 @@ class RoomBatchHandler: room_id: Room where you want the events persisted in. inherited_depth: The depth to create the events at (you will probably by calling inherit_depth_from_prev_ids(...)). - auth_event_ids: Define which events allow you to create the given - event in the room. + initial_state_event_ids: + This is used to set explicit state for the insertion event at + the start of the historical batch since it's floating with no + prev_events to derive state from automatically. app_service_requester: The requester of an application service. Returns: @@ -290,6 +308,11 @@ class RoomBatchHandler: """ assert app_service_requester.app_service + # We expect the first event in a historical batch to be an insertion event + assert events_to_create[0]["type"] == EventTypes.MSC2716_INSERTION + # We expect the last event in a historical batch to be an batch event + assert events_to_create[-1]["type"] == EventTypes.MSC2716_BATCH + # Make the historical event chain float off on its own by specifying no # prev_events for the first event in the chain which causes the HS to # ask for the state at the start of the batch later. @@ -321,11 +344,16 @@ class RoomBatchHandler: ev["sender"], app_service_requester.app_service ), event_dict, - # Only the first event in the chain should be floating. - # The rest should hang off each other in a chain. + # Only the first event (which is the insertion event) in the + # chain should be floating. The rest should hang off each other + # in a chain. allow_no_prev_events=index == 0, prev_event_ids=event_dict.get("prev_events"), - auth_event_ids=auth_event_ids, + # Since the first event (which is the insertion event) in the + # chain is floating with no `prev_events`, it can't derive state + # from anywhere automatically. So we need to set some state + # explicitly. + state_event_ids=initial_state_event_ids if index == 0 else None, historical=True, depth=inherited_depth, ) @@ -343,10 +371,9 @@ class RoomBatchHandler: ) logger.debug( - "RoomBatchSendEventRestServlet inserting event=%s, prev_event_ids=%s, auth_event_ids=%s", + "RoomBatchSendEventRestServlet inserting event=%s, prev_event_ids=%s", event, prev_event_ids, - auth_event_ids, ) events_to_persist.append((event, context)) @@ -376,12 +403,12 @@ class RoomBatchHandler: room_id: str, batch_id_to_connect_to: str, inherited_depth: int, - auth_event_ids: List[str], + initial_state_event_ids: List[str], app_service_requester: Requester, ) -> Tuple[List[str], str]: """ - Handles creating and persisting all of the historical events as well - as insertion and batch meta events to make the batch navigable in the DAG. + Handles creating and persisting all of the historical events as well as + insertion and batch meta events to make the batch navigable in the DAG. Args: events_to_create: List of historical events to create in JSON @@ -391,8 +418,13 @@ class RoomBatchHandler: want this batch to connect to. inherited_depth: The depth to create the events at (you will probably by calling inherit_depth_from_prev_ids(...)). - auth_event_ids: Define which events allow you to create the given - event in the room. + initial_state_event_ids: + This is used to set explicit state for the insertion event at + the start of the historical batch since it's floating with no + prev_events to derive state from automatically. This should + probably be the state from the `prev_event` defined by + `/batch_send?prev_event_id=$abc` plus the outcome of + `persist_state_events_at_start` app_service_requester: The requester of an application service. Returns: @@ -438,7 +470,7 @@ class RoomBatchHandler: events_to_create=events_to_create, room_id=room_id, inherited_depth=inherited_depth, - auth_event_ids=auth_event_ids, + initial_state_event_ids=initial_state_event_ids, app_service_requester=app_service_requester, ) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 7cbc484b06..a33fa34aa8 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -272,6 +272,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): allow_no_prev_events: bool = False, prev_event_ids: Optional[List[str]] = None, auth_event_ids: Optional[List[str]] = None, + state_event_ids: Optional[List[str]] = None, txn_id: Optional[str] = None, ratelimit: bool = True, content: Optional[dict] = None, @@ -298,6 +299,14 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): The event ids to use as the auth_events for the new event. Should normally be left as None, which will cause them to be calculated based on the room state at the prev_events. + state_event_ids: + The full state at a given event. This is used particularly by the MSC2716 + /batch_send endpoint. One use case is the historical `state_events_at_start`; + since each is marked as an `outlier`, the `EventContext.for_outlier()` won't + have any `state_ids` set and therefore can't derive any state even though the + prev_events are set so we need to set them ourself via this argument. + This should normally be left as None, which will cause the auth_event_ids + to be calculated based on the room state at the prev_events. txn_id: ratelimit: @@ -353,6 +362,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): allow_no_prev_events=allow_no_prev_events, prev_event_ids=prev_event_ids, auth_event_ids=auth_event_ids, + state_event_ids=state_event_ids, require_consent=require_consent, outlier=outlier, historical=historical, @@ -456,6 +466,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): allow_no_prev_events: bool = False, prev_event_ids: Optional[List[str]] = None, auth_event_ids: Optional[List[str]] = None, + state_event_ids: Optional[List[str]] = None, ) -> Tuple[str, int]: """Update a user's membership in a room. @@ -487,6 +498,14 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): The event ids to use as the auth_events for the new event. Should normally be left as None, which will cause them to be calculated based on the room state at the prev_events. + state_event_ids: + The full state at a given event. This is used particularly by the MSC2716 + /batch_send endpoint. One use case is the historical `state_events_at_start`; + since each is marked as an `outlier`, the `EventContext.for_outlier()` won't + have any `state_ids` set and therefore can't derive any state even though the + prev_events are set so we need to set them ourself via this argument. + This should normally be left as None, which will cause the auth_event_ids + to be calculated based on the room state at the prev_events. Returns: A tuple of the new event ID and stream ID. @@ -526,6 +545,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): allow_no_prev_events=allow_no_prev_events, prev_event_ids=prev_event_ids, auth_event_ids=auth_event_ids, + state_event_ids=state_event_ids, ) return result @@ -548,6 +568,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): allow_no_prev_events: bool = False, prev_event_ids: Optional[List[str]] = None, auth_event_ids: Optional[List[str]] = None, + state_event_ids: Optional[List[str]] = None, ) -> Tuple[str, int]: """Helper for update_membership. @@ -581,6 +602,14 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): The event ids to use as the auth_events for the new event. Should normally be left as None, which will cause them to be calculated based on the room state at the prev_events. + state_event_ids: + The full state at a given event. This is used particularly by the MSC2716 + /batch_send endpoint. One use case is the historical `state_events_at_start`; + since each is marked as an `outlier`, the `EventContext.for_outlier()` won't + have any `state_ids` set and therefore can't derive any state even though the + prev_events are set so we need to set them ourself via this argument. + This should normally be left as None, which will cause the auth_event_ids + to be calculated based on the room state at the prev_events. Returns: A tuple of the new event ID and stream ID. @@ -708,6 +737,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): allow_no_prev_events=allow_no_prev_events, prev_event_ids=prev_event_ids, auth_event_ids=auth_event_ids, + state_event_ids=state_event_ids, content=content, require_consent=require_consent, outlier=outlier, @@ -932,6 +962,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): ratelimit=ratelimit, prev_event_ids=latest_event_ids, auth_event_ids=auth_event_ids, + state_event_ids=state_event_ids, content=content, require_consent=require_consent, outlier=outlier, diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py index 0048973e59..0780485322 100644 --- a/synapse/rest/client/room_batch.py +++ b/synapse/rest/client/room_batch.py @@ -124,14 +124,14 @@ class RoomBatchSendEventRestServlet(RestServlet): ) # For the event we are inserting next to (`prev_event_ids_from_query`), - # find the most recent auth events (derived from state events) that - # allowed that message to be sent. We will use that as a base - # to auth our historical messages against. - auth_event_ids = await self.room_batch_handler.get_most_recent_auth_event_ids_from_event_id_list( + # find the most recent state events that allowed that message to be + # sent. We will use that as a base to auth our historical messages + # against. + state_event_ids = await self.room_batch_handler.get_most_recent_full_state_ids_from_event_id_list( prev_event_ids_from_query ) - if not auth_event_ids: + if not state_event_ids: raise SynapseError( HTTPStatus.BAD_REQUEST, "No auth events found for given prev_event query parameter. The prev_event=%s probably does not exist." @@ -148,13 +148,13 @@ class RoomBatchSendEventRestServlet(RestServlet): await self.room_batch_handler.persist_state_events_at_start( state_events_at_start=body["state_events_at_start"], room_id=room_id, - initial_auth_event_ids=auth_event_ids, + initial_state_event_ids=state_event_ids, app_service_requester=requester, ) ) # Update our ongoing auth event ID list with all of the new state we # just created - auth_event_ids.extend(state_event_ids_at_start) + state_event_ids.extend(state_event_ids_at_start) inherited_depth = await self.room_batch_handler.inherit_depth_from_prev_ids( prev_event_ids_from_query @@ -196,7 +196,12 @@ class RoomBatchSendEventRestServlet(RestServlet): ), base_insertion_event_dict, prev_event_ids=base_insertion_event_dict.get("prev_events"), - auth_event_ids=auth_event_ids, + # Also set the explicit state here because we want to resolve + # any `state_events_at_start` here too. It's not strictly + # necessary to accomplish anything but if someone asks for the + # state at this point, we probably want to show them the + # historical state that was part of this batch. + state_event_ids=state_event_ids, historical=True, depth=inherited_depth, ) @@ -212,7 +217,7 @@ class RoomBatchSendEventRestServlet(RestServlet): room_id=room_id, batch_id_to_connect_to=batch_id_to_connect_to, inherited_depth=inherited_depth, - auth_event_ids=auth_event_ids, + initial_state_event_ids=state_event_ids, app_service_requester=requester, ) -- cgit 1.4.1