From 61fd2a8f591f20fe9d1cffe659336664bf44e742 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 3 Mar 2022 10:52:35 +0000 Subject: Limit the size of the aggregation_key (#12101) There's no reason to let people use long keys. --- synapse/handlers/message.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'synapse/handlers/message.py') diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 61cb133ef2..0799ec9a84 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1069,6 +1069,9 @@ class EventCreationHandler: if relation_type == RelationTypes.ANNOTATION: aggregation_key = relation["key"] + if len(aggregation_key) > 500: + raise SynapseError(400, "Aggregation key is too long") + already_exists = await self.store.has_user_annotated_event( relates_to, event.type, aggregation_key, event.sender ) -- cgit 1.5.1 From ea27528b5d177dcfc5a4e38b463baeace916dc8e Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 10 Mar 2022 10:36:13 -0500 Subject: Support stable identifiers for MSC3440: Threading (#12151) The unstable identifiers are still supported if the experimental configuration flag is enabled. The unstable identifiers will be removed in a future release. --- changelog.d/12151.feature | 1 + synapse/api/constants.py | 4 +- synapse/api/filtering.py | 23 ++++----- synapse/events/utils.py | 9 +++- synapse/handlers/message.py | 5 +- synapse/rest/client/versions.py | 1 + synapse/server.py | 2 +- synapse/storage/databases/main/events.py | 5 +- synapse/storage/databases/main/relations.py | 77 ++++++++++++++++++----------- synapse/storage/databases/main/stream.py | 18 ++++--- tests/rest/client/test_relations.py | 7 +-- tests/rest/client/test_rooms.py | 18 +++---- tests/storage/test_stream.py | 20 ++++---- 13 files changed, 109 insertions(+), 81 deletions(-) create mode 100644 changelog.d/12151.feature (limited to 'synapse/handlers/message.py') diff --git a/changelog.d/12151.feature b/changelog.d/12151.feature new file mode 100644 index 0000000000..18432b2da9 --- /dev/null +++ b/changelog.d/12151.feature @@ -0,0 +1 @@ +Support the stable identifiers from [MSC3440](https://github.com/matrix-org/matrix-doc/pull/3440): threads. diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 36ace7c613..b0c08a074d 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -178,7 +178,9 @@ class RelationTypes: ANNOTATION: Final = "m.annotation" REPLACE: Final = "m.replace" REFERENCE: Final = "m.reference" - THREAD: Final = "io.element.thread" + THREAD: Final = "m.thread" + # TODO Remove this in Synapse >= v1.57.0. + UNSTABLE_THREAD: Final = "io.element.thread" class LimitBlockingTypes: diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index cb532d7238..27e97d6f37 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -88,7 +88,9 @@ ROOM_EVENT_FILTER_SCHEMA = { "org.matrix.labels": {"type": "array", "items": {"type": "string"}}, "org.matrix.not_labels": {"type": "array", "items": {"type": "string"}}, # MSC3440, filtering by event relations. + "related_by_senders": {"type": "array", "items": {"type": "string"}}, "io.element.relation_senders": {"type": "array", "items": {"type": "string"}}, + "related_by_rel_types": {"type": "array", "items": {"type": "string"}}, "io.element.relation_types": {"type": "array", "items": {"type": "string"}}, }, } @@ -318,19 +320,18 @@ class Filter: self.labels = filter_json.get("org.matrix.labels", None) self.not_labels = filter_json.get("org.matrix.not_labels", []) - # Ideally these would be rejected at the endpoint if they were provided - # and not supported, but that would involve modifying the JSON schema - # based on the homeserver configuration. + self.related_by_senders = self.filter_json.get("related_by_senders", None) + self.related_by_rel_types = self.filter_json.get("related_by_rel_types", None) + + # Fallback to the unstable prefix if the stable version is not given. if hs.config.experimental.msc3440_enabled: - self.relation_senders = self.filter_json.get( + self.related_by_senders = self.related_by_senders or self.filter_json.get( "io.element.relation_senders", None ) - self.relation_types = self.filter_json.get( - "io.element.relation_types", None + self.related_by_rel_types = ( + self.related_by_rel_types + or self.filter_json.get("io.element.relation_types", None) ) - else: - self.relation_senders = None - self.relation_types = None def filters_all_types(self) -> bool: return "*" in self.not_types @@ -461,7 +462,7 @@ class Filter: event_ids = [event.event_id for event in events if isinstance(event, EventBase)] # type: ignore[attr-defined] event_ids_to_keep = set( await self._store.events_have_relations( - event_ids, self.relation_senders, self.relation_types + event_ids, self.related_by_senders, self.related_by_rel_types ) ) @@ -474,7 +475,7 @@ class Filter: async def filter(self, events: Iterable[FilterEvent]) -> List[FilterEvent]: result = [event for event in events if self._check(event)] - if self.relation_senders or self.relation_types: + if self.related_by_senders or self.related_by_rel_types: return await self._check_event_relations(result) return result diff --git a/synapse/events/utils.py b/synapse/events/utils.py index ee34cb46e4..b2a237c1e0 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -38,6 +38,7 @@ from synapse.util.frozenutils import unfreeze from . import EventBase if TYPE_CHECKING: + from synapse.server import HomeServer from synapse.storage.databases.main.relations import BundledAggregations @@ -395,6 +396,9 @@ class EventClientSerializer: clients. """ + def __init__(self, hs: "HomeServer"): + self._msc3440_enabled = hs.config.experimental.msc3440_enabled + def serialize_event( self, event: Union[JsonDict, EventBase], @@ -515,11 +519,14 @@ class EventClientSerializer: thread.latest_event, serialized_latest_event, thread.latest_edit ) - serialized_aggregations[RelationTypes.THREAD] = { + thread_summary = { "latest_event": serialized_latest_event, "count": thread.count, "current_user_participated": thread.current_user_participated, } + serialized_aggregations[RelationTypes.THREAD] = thread_summary + if self._msc3440_enabled: + serialized_aggregations[RelationTypes.UNSTABLE_THREAD] = thread_summary # Include the bundled aggregations in the event. if serialized_aggregations: diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 0799ec9a84..f9544fe7fb 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1079,7 +1079,10 @@ class EventCreationHandler: raise SynapseError(400, "Can't send same reaction twice") # Don't attempt to start a thread if the parent event is a relation. - elif relation_type == RelationTypes.THREAD: + elif ( + relation_type == RelationTypes.THREAD + or relation_type == RelationTypes.UNSTABLE_THREAD + ): if await self.store.event_includes_relation(relates_to): raise SynapseError( 400, "Cannot start threads from an event with a relation" diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py index 2e5d0e4e22..9a65aa4843 100644 --- a/synapse/rest/client/versions.py +++ b/synapse/rest/client/versions.py @@ -101,6 +101,7 @@ class VersionsRestServlet(RestServlet): "org.matrix.msc3030": self.config.experimental.msc3030_enabled, # Adds support for thread relations, per MSC3440. "org.matrix.msc3440": self.config.experimental.msc3440_enabled, + "org.matrix.msc3440.stable": True, # TODO: remove when "v1.3" is added above }, }, ) diff --git a/synapse/server.py b/synapse/server.py index 1270abb5a3..7741ff29dc 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -754,7 +754,7 @@ class HomeServer(metaclass=abc.ABCMeta): @cache_in_self def get_event_client_serializer(self) -> EventClientSerializer: - return EventClientSerializer() + return EventClientSerializer(self) @cache_in_self def get_password_policy_handler(self) -> PasswordPolicyHandler: diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 1a322882bf..1f60aef180 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1814,7 +1814,10 @@ class PersistEventsStore: if rel_type == RelationTypes.REPLACE: txn.call_after(self.store.get_applicable_edit.invalidate, (parent_id,)) - if rel_type == RelationTypes.THREAD: + if ( + rel_type == RelationTypes.THREAD + or rel_type == RelationTypes.UNSTABLE_THREAD + ): txn.call_after(self.store.get_thread_summary.invalidate, (parent_id,)) # It should be safe to only invalidate the cache if the user has not # previously participated in the thread, but that's difficult (and diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py index be1500092b..c4869d64e6 100644 --- a/synapse/storage/databases/main/relations.py +++ b/synapse/storage/databases/main/relations.py @@ -508,7 +508,7 @@ class RelationsWorkerStore(SQLBaseStore): AND parent.room_id = child.room_id WHERE %s - AND relation_type = ? + AND %s ORDER BY parent.event_id, child.topological_ordering DESC, child.stream_ordering DESC """ else: @@ -523,16 +523,22 @@ class RelationsWorkerStore(SQLBaseStore): AND parent.room_id = child.room_id WHERE %s - AND relation_type = ? + AND %s ORDER BY child.topological_ordering DESC, child.stream_ordering DESC """ clause, args = make_in_list_sql_clause( txn.database_engine, "relates_to_id", event_ids ) - args.append(RelationTypes.THREAD) - txn.execute(sql % (clause,), args) + if self._msc3440_enabled: + relations_clause = "(relation_type = ? OR relation_type = ?)" + args.extend((RelationTypes.THREAD, RelationTypes.UNSTABLE_THREAD)) + else: + relations_clause = "relation_type = ?" + args.append(RelationTypes.THREAD) + + txn.execute(sql % (clause, relations_clause), args) latest_event_ids = {} for parent_event_id, child_event_id in txn: # Only consider the latest threaded reply (by topological ordering). @@ -552,7 +558,7 @@ class RelationsWorkerStore(SQLBaseStore): AND parent.room_id = child.room_id WHERE %s - AND relation_type = ? + AND %s GROUP BY parent.event_id """ @@ -561,9 +567,15 @@ class RelationsWorkerStore(SQLBaseStore): clause, args = make_in_list_sql_clause( txn.database_engine, "relates_to_id", latest_event_ids.keys() ) - args.append(RelationTypes.THREAD) - txn.execute(sql % (clause,), args) + if self._msc3440_enabled: + relations_clause = "(relation_type = ? OR relation_type = ?)" + args.extend((RelationTypes.THREAD, RelationTypes.UNSTABLE_THREAD)) + else: + relations_clause = "relation_type = ?" + args.append(RelationTypes.THREAD) + + txn.execute(sql % (clause, relations_clause), args) counts = dict(cast(List[Tuple[str, int]], txn.fetchall())) return counts, latest_event_ids @@ -626,16 +638,24 @@ class RelationsWorkerStore(SQLBaseStore): AND parent.room_id = child.room_id WHERE %s - AND relation_type = ? + AND %s AND child.sender = ? """ clause, args = make_in_list_sql_clause( txn.database_engine, "relates_to_id", event_ids ) - args.extend((RelationTypes.THREAD, user_id)) - txn.execute(sql % (clause,), args) + if self._msc3440_enabled: + relations_clause = "(relation_type = ? OR relation_type = ?)" + args.extend((RelationTypes.THREAD, RelationTypes.UNSTABLE_THREAD)) + else: + relations_clause = "relation_type = ?" + args.append(RelationTypes.THREAD) + + args.append(user_id) + + txn.execute(sql % (clause, relations_clause), args) return {row[0] for row in txn.fetchall()} participated_threads = await self.db_pool.runInteraction( @@ -834,26 +854,23 @@ class RelationsWorkerStore(SQLBaseStore): results.setdefault(event_id, BundledAggregations()).replace = edit # Fetch thread summaries. - if self._msc3440_enabled: - summaries = await self._get_thread_summaries(events_by_id.keys()) - # Only fetch participated for a limited selection based on what had - # summaries. - participated = await self._get_threads_participated( - summaries.keys(), user_id - ) - for event_id, summary in summaries.items(): - if summary: - thread_count, latest_thread_event, edit = summary - results.setdefault( - event_id, BundledAggregations() - ).thread = _ThreadAggregation( - latest_event=latest_thread_event, - latest_edit=edit, - count=thread_count, - # If there's a thread summary it must also exist in the - # participated dictionary. - current_user_participated=participated[event_id], - ) + summaries = await self._get_thread_summaries(events_by_id.keys()) + # Only fetch participated for a limited selection based on what had + # summaries. + participated = await self._get_threads_participated(summaries.keys(), user_id) + for event_id, summary in summaries.items(): + if summary: + thread_count, latest_thread_event, edit = summary + results.setdefault( + event_id, BundledAggregations() + ).thread = _ThreadAggregation( + latest_event=latest_thread_event, + latest_edit=edit, + count=thread_count, + # If there's a thread summary it must also exist in the + # participated dictionary. + current_user_participated=participated[event_id], + ) return results diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index a898f847e7..39e1efe373 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -325,21 +325,23 @@ def filter_to_clause(event_filter: Optional[Filter]) -> Tuple[str, List[str]]: args.extend(event_filter.labels) # Filter on relation_senders / relation types from the joined tables. - if event_filter.relation_senders: + if event_filter.related_by_senders: clauses.append( "(%s)" % " OR ".join( - "related_event.sender = ?" for _ in event_filter.relation_senders + "related_event.sender = ?" for _ in event_filter.related_by_senders ) ) - args.extend(event_filter.relation_senders) + args.extend(event_filter.related_by_senders) - if event_filter.relation_types: + if event_filter.related_by_rel_types: clauses.append( "(%s)" - % " OR ".join("relation_type = ?" for _ in event_filter.relation_types) + % " OR ".join( + "relation_type = ?" for _ in event_filter.related_by_rel_types + ) ) - args.extend(event_filter.relation_types) + args.extend(event_filter.related_by_rel_types) return " AND ".join(clauses), args @@ -1203,7 +1205,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): # If there is a filter on relation_senders and relation_types join to the # relations table. if event_filter and ( - event_filter.relation_senders or event_filter.relation_types + event_filter.related_by_senders or event_filter.related_by_rel_types ): # Filtering by relations could cause the same event to appear multiple # times (since there's no limit on the number of relations to an event). @@ -1211,7 +1213,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): join_clause += """ LEFT JOIN event_relations AS relation ON (event.event_id = relation.relates_to_id) """ - if event_filter.relation_senders: + if event_filter.related_by_senders: join_clause += """ LEFT JOIN events AS related_event ON (relation.event_id = related_event.event_id) """ diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py index f9ae6e663f..0cbe6c0cf7 100644 --- a/tests/rest/client/test_relations.py +++ b/tests/rest/client/test_relations.py @@ -547,9 +547,7 @@ class RelationsTestCase(BaseRelationsTestCase): ) self.assertEqual(400, channel.code, channel.json_body) - @unittest.override_config( - {"experimental_features": {"msc3440_enabled": True, "msc3666_enabled": True}} - ) + @unittest.override_config({"experimental_features": {"msc3666_enabled": True}}) def test_bundled_aggregations(self) -> None: """ Test that annotations, references, and threads get correctly bundled. @@ -758,7 +756,6 @@ class RelationsTestCase(BaseRelationsTestCase): }, ) - @unittest.override_config({"experimental_features": {"msc3440_enabled": True}}) def test_ignore_invalid_room(self) -> None: """Test that we ignore invalid relations over federation.""" # Create another room and send a message in it. @@ -1065,7 +1062,6 @@ class RelationsTestCase(BaseRelationsTestCase): {"event_id": edit_event_id, "sender": self.user_id}, m_replace_dict ) - @unittest.override_config({"experimental_features": {"msc3440_enabled": True}}) def test_edit_thread(self) -> None: """Test that editing a thread works.""" @@ -1383,7 +1379,6 @@ class RelationRedactionTestCase(BaseRelationsTestCase): chunk = self._get_aggregations() self.assertEqual(chunk, [{"type": "m.reaction", "key": "a", "count": 1}]) - @unittest.override_config({"experimental_features": {"msc3440_enabled": True}}) def test_redact_relation_thread(self) -> None: """ Test that thread replies are properly handled after the thread reply redacted. diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index 37866ee330..3a9617d6da 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -2141,21 +2141,19 @@ class RelationsTestCase(unittest.HomeserverTestCase): def test_filter_relation_senders(self) -> None: # Messages which second user reacted to. - filter = {"io.element.relation_senders": [self.second_user_id]} + filter = {"related_by_senders": [self.second_user_id]} chunk = self._filter_messages(filter) self.assertEqual(len(chunk), 1, chunk) self.assertEqual(chunk[0]["event_id"], self.event_id_1) # Messages which third user reacted to. - filter = {"io.element.relation_senders": [self.third_user_id]} + filter = {"related_by_senders": [self.third_user_id]} chunk = self._filter_messages(filter) self.assertEqual(len(chunk), 1, chunk) self.assertEqual(chunk[0]["event_id"], self.event_id_2) # Messages which either user reacted to. - filter = { - "io.element.relation_senders": [self.second_user_id, self.third_user_id] - } + filter = {"related_by_senders": [self.second_user_id, self.third_user_id]} chunk = self._filter_messages(filter) self.assertEqual(len(chunk), 2, chunk) self.assertCountEqual( @@ -2164,20 +2162,20 @@ class RelationsTestCase(unittest.HomeserverTestCase): def test_filter_relation_type(self) -> None: # Messages which have annotations. - filter = {"io.element.relation_types": [RelationTypes.ANNOTATION]} + filter = {"related_by_rel_types": [RelationTypes.ANNOTATION]} chunk = self._filter_messages(filter) self.assertEqual(len(chunk), 1, chunk) self.assertEqual(chunk[0]["event_id"], self.event_id_1) # Messages which have references. - filter = {"io.element.relation_types": [RelationTypes.REFERENCE]} + filter = {"related_by_rel_types": [RelationTypes.REFERENCE]} chunk = self._filter_messages(filter) self.assertEqual(len(chunk), 1, chunk) self.assertEqual(chunk[0]["event_id"], self.event_id_2) # Messages which have either annotations or references. filter = { - "io.element.relation_types": [ + "related_by_rel_types": [ RelationTypes.ANNOTATION, RelationTypes.REFERENCE, ] @@ -2191,8 +2189,8 @@ class RelationsTestCase(unittest.HomeserverTestCase): def test_filter_relation_senders_and_type(self) -> None: # Messages which second user reacted to. filter = { - "io.element.relation_senders": [self.second_user_id], - "io.element.relation_types": [RelationTypes.ANNOTATION], + "related_by_senders": [self.second_user_id], + "related_by_rel_types": [RelationTypes.ANNOTATION], } chunk = self._filter_messages(filter) self.assertEqual(len(chunk), 1, chunk) diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 6a1cf33054..eaa0d7d749 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -129,21 +129,19 @@ class PaginationTestCase(HomeserverTestCase): def test_filter_relation_senders(self): # Messages which second user reacted to. - filter = {"io.element.relation_senders": [self.second_user_id]} + filter = {"related_by_senders": [self.second_user_id]} chunk = self._filter_messages(filter) self.assertEqual(len(chunk), 1, chunk) self.assertEqual(chunk[0].event_id, self.event_id_1) # Messages which third user reacted to. - filter = {"io.element.relation_senders": [self.third_user_id]} + filter = {"related_by_senders": [self.third_user_id]} chunk = self._filter_messages(filter) self.assertEqual(len(chunk), 1, chunk) self.assertEqual(chunk[0].event_id, self.event_id_2) # Messages which either user reacted to. - filter = { - "io.element.relation_senders": [self.second_user_id, self.third_user_id] - } + filter = {"related_by_senders": [self.second_user_id, self.third_user_id]} chunk = self._filter_messages(filter) self.assertEqual(len(chunk), 2, chunk) self.assertCountEqual( @@ -152,20 +150,20 @@ class PaginationTestCase(HomeserverTestCase): def test_filter_relation_type(self): # Messages which have annotations. - filter = {"io.element.relation_types": [RelationTypes.ANNOTATION]} + filter = {"related_by_rel_types": [RelationTypes.ANNOTATION]} chunk = self._filter_messages(filter) self.assertEqual(len(chunk), 1, chunk) self.assertEqual(chunk[0].event_id, self.event_id_1) # Messages which have references. - filter = {"io.element.relation_types": [RelationTypes.REFERENCE]} + filter = {"related_by_rel_types": [RelationTypes.REFERENCE]} chunk = self._filter_messages(filter) self.assertEqual(len(chunk), 1, chunk) self.assertEqual(chunk[0].event_id, self.event_id_2) # Messages which have either annotations or references. filter = { - "io.element.relation_types": [ + "related_by_rel_types": [ RelationTypes.ANNOTATION, RelationTypes.REFERENCE, ] @@ -179,8 +177,8 @@ class PaginationTestCase(HomeserverTestCase): def test_filter_relation_senders_and_type(self): # Messages which second user reacted to. filter = { - "io.element.relation_senders": [self.second_user_id], - "io.element.relation_types": [RelationTypes.ANNOTATION], + "related_by_senders": [self.second_user_id], + "related_by_rel_types": [RelationTypes.ANNOTATION], } chunk = self._filter_messages(filter) self.assertEqual(len(chunk), 1, chunk) @@ -201,7 +199,7 @@ class PaginationTestCase(HomeserverTestCase): tok=self.second_tok, ) - filter = {"io.element.relation_senders": [self.second_user_id]} + filter = {"related_by_senders": [self.second_user_id]} chunk = self._filter_messages(filter) self.assertEqual(len(chunk), 1, chunk) self.assertEqual(chunk[0].event_id, self.event_id_1) -- cgit 1.5.1 From 14662d3c18217ba9e865b56203829e88d2ed4532 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 25 Mar 2022 09:21:06 -0500 Subject: Refactor `create_new_client_event` to use a new parameter, `state_event_ids`, which accurately describes the usage with MSC2716 instead of abusing `auth_event_ids` (#12083) Spawned from https://github.com/matrix-org/synapse/pull/10975#discussion_r813183430 Part of [MSC2716](https://github.com/matrix-org/matrix-spec-proposals/pull/2716) --- changelog.d/12083.misc | 1 + synapse/handlers/message.py | 63 +++++++++++++++------ synapse/handlers/room_batch.py | 112 ++++++++++++++++++++++++-------------- synapse/handlers/room_member.py | 31 +++++++++++ synapse/rest/client/room_batch.py | 23 +++++--- 5 files changed, 165 insertions(+), 65 deletions(-) create mode 100644 changelog.d/12083.misc (limited to 'synapse/handlers/message.py') diff --git a/changelog.d/12083.misc b/changelog.d/12083.misc new file mode 100644 index 0000000000..88fd6b92ee --- /dev/null +++ b/changelog.d/12083.misc @@ -0,0 +1 @@ +Refactor `create_new_client_event` to use a new parameter, `state_event_ids`, which accurately describes the usage with [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) instead of abusing `auth_event_ids`. diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index f9544fe7fb..1c4fb4360a 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -493,6 +493,7 @@ class EventCreationHandler: allow_no_prev_events: bool = False, prev_event_ids: Optional[List[str]] = None, auth_event_ids: Optional[List[str]] = None, + state_event_ids: Optional[List[str]] = None, require_consent: bool = True, outlier: bool = False, historical: bool = False, @@ -527,6 +528,15 @@ class EventCreationHandler: If non-None, prev_event_ids must also be provided. + state_event_ids: + The full state at a given event. This is used particularly by the MSC2716 + /batch_send endpoint. One use case is with insertion events which float at + the beginning of a historical batch and don't have any `prev_events` to + derive from; we add all of these state events as the explicit state so the + rest of the historical batch can inherit the same state and state_group. + This should normally be left as None, which will cause the auth_event_ids + to be calculated based on the room state at the prev_events. + require_consent: Whether to check if the requester has consented to the privacy policy. @@ -612,6 +622,7 @@ class EventCreationHandler: allow_no_prev_events=allow_no_prev_events, prev_event_ids=prev_event_ids, auth_event_ids=auth_event_ids, + state_event_ids=state_event_ids, depth=depth, ) @@ -772,6 +783,7 @@ class EventCreationHandler: allow_no_prev_events: bool = False, prev_event_ids: Optional[List[str]] = None, auth_event_ids: Optional[List[str]] = None, + state_event_ids: Optional[List[str]] = None, ratelimit: bool = True, txn_id: Optional[str] = None, ignore_shadow_ban: bool = False, @@ -801,6 +813,14 @@ class EventCreationHandler: based on the room state at the prev_events. If non-None, prev_event_ids must also be provided. + state_event_ids: + The full state at a given event. This is used particularly by the MSC2716 + /batch_send endpoint. One use case is with insertion events which float at + the beginning of a historical batch and don't have any `prev_events` to + derive from; we add all of these state events as the explicit state so the + rest of the historical batch can inherit the same state and state_group. + This should normally be left as None, which will cause the auth_event_ids + to be calculated based on the room state at the prev_events. ratelimit: Whether to rate limit this send. txn_id: The transaction ID. ignore_shadow_ban: True if shadow-banned users should be allowed to @@ -856,8 +876,10 @@ class EventCreationHandler: requester, event_dict, txn_id=txn_id, + allow_no_prev_events=allow_no_prev_events, prev_event_ids=prev_event_ids, auth_event_ids=auth_event_ids, + state_event_ids=state_event_ids, outlier=outlier, historical=historical, depth=depth, @@ -893,6 +915,7 @@ class EventCreationHandler: allow_no_prev_events: bool = False, prev_event_ids: Optional[List[str]] = None, auth_event_ids: Optional[List[str]] = None, + state_event_ids: Optional[List[str]] = None, depth: Optional[int] = None, ) -> Tuple[EventBase, EventContext]: """Create a new event for a local client @@ -915,6 +938,15 @@ class EventCreationHandler: Should normally be left as None, which will cause them to be calculated based on the room state at the prev_events. + state_event_ids: + The full state at a given event. This is used particularly by the MSC2716 + /batch_send endpoint. One use case is with insertion events which float at + the beginning of a historical batch and don't have any `prev_events` to + derive from; we add all of these state events as the explicit state so the + rest of the historical batch can inherit the same state and state_group. + This should normally be left as None, which will cause the auth_event_ids + to be calculated based on the room state at the prev_events. + depth: Override the depth used to order the event in the DAG. Should normally be set to None, which will cause the depth to be calculated based on the prev_events. @@ -922,31 +954,26 @@ class EventCreationHandler: Returns: Tuple of created event, context """ - # Strip down the auth_event_ids to only what we need to auth the event. + # Strip down the state_event_ids to only what we need to auth the event. # For example, we don't need extra m.room.member that don't match event.sender - full_state_ids_at_event = None - if auth_event_ids is not None: - # If auth events are provided, prev events must be also. + if state_event_ids is not None: + # Do a quick check to make sure that prev_event_ids is present to + # make the type-checking around `builder.build` happy. # prev_event_ids could be an empty array though. assert prev_event_ids is not None - # Copy the full auth state before it stripped down - full_state_ids_at_event = auth_event_ids.copy() - temp_event = await builder.build( prev_event_ids=prev_event_ids, - auth_event_ids=auth_event_ids, + auth_event_ids=state_event_ids, depth=depth, ) - auth_events = await self.store.get_events_as_list(auth_event_ids) + state_events = await self.store.get_events_as_list(state_event_ids) # Create a StateMap[str] - auth_event_state_map = { - (e.type, e.state_key): e.event_id for e in auth_events - } - # Actually strip down and use the necessary auth events + state_map = {(e.type, e.state_key): e.event_id for e in state_events} + # Actually strip down and only use the necessary auth events auth_event_ids = self._event_auth_handler.compute_auth_events( event=temp_event, - current_state_ids=auth_event_state_map, + current_state_ids=state_map, for_verification=False, ) @@ -989,12 +1016,16 @@ class EventCreationHandler: context = EventContext.for_outlier() elif ( event.type == EventTypes.MSC2716_INSERTION - and full_state_ids_at_event + and state_event_ids and builder.internal_metadata.is_historical() ): + # Add explicit state to the insertion event so it has state to derive + # from even though it's floating with no `prev_events`. The rest of + # the batch can derive from this state and state_group. + # # TODO(faster_joins): figure out how this works, and make sure that the # old state is complete. - old_state = await self.store.get_events_as_list(full_state_ids_at_event) + old_state = await self.store.get_events_as_list(state_event_ids) context = await self.state.compute_event_context(event, old_state=old_state) else: context = await self.state.compute_event_context(event) diff --git a/synapse/handlers/room_batch.py b/synapse/handlers/room_batch.py index abbf7b7b27..a0255bd143 100644 --- a/synapse/handlers/room_batch.py +++ b/synapse/handlers/room_batch.py @@ -121,12 +121,11 @@ class RoomBatchHandler: return create_requester(user_id, app_service=app_service) - async def get_most_recent_auth_event_ids_from_event_id_list( + async def get_most_recent_full_state_ids_from_event_id_list( self, event_ids: List[str] ) -> List[str]: - """Find the most recent auth event ids (derived from state events) that - allowed that message to be sent. We will use this as a base - to auth our historical messages against. + """Find the most recent event_id and grab the full state at that event. + We will use this as a base to auth our historical messages against. Args: event_ids: List of event ID's to look at @@ -136,38 +135,37 @@ class RoomBatchHandler: """ ( - most_recent_prev_event_id, + most_recent_event_id, _, ) = await self.store.get_max_depth_of(event_ids) # mapping from (type, state_key) -> state_event_id prev_state_map = await self.state_store.get_state_ids_for_event( - most_recent_prev_event_id + most_recent_event_id ) # List of state event ID's - prev_state_ids = list(prev_state_map.values()) - auth_event_ids = prev_state_ids + full_state_ids = list(prev_state_map.values()) - return auth_event_ids + return full_state_ids async def persist_state_events_at_start( self, state_events_at_start: List[JsonDict], room_id: str, - initial_auth_event_ids: List[str], + initial_state_event_ids: List[str], app_service_requester: Requester, ) -> List[str]: """Takes all `state_events_at_start` event dictionaries and creates/persists - them as floating state events which don't resolve into the current room state. - They are floating because they reference a fake prev_event which doesn't connect - to the normal DAG at all. + them in a floating state event chain which don't resolve into the current room + state. They are floating because they reference no prev_events and are marked + as outliers which disconnects them from the normal DAG. Args: state_events_at_start: room_id: Room where you want the events persisted in. - initial_auth_event_ids: These will be the auth_events for the first - state event created. Each event created afterwards will be - added to the list of auth events for the next state event - created. + initial_state_event_ids: + The base set of state for the historical batch which the floating + state chain will derive from. This should probably be the state + from the `prev_event` defined by `/batch_send?prev_event_id=$abc`. app_service_requester: The requester of an application service. Returns: @@ -176,7 +174,7 @@ class RoomBatchHandler: assert app_service_requester.app_service state_event_ids_at_start = [] - auth_event_ids = initial_auth_event_ids.copy() + state_event_ids = initial_state_event_ids.copy() # Make the state events float off on their own by specifying no # prev_events for the first one in the chain so we don't have a bunch of @@ -189,9 +187,7 @@ class RoomBatchHandler: ) logger.debug( - "RoomBatchSendEventRestServlet inserting state_event=%s, auth_event_ids=%s", - state_event, - auth_event_ids, + "RoomBatchSendEventRestServlet inserting state_event=%s", state_event ) event_dict = { @@ -217,16 +213,26 @@ class RoomBatchHandler: room_id=room_id, action=membership, content=event_dict["content"], + # Mark as an outlier to disconnect it from the normal DAG + # and not show up between batches of history. outlier=True, historical=True, - # Only the first event in the chain should be floating. + # Only the first event in the state chain should be floating. # The rest should hang off each other in a chain. allow_no_prev_events=index == 0, prev_event_ids=prev_event_ids_for_state_chain, + # Since each state event is marked as an outlier, the + # `EventContext.for_outlier()` won't have any `state_ids` + # set and therefore can't derive any state even though the + # prev_events are set. Also since the first event in the + # state chain is floating with no `prev_events`, it can't + # derive state from anywhere automatically. So we need to + # set some state explicitly. + # # Make sure to use a copy of this list because we modify it # later in the loop here. Otherwise it will be the same # reference and also update in the event when we append later. - auth_event_ids=auth_event_ids.copy(), + state_event_ids=state_event_ids.copy(), ) else: # TODO: Add some complement tests that adds state that is not member joins @@ -240,21 +246,31 @@ class RoomBatchHandler: state_event["sender"], app_service_requester.app_service ), event_dict, + # Mark as an outlier to disconnect it from the normal DAG + # and not show up between batches of history. outlier=True, historical=True, - # Only the first event in the chain should be floating. + # Only the first event in the state chain should be floating. # The rest should hang off each other in a chain. allow_no_prev_events=index == 0, prev_event_ids=prev_event_ids_for_state_chain, + # Since each state event is marked as an outlier, the + # `EventContext.for_outlier()` won't have any `state_ids` + # set and therefore can't derive any state even though the + # prev_events are set. Also since the first event in the + # state chain is floating with no `prev_events`, it can't + # derive state from anywhere automatically. So we need to + # set some state explicitly. + # # Make sure to use a copy of this list because we modify it # later in the loop here. Otherwise it will be the same # reference and also update in the event when we append later. - auth_event_ids=auth_event_ids.copy(), + state_event_ids=state_event_ids.copy(), ) event_id = event.event_id state_event_ids_at_start.append(event_id) - auth_event_ids.append(event_id) + state_event_ids.append(event_id) # Connect all the state in a floating chain prev_event_ids_for_state_chain = [event_id] @@ -265,7 +281,7 @@ class RoomBatchHandler: events_to_create: List[JsonDict], room_id: str, inherited_depth: int, - auth_event_ids: List[str], + initial_state_event_ids: List[str], app_service_requester: Requester, ) -> List[str]: """Create and persists all events provided sequentially. Handles the @@ -281,8 +297,10 @@ class RoomBatchHandler: room_id: Room where you want the events persisted in. inherited_depth: The depth to create the events at (you will probably by calling inherit_depth_from_prev_ids(...)). - auth_event_ids: Define which events allow you to create the given - event in the room. + initial_state_event_ids: + This is used to set explicit state for the insertion event at + the start of the historical batch since it's floating with no + prev_events to derive state from automatically. app_service_requester: The requester of an application service. Returns: @@ -290,6 +308,11 @@ class RoomBatchHandler: """ assert app_service_requester.app_service + # We expect the first event in a historical batch to be an insertion event + assert events_to_create[0]["type"] == EventTypes.MSC2716_INSERTION + # We expect the last event in a historical batch to be an batch event + assert events_to_create[-1]["type"] == EventTypes.MSC2716_BATCH + # Make the historical event chain float off on its own by specifying no # prev_events for the first event in the chain which causes the HS to # ask for the state at the start of the batch later. @@ -321,11 +344,16 @@ class RoomBatchHandler: ev["sender"], app_service_requester.app_service ), event_dict, - # Only the first event in the chain should be floating. - # The rest should hang off each other in a chain. + # Only the first event (which is the insertion event) in the + # chain should be floating. The rest should hang off each other + # in a chain. allow_no_prev_events=index == 0, prev_event_ids=event_dict.get("prev_events"), - auth_event_ids=auth_event_ids, + # Since the first event (which is the insertion event) in the + # chain is floating with no `prev_events`, it can't derive state + # from anywhere automatically. So we need to set some state + # explicitly. + state_event_ids=initial_state_event_ids if index == 0 else None, historical=True, depth=inherited_depth, ) @@ -343,10 +371,9 @@ class RoomBatchHandler: ) logger.debug( - "RoomBatchSendEventRestServlet inserting event=%s, prev_event_ids=%s, auth_event_ids=%s", + "RoomBatchSendEventRestServlet inserting event=%s, prev_event_ids=%s", event, prev_event_ids, - auth_event_ids, ) events_to_persist.append((event, context)) @@ -376,12 +403,12 @@ class RoomBatchHandler: room_id: str, batch_id_to_connect_to: str, inherited_depth: int, - auth_event_ids: List[str], + initial_state_event_ids: List[str], app_service_requester: Requester, ) -> Tuple[List[str], str]: """ - Handles creating and persisting all of the historical events as well - as insertion and batch meta events to make the batch navigable in the DAG. + Handles creating and persisting all of the historical events as well as + insertion and batch meta events to make the batch navigable in the DAG. Args: events_to_create: List of historical events to create in JSON @@ -391,8 +418,13 @@ class RoomBatchHandler: want this batch to connect to. inherited_depth: The depth to create the events at (you will probably by calling inherit_depth_from_prev_ids(...)). - auth_event_ids: Define which events allow you to create the given - event in the room. + initial_state_event_ids: + This is used to set explicit state for the insertion event at + the start of the historical batch since it's floating with no + prev_events to derive state from automatically. This should + probably be the state from the `prev_event` defined by + `/batch_send?prev_event_id=$abc` plus the outcome of + `persist_state_events_at_start` app_service_requester: The requester of an application service. Returns: @@ -438,7 +470,7 @@ class RoomBatchHandler: events_to_create=events_to_create, room_id=room_id, inherited_depth=inherited_depth, - auth_event_ids=auth_event_ids, + initial_state_event_ids=initial_state_event_ids, app_service_requester=app_service_requester, ) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 7cbc484b06..a33fa34aa8 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -272,6 +272,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): allow_no_prev_events: bool = False, prev_event_ids: Optional[List[str]] = None, auth_event_ids: Optional[List[str]] = None, + state_event_ids: Optional[List[str]] = None, txn_id: Optional[str] = None, ratelimit: bool = True, content: Optional[dict] = None, @@ -298,6 +299,14 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): The event ids to use as the auth_events for the new event. Should normally be left as None, which will cause them to be calculated based on the room state at the prev_events. + state_event_ids: + The full state at a given event. This is used particularly by the MSC2716 + /batch_send endpoint. One use case is the historical `state_events_at_start`; + since each is marked as an `outlier`, the `EventContext.for_outlier()` won't + have any `state_ids` set and therefore can't derive any state even though the + prev_events are set so we need to set them ourself via this argument. + This should normally be left as None, which will cause the auth_event_ids + to be calculated based on the room state at the prev_events. txn_id: ratelimit: @@ -353,6 +362,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): allow_no_prev_events=allow_no_prev_events, prev_event_ids=prev_event_ids, auth_event_ids=auth_event_ids, + state_event_ids=state_event_ids, require_consent=require_consent, outlier=outlier, historical=historical, @@ -456,6 +466,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): allow_no_prev_events: bool = False, prev_event_ids: Optional[List[str]] = None, auth_event_ids: Optional[List[str]] = None, + state_event_ids: Optional[List[str]] = None, ) -> Tuple[str, int]: """Update a user's membership in a room. @@ -487,6 +498,14 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): The event ids to use as the auth_events for the new event. Should normally be left as None, which will cause them to be calculated based on the room state at the prev_events. + state_event_ids: + The full state at a given event. This is used particularly by the MSC2716 + /batch_send endpoint. One use case is the historical `state_events_at_start`; + since each is marked as an `outlier`, the `EventContext.for_outlier()` won't + have any `state_ids` set and therefore can't derive any state even though the + prev_events are set so we need to set them ourself via this argument. + This should normally be left as None, which will cause the auth_event_ids + to be calculated based on the room state at the prev_events. Returns: A tuple of the new event ID and stream ID. @@ -526,6 +545,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): allow_no_prev_events=allow_no_prev_events, prev_event_ids=prev_event_ids, auth_event_ids=auth_event_ids, + state_event_ids=state_event_ids, ) return result @@ -548,6 +568,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): allow_no_prev_events: bool = False, prev_event_ids: Optional[List[str]] = None, auth_event_ids: Optional[List[str]] = None, + state_event_ids: Optional[List[str]] = None, ) -> Tuple[str, int]: """Helper for update_membership. @@ -581,6 +602,14 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): The event ids to use as the auth_events for the new event. Should normally be left as None, which will cause them to be calculated based on the room state at the prev_events. + state_event_ids: + The full state at a given event. This is used particularly by the MSC2716 + /batch_send endpoint. One use case is the historical `state_events_at_start`; + since each is marked as an `outlier`, the `EventContext.for_outlier()` won't + have any `state_ids` set and therefore can't derive any state even though the + prev_events are set so we need to set them ourself via this argument. + This should normally be left as None, which will cause the auth_event_ids + to be calculated based on the room state at the prev_events. Returns: A tuple of the new event ID and stream ID. @@ -708,6 +737,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): allow_no_prev_events=allow_no_prev_events, prev_event_ids=prev_event_ids, auth_event_ids=auth_event_ids, + state_event_ids=state_event_ids, content=content, require_consent=require_consent, outlier=outlier, @@ -932,6 +962,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): ratelimit=ratelimit, prev_event_ids=latest_event_ids, auth_event_ids=auth_event_ids, + state_event_ids=state_event_ids, content=content, require_consent=require_consent, outlier=outlier, diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py index 0048973e59..0780485322 100644 --- a/synapse/rest/client/room_batch.py +++ b/synapse/rest/client/room_batch.py @@ -124,14 +124,14 @@ class RoomBatchSendEventRestServlet(RestServlet): ) # For the event we are inserting next to (`prev_event_ids_from_query`), - # find the most recent auth events (derived from state events) that - # allowed that message to be sent. We will use that as a base - # to auth our historical messages against. - auth_event_ids = await self.room_batch_handler.get_most_recent_auth_event_ids_from_event_id_list( + # find the most recent state events that allowed that message to be + # sent. We will use that as a base to auth our historical messages + # against. + state_event_ids = await self.room_batch_handler.get_most_recent_full_state_ids_from_event_id_list( prev_event_ids_from_query ) - if not auth_event_ids: + if not state_event_ids: raise SynapseError( HTTPStatus.BAD_REQUEST, "No auth events found for given prev_event query parameter. The prev_event=%s probably does not exist." @@ -148,13 +148,13 @@ class RoomBatchSendEventRestServlet(RestServlet): await self.room_batch_handler.persist_state_events_at_start( state_events_at_start=body["state_events_at_start"], room_id=room_id, - initial_auth_event_ids=auth_event_ids, + initial_state_event_ids=state_event_ids, app_service_requester=requester, ) ) # Update our ongoing auth event ID list with all of the new state we # just created - auth_event_ids.extend(state_event_ids_at_start) + state_event_ids.extend(state_event_ids_at_start) inherited_depth = await self.room_batch_handler.inherit_depth_from_prev_ids( prev_event_ids_from_query @@ -196,7 +196,12 @@ class RoomBatchSendEventRestServlet(RestServlet): ), base_insertion_event_dict, prev_event_ids=base_insertion_event_dict.get("prev_events"), - auth_event_ids=auth_event_ids, + # Also set the explicit state here because we want to resolve + # any `state_events_at_start` here too. It's not strictly + # necessary to accomplish anything but if someone asks for the + # state at this point, we probably want to show them the + # historical state that was part of this batch. + state_event_ids=state_event_ids, historical=True, depth=inherited_depth, ) @@ -212,7 +217,7 @@ class RoomBatchSendEventRestServlet(RestServlet): room_id=room_id, batch_id_to_connect_to=batch_id_to_connect_to, inherited_depth=inherited_depth, - auth_event_ids=auth_event_ids, + initial_state_event_ids=state_event_ids, app_service_requester=requester, ) -- cgit 1.5.1