From 3b51c763ba5601e155e3e27a46cddf0370da83eb Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Wed, 5 Jan 2022 20:46:50 +0100 Subject: Fix get federation status of destination if no error occured (#11593) --- tests/rest/admin/test_federation.py | 75 ++++++++++++++++++++++++++++--------- 1 file changed, 57 insertions(+), 18 deletions(-) (limited to 'tests/rest') diff --git a/tests/rest/admin/test_federation.py b/tests/rest/admin/test_federation.py index 742f194257..b70350b6f1 100644 --- a/tests/rest/admin/test_federation.py +++ b/tests/rest/admin/test_federation.py @@ -314,15 +314,12 @@ class FederationTestCase(unittest.HomeserverTestCase): retry_interval, last_successful_stream_ordering, ) in dest: - self.get_success( - self.store.set_destination_retry_timings( - destination, failure_ts, retry_last_ts, retry_interval - ) - ) - self.get_success( - self.store.set_destination_last_successful_stream_ordering( - destination, last_successful_stream_ordering - ) + self._create_destination( + destination, + failure_ts, + retry_last_ts, + retry_interval, + last_successful_stream_ordering, ) # order by default (destination) @@ -413,11 +410,9 @@ class FederationTestCase(unittest.HomeserverTestCase): _search_test(None, "foo") _search_test(None, "bar") - def test_get_single_destination(self) -> None: - """ - Get one specific destinations. - """ - self._create_destinations(5) + def test_get_single_destination_with_retry_timings(self) -> None: + """Get one specific destination which has retry timings.""" + self._create_destinations(1) channel = self.make_request( "GET", @@ -432,6 +427,53 @@ class FederationTestCase(unittest.HomeserverTestCase): # convert channel.json_body into a List self._check_fields([channel.json_body]) + def test_get_single_destination_no_retry_timings(self) -> None: + """Get one specific destination which has no retry timings.""" + self._create_destination("sub0.example.com") + + channel = self.make_request( + "GET", + self.url + "/sub0.example.com", + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertEqual("sub0.example.com", channel.json_body["destination"]) + self.assertEqual(0, channel.json_body["retry_last_ts"]) + self.assertEqual(0, channel.json_body["retry_interval"]) + self.assertIsNone(channel.json_body["failure_ts"]) + self.assertIsNone(channel.json_body["last_successful_stream_ordering"]) + + def _create_destination( + self, + destination: str, + failure_ts: Optional[int] = None, + retry_last_ts: int = 0, + retry_interval: int = 0, + last_successful_stream_ordering: Optional[int] = None, + ) -> None: + """Create one specific destination + + Args: + destination: the destination we have successfully sent to + failure_ts: when the server started failing (ms since epoch) + retry_last_ts: time of last retry attempt in unix epoch ms + retry_interval: how long until next retry in ms + last_successful_stream_ordering: the stream_ordering of the most + recent successfully-sent PDU + """ + self.get_success( + self.store.set_destination_retry_timings( + destination, failure_ts, retry_last_ts, retry_interval + ) + ) + if last_successful_stream_ordering is not None: + self.get_success( + self.store.set_destination_last_successful_stream_ordering( + destination, last_successful_stream_ordering + ) + ) + def _create_destinations(self, number_destinations: int) -> None: """Create a number of destinations @@ -440,10 +482,7 @@ class FederationTestCase(unittest.HomeserverTestCase): """ for i in range(0, number_destinations): dest = f"sub{i}.example.com" - self.get_success(self.store.set_destination_retry_timings(dest, 50, 50, 50)) - self.get_success( - self.store.set_destination_last_successful_stream_ordering(dest, 100) - ) + self._create_destination(dest, 50, 50, 50, 100) def _check_fields(self, content: List[JsonDict]) -> None: """Checks that the expected destination attributes are present in content -- cgit 1.5.1 From 6c68e874b1eafe75db51e46064c0d3af702b4358 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 6 Jan 2022 14:00:34 -0500 Subject: Remove the /send_relation endpoint. (#11682) This was removed from MSC2674 before that was approved and is not used by any known clients. --- changelog.d/11682.removal | 1 + synapse/rest/client/relations.py | 125 ++---------------------------------- tests/rest/client/test_relations.py | 26 ++++---- 3 files changed, 19 insertions(+), 133 deletions(-) create mode 100644 changelog.d/11682.removal (limited to 'tests/rest') diff --git a/changelog.d/11682.removal b/changelog.d/11682.removal new file mode 100644 index 0000000000..50bdf35b20 --- /dev/null +++ b/changelog.d/11682.removal @@ -0,0 +1 @@ +Remove the unstable `/send_relation` endpoint. diff --git a/synapse/rest/client/relations.py b/synapse/rest/client/relations.py index 5815650ee6..3823498012 100644 --- a/synapse/rest/client/relations.py +++ b/synapse/rest/client/relations.py @@ -19,28 +19,20 @@ any time to reflect changes in the MSC. """ import logging -from typing import TYPE_CHECKING, Awaitable, Optional, Tuple +from typing import TYPE_CHECKING, Optional, Tuple -from synapse.api.constants import EventTypes, RelationTypes -from synapse.api.errors import ShadowBanError, SynapseError +from synapse.api.constants import RelationTypes +from synapse.api.errors import SynapseError from synapse.http.server import HttpServer -from synapse.http.servlet import ( - RestServlet, - parse_integer, - parse_json_object_from_request, - parse_string, -) +from synapse.http.servlet import RestServlet, parse_integer, parse_string from synapse.http.site import SynapseRequest -from synapse.rest.client.transactions import HttpTransactionCache +from synapse.rest.client._base import client_patterns from synapse.storage.relations import ( AggregationPaginationToken, PaginationChunk, RelationPaginationToken, ) from synapse.types import JsonDict -from synapse.util.stringutils import random_string - -from ._base import client_patterns if TYPE_CHECKING: from synapse.server import HomeServer @@ -48,112 +40,6 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -class RelationSendServlet(RestServlet): - """Helper API for sending events that have relation data. - - Example API shape to send a šŸ‘ reaction to a room: - - POST /rooms/!foo/send_relation/$bar/m.annotation/m.reaction?key=%F0%9F%91%8D - {} - - { - "event_id": "$foobar" - } - """ - - PATTERN = ( - "/rooms/(?P[^/]*)/send_relation" - "/(?P[^/]*)/(?P[^/]*)/(?P[^/]*)" - ) - - def __init__(self, hs: "HomeServer"): - super().__init__() - self.auth = hs.get_auth() - self.event_creation_handler = hs.get_event_creation_handler() - self.txns = HttpTransactionCache(hs) - - def register(self, http_server: HttpServer) -> None: - http_server.register_paths( - "POST", - client_patterns(self.PATTERN + "$", releases=()), - self.on_PUT_or_POST, - self.__class__.__name__, - ) - http_server.register_paths( - "PUT", - client_patterns(self.PATTERN + "/(?P[^/]*)$", releases=()), - self.on_PUT, - self.__class__.__name__, - ) - - def on_PUT( - self, - request: SynapseRequest, - room_id: str, - parent_id: str, - relation_type: str, - event_type: str, - txn_id: Optional[str] = None, - ) -> Awaitable[Tuple[int, JsonDict]]: - return self.txns.fetch_or_execute_request( - request, - self.on_PUT_or_POST, - request, - room_id, - parent_id, - relation_type, - event_type, - txn_id, - ) - - async def on_PUT_or_POST( - self, - request: SynapseRequest, - room_id: str, - parent_id: str, - relation_type: str, - event_type: str, - txn_id: Optional[str] = None, - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request, allow_guest=True) - - if event_type == EventTypes.Member: - # Add relations to a membership is meaningless, so we just deny it - # at the CS API rather than trying to handle it correctly. - raise SynapseError(400, "Cannot send member events with relations") - - content = parse_json_object_from_request(request) - - aggregation_key = parse_string(request, "key", encoding="utf-8") - - content["m.relates_to"] = { - "event_id": parent_id, - "rel_type": relation_type, - } - if aggregation_key is not None: - content["m.relates_to"]["key"] = aggregation_key - - event_dict = { - "type": event_type, - "content": content, - "room_id": room_id, - "sender": requester.user.to_string(), - } - - try: - ( - event, - _, - ) = await self.event_creation_handler.create_and_send_nonmember_event( - requester, event_dict=event_dict, txn_id=txn_id - ) - event_id = event.event_id - except ShadowBanError: - event_id = "$" + random_string(43) - - return 200, {"event_id": event_id} - - class RelationPaginationServlet(RestServlet): """API to paginate relations on an event by topological ordering, optionally filtered by relation type and event type. @@ -431,7 +317,6 @@ class RelationAggregationGroupPaginationServlet(RestServlet): def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: - RelationSendServlet(hs).register(http_server) RelationPaginationServlet(hs).register(http_server) RelationAggregationPaginationServlet(hs).register(http_server) RelationAggregationGroupPaginationServlet(hs).register(http_server) diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py index c026d526ef..ff4e81d069 100644 --- a/tests/rest/client/test_relations.py +++ b/tests/rest/client/test_relations.py @@ -93,11 +93,6 @@ class RelationsTestCase(unittest.HomeserverTestCase): channel.json_body, ) - def test_deny_membership(self): - """Test that we deny relations on membership events""" - channel = self._send_relation(RelationTypes.ANNOTATION, EventTypes.Member) - self.assertEquals(400, channel.code, channel.json_body) - def test_deny_invalid_event(self): """Test that we deny relations on non-existant events""" channel = self._send_relation( @@ -1119,7 +1114,8 @@ class RelationsTestCase(unittest.HomeserverTestCase): relation_type: One of `RelationTypes` event_type: The type of the event to create key: The aggregation key used for m.annotation relation type. - content: The content of the created event. + content: The content of the created event. Will be modified to configure + the m.relates_to key based on the other provided parameters. access_token: The access token used to send the relation, defaults to `self.user_token` parent_id: The event_id this relation relates to. If None, then self.parent_id @@ -1130,17 +1126,21 @@ class RelationsTestCase(unittest.HomeserverTestCase): if not access_token: access_token = self.user_token - query = "" - if key: - query = "?key=" + urllib.parse.quote_plus(key.encode("utf-8")) - original_id = parent_id if parent_id else self.parent_id + if content is None: + content = {} + content["m.relates_to"] = { + "event_id": original_id, + "rel_type": relation_type, + } + if key is not None: + content["m.relates_to"]["key"] = key + channel = self.make_request( "POST", - "/_matrix/client/unstable/rooms/%s/send_relation/%s/%s/%s%s" - % (self.room, original_id, relation_type, event_type, query), - content or {}, + f"/_matrix/client/v3/rooms/{self.room}/send/{event_type}", + content, access_token=access_token, ) return channel -- cgit 1.5.1 From 6bf81a7a61d8d5248be5def955104c44fcb78dae Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 7 Jan 2022 09:10:46 -0500 Subject: Bundle aggregations outside of the serialization method. (#11612) This makes the serialization of events synchronous (and it no longer access the database), but we must manually calculate and provide the bundled aggregations. Overall this should cause no change in behavior, but is prep work for other improvements. --- changelog.d/11612.misc | 1 + synapse/events/utils.py | 126 ++++++++------------------- synapse/handlers/events.py | 2 +- synapse/handlers/initial_sync.py | 16 ++-- synapse/handlers/message.py | 2 +- synapse/handlers/pagination.py | 8 +- synapse/handlers/room.py | 10 +++ synapse/handlers/search.py | 10 +-- synapse/rest/admin/rooms.py | 16 ++-- synapse/rest/client/events.py | 2 +- synapse/rest/client/notifications.py | 2 +- synapse/rest/client/relations.py | 11 +-- synapse/rest/client/room.py | 28 +++--- synapse/rest/client/sync.py | 39 +++++---- synapse/server.py | 2 +- synapse/storage/databases/main/relations.py | 128 +++++++++++++++++++++++++++- tests/rest/client/test_retention.py | 2 +- 17 files changed, 249 insertions(+), 156 deletions(-) create mode 100644 changelog.d/11612.misc (limited to 'tests/rest') diff --git a/changelog.d/11612.misc b/changelog.d/11612.misc new file mode 100644 index 0000000000..2d886169c5 --- /dev/null +++ b/changelog.d/11612.misc @@ -0,0 +1 @@ +Avoid database access in the JSON serialization process. diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 2038e72924..de0e0c1731 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -14,17 +14,7 @@ # limitations under the License. import collections.abc import re -from typing import ( - TYPE_CHECKING, - Any, - Callable, - Dict, - Iterable, - List, - Mapping, - Optional, - Union, -) +from typing import Any, Callable, Dict, Iterable, List, Mapping, Optional, Union from frozendict import frozendict @@ -32,14 +22,10 @@ from synapse.api.constants import EventContentFields, EventTypes, RelationTypes from synapse.api.errors import Codes, SynapseError from synapse.api.room_versions import RoomVersion from synapse.types import JsonDict -from synapse.util.async_helpers import yieldable_gather_results from synapse.util.frozenutils import unfreeze from . import EventBase -if TYPE_CHECKING: - from synapse.server import HomeServer - # Split strings on "." but not "\." This uses a negative lookbehind assertion for '\' # (? JsonDict: """Serializes a single event. @@ -418,66 +399,41 @@ class EventClientSerializer: serialized_event = serialize_event(event, time_now, **kwargs) # Check if there are any bundled aggregations to include with the event. - # - # Do not bundle aggregations if any of the following at true: - # - # * Support is disabled via the configuration or the caller. - # * The event is a state event. - # * The event has been redacted. - if ( - self._msc1849_enabled - and bundle_aggregations - and not event.is_state() - and not event.internal_metadata.is_redacted() - ): - await self._injected_bundled_aggregations(event, time_now, serialized_event) + if bundle_aggregations: + event_aggregations = bundle_aggregations.get(event.event_id) + if event_aggregations: + self._injected_bundled_aggregations( + event, + time_now, + bundle_aggregations[event.event_id], + serialized_event, + ) return serialized_event - async def _injected_bundled_aggregations( - self, event: EventBase, time_now: int, serialized_event: JsonDict + def _injected_bundled_aggregations( + self, + event: EventBase, + time_now: int, + aggregations: JsonDict, + serialized_event: JsonDict, ) -> None: """Potentially injects bundled aggregations into the unsigned portion of the serialized event. Args: event: The event being serialized. time_now: The current time in milliseconds + aggregations: The bundled aggregation to serialize. serialized_event: The serialized event which may be modified. """ - # Do not bundle aggregations for an event which represents an edit or an - # annotation. It does not make sense for them to have related events. - relates_to = event.content.get("m.relates_to") - if isinstance(relates_to, (dict, frozendict)): - relation_type = relates_to.get("rel_type") - if relation_type in (RelationTypes.ANNOTATION, RelationTypes.REPLACE): - return - - event_id = event.event_id - room_id = event.room_id - - # The bundled aggregations to include. - aggregations = {} - - annotations = await self.store.get_aggregation_groups_for_event( - event_id, room_id - ) - if annotations.chunk: - aggregations[RelationTypes.ANNOTATION] = annotations.to_dict() + # Make a copy in-case the object is cached. + aggregations = aggregations.copy() - references = await self.store.get_relations_for_event( - event_id, room_id, RelationTypes.REFERENCE, direction="f" - ) - if references.chunk: - aggregations[RelationTypes.REFERENCE] = references.to_dict() - - edit = None - if event.type == EventTypes.Message: - edit = await self.store.get_applicable_edit(event_id, room_id) - - if edit: + if RelationTypes.REPLACE in aggregations: # If there is an edit replace the content, preserving existing # relations. + edit = aggregations[RelationTypes.REPLACE] # Ensure we take copies of the edit content, otherwise we risk modifying # the original event. @@ -502,27 +458,19 @@ class EventClientSerializer: } # If this event is the start of a thread, include a summary of the replies. - if self._msc3440_enabled: - ( - thread_count, - latest_thread_event, - ) = await self.store.get_thread_summary(event_id, room_id) - if latest_thread_event: - aggregations[RelationTypes.THREAD] = { - # Don't bundle aggregations as this could recurse forever. - "latest_event": await self.serialize_event( - latest_thread_event, time_now, bundle_aggregations=False - ), - "count": thread_count, - } - - # If any bundled aggregations were found, include them. - if aggregations: - serialized_event["unsigned"].setdefault("m.relations", {}).update( - aggregations + if RelationTypes.THREAD in aggregations: + # Serialize the latest thread event. + latest_thread_event = aggregations[RelationTypes.THREAD]["latest_event"] + + # Don't bundle aggregations as this could recurse forever. + aggregations[RelationTypes.THREAD]["latest_event"] = self.serialize_event( + latest_thread_event, time_now, bundle_aggregations=None ) - async def serialize_events( + # Include the bundled aggregations in the event. + serialized_event["unsigned"].setdefault("m.relations", {}).update(aggregations) + + def serialize_events( self, events: Iterable[Union[JsonDict, EventBase]], time_now: int, **kwargs: Any ) -> List[JsonDict]: """Serializes multiple events. @@ -535,9 +483,9 @@ class EventClientSerializer: Returns: The list of serialized events """ - return await yieldable_gather_results( - self.serialize_event, events, time_now=time_now, **kwargs - ) + return [ + self.serialize_event(event, time_now=time_now, **kwargs) for event in events + ] def copy_power_levels_contents( diff --git a/synapse/handlers/events.py b/synapse/handlers/events.py index 1b996c420d..a3add8a586 100644 --- a/synapse/handlers/events.py +++ b/synapse/handlers/events.py @@ -119,7 +119,7 @@ class EventStreamHandler: events.extend(to_add) - chunks = await self._event_serializer.serialize_events( + chunks = self._event_serializer.serialize_events( events, time_now, as_client_event=as_client_event, diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py index 601bab67f9..346a06ff49 100644 --- a/synapse/handlers/initial_sync.py +++ b/synapse/handlers/initial_sync.py @@ -170,7 +170,7 @@ class InitialSyncHandler: d["inviter"] = event.sender invite_event = await self.store.get_event(event.event_id) - d["invite"] = await self._event_serializer.serialize_event( + d["invite"] = self._event_serializer.serialize_event( invite_event, time_now, as_client_event=as_client_event, @@ -222,7 +222,7 @@ class InitialSyncHandler: d["messages"] = { "chunk": ( - await self._event_serializer.serialize_events( + self._event_serializer.serialize_events( messages, time_now=time_now, as_client_event=as_client_event, @@ -232,7 +232,7 @@ class InitialSyncHandler: "end": await end_token.to_string(self.store), } - d["state"] = await self._event_serializer.serialize_events( + d["state"] = self._event_serializer.serialize_events( current_state.values(), time_now=time_now, as_client_event=as_client_event, @@ -376,16 +376,14 @@ class InitialSyncHandler: "messages": { "chunk": ( # Don't bundle aggregations as this is a deprecated API. - await self._event_serializer.serialize_events(messages, time_now) + self._event_serializer.serialize_events(messages, time_now) ), "start": await start_token.to_string(self.store), "end": await end_token.to_string(self.store), }, "state": ( # Don't bundle aggregations as this is a deprecated API. - await self._event_serializer.serialize_events( - room_state.values(), time_now - ) + self._event_serializer.serialize_events(room_state.values(), time_now) ), "presence": [], "receipts": [], @@ -404,7 +402,7 @@ class InitialSyncHandler: # TODO: These concurrently time_now = self.clock.time_msec() # Don't bundle aggregations as this is a deprecated API. - state = await self._event_serializer.serialize_events( + state = self._event_serializer.serialize_events( current_state.values(), time_now ) @@ -480,7 +478,7 @@ class InitialSyncHandler: "messages": { "chunk": ( # Don't bundle aggregations as this is a deprecated API. - await self._event_serializer.serialize_events(messages, time_now) + self._event_serializer.serialize_events(messages, time_now) ), "start": await start_token.to_string(self.store), "end": await end_token.to_string(self.store), diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 5e3d3886eb..b37250aa38 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -246,7 +246,7 @@ class MessageHandler: room_state = room_state_events[membership_event_id] now = self.clock.time_msec() - events = await self._event_serializer.serialize_events(room_state.values(), now) + events = self._event_serializer.serialize_events(room_state.values(), now) return events async def get_joined_members(self, requester: Requester, room_id: str) -> dict: diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 7469cc55a2..472688f045 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -537,14 +537,16 @@ class PaginationHandler: state_dict = await self.store.get_events(list(state_ids.values())) state = state_dict.values() + aggregations = await self.store.get_bundled_aggregations(events) + time_now = self.clock.time_msec() chunk = { "chunk": ( - await self._event_serializer.serialize_events( + self._event_serializer.serialize_events( events, time_now, - bundle_aggregations=True, + bundle_aggregations=aggregations, as_client_event=as_client_event, ) ), @@ -553,7 +555,7 @@ class PaginationHandler: } if state: - chunk["state"] = await self._event_serializer.serialize_events( + chunk["state"] = self._event_serializer.serialize_events( state, time_now, as_client_event=as_client_event ) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 3d3a0f6ac3..3d47163f25 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1181,6 +1181,16 @@ class RoomContextHandler: # `filtered` rather than the event we retrieved from the datastore. results["event"] = filtered[0] + # Fetch the aggregations. + aggregations = await self.store.get_bundled_aggregations([results["event"]]) + aggregations.update( + await self.store.get_bundled_aggregations(results["events_before"]) + ) + aggregations.update( + await self.store.get_bundled_aggregations(results["events_after"]) + ) + results["aggregations"] = aggregations + if results["events_after"]: last_event_id = results["events_after"][-1].event_id else: diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index ab7eaab2fb..0b153a6822 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -420,10 +420,10 @@ class SearchHandler: time_now = self.clock.time_msec() for context in contexts.values(): - context["events_before"] = await self._event_serializer.serialize_events( + context["events_before"] = self._event_serializer.serialize_events( context["events_before"], time_now ) - context["events_after"] = await self._event_serializer.serialize_events( + context["events_after"] = self._event_serializer.serialize_events( context["events_after"], time_now ) @@ -441,9 +441,7 @@ class SearchHandler: results.append( { "rank": rank_map[e.event_id], - "result": ( - await self._event_serializer.serialize_event(e, time_now) - ), + "result": self._event_serializer.serialize_event(e, time_now), "context": contexts.get(e.event_id, {}), } ) @@ -457,7 +455,7 @@ class SearchHandler: if state_results: s = {} for room_id, state_events in state_results.items(): - s[room_id] = await self._event_serializer.serialize_events( + s[room_id] = self._event_serializer.serialize_events( state_events, time_now ) diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py index 6030373ebc..2e714ac87b 100644 --- a/synapse/rest/admin/rooms.py +++ b/synapse/rest/admin/rooms.py @@ -424,7 +424,7 @@ class RoomStateRestServlet(RestServlet): event_ids = await self.store.get_current_state_ids(room_id) events = await self.store.get_events(event_ids.values()) now = self.clock.time_msec() - room_state = await self._event_serializer.serialize_events(events.values(), now) + room_state = self._event_serializer.serialize_events(events.values(), now) ret = {"state": room_state} return HTTPStatus.OK, ret @@ -744,22 +744,22 @@ class RoomEventContextServlet(RestServlet): ) time_now = self.clock.time_msec() - results["events_before"] = await self._event_serializer.serialize_events( + results["events_before"] = self._event_serializer.serialize_events( results["events_before"], time_now, - bundle_aggregations=True, + bundle_aggregations=results["aggregations"], ) - results["event"] = await self._event_serializer.serialize_event( + results["event"] = self._event_serializer.serialize_event( results["event"], time_now, - bundle_aggregations=True, + bundle_aggregations=results["aggregations"], ) - results["events_after"] = await self._event_serializer.serialize_events( + results["events_after"] = self._event_serializer.serialize_events( results["events_after"], time_now, - bundle_aggregations=True, + bundle_aggregations=results["aggregations"], ) - results["state"] = await self._event_serializer.serialize_events( + results["state"] = self._event_serializer.serialize_events( results["state"], time_now ) diff --git a/synapse/rest/client/events.py b/synapse/rest/client/events.py index 13b72a045a..672c821061 100644 --- a/synapse/rest/client/events.py +++ b/synapse/rest/client/events.py @@ -91,7 +91,7 @@ class EventRestServlet(RestServlet): time_now = self.clock.time_msec() if event: - result = await self._event_serializer.serialize_event(event, time_now) + result = self._event_serializer.serialize_event(event, time_now) return 200, result else: return 404, "Event not found." diff --git a/synapse/rest/client/notifications.py b/synapse/rest/client/notifications.py index acd0c9e135..8e427a96a3 100644 --- a/synapse/rest/client/notifications.py +++ b/synapse/rest/client/notifications.py @@ -72,7 +72,7 @@ class NotificationsServlet(RestServlet): "actions": pa.actions, "ts": pa.received_ts, "event": ( - await self._event_serializer.serialize_event( + self._event_serializer.serialize_event( notif_events[pa.event_id], self.clock.time_msec(), event_format=format_event_for_client_v2_without_room_id, diff --git a/synapse/rest/client/relations.py b/synapse/rest/client/relations.py index 3823498012..37d949a71e 100644 --- a/synapse/rest/client/relations.py +++ b/synapse/rest/client/relations.py @@ -113,13 +113,14 @@ class RelationPaginationServlet(RestServlet): now = self.clock.time_msec() # Do not bundle aggregations when retrieving the original event because # we want the content before relations are applied to it. - original_event = await self._event_serializer.serialize_event( - event, now, bundle_aggregations=False + original_event = self._event_serializer.serialize_event( + event, now, bundle_aggregations=None ) # The relations returned for the requested event do include their # bundled aggregations. - serialized_events = await self._event_serializer.serialize_events( - events, now, bundle_aggregations=True + aggregations = await self.store.get_bundled_aggregations(events) + serialized_events = self._event_serializer.serialize_events( + events, now, bundle_aggregations=aggregations ) return_value = pagination_chunk.to_dict() @@ -308,7 +309,7 @@ class RelationAggregationGroupPaginationServlet(RestServlet): ) now = self.clock.time_msec() - serialized_events = await self._event_serializer.serialize_events(events, now) + serialized_events = self._event_serializer.serialize_events(events, now) return_value = result.to_dict() return_value["chunk"] = serialized_events diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 40330749e5..da6014900a 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -642,6 +642,7 @@ class RoomEventServlet(RestServlet): def __init__(self, hs: "HomeServer"): super().__init__() self.clock = hs.get_clock() + self._store = hs.get_datastore() self.event_handler = hs.get_event_handler() self._event_serializer = hs.get_event_client_serializer() self.auth = hs.get_auth() @@ -660,10 +661,13 @@ class RoomEventServlet(RestServlet): # https://matrix.org/docs/spec/client_server/r0.5.0#get-matrix-client-r0-rooms-roomid-event-eventid raise SynapseError(404, "Event not found.", errcode=Codes.NOT_FOUND) - time_now = self.clock.time_msec() if event: - event_dict = await self._event_serializer.serialize_event( - event, time_now, bundle_aggregations=True + # Ensure there are bundled aggregations available. + aggregations = await self._store.get_bundled_aggregations([event]) + + time_now = self.clock.time_msec() + event_dict = self._event_serializer.serialize_event( + event, time_now, bundle_aggregations=aggregations ) return 200, event_dict @@ -708,16 +712,20 @@ class RoomEventContextServlet(RestServlet): raise SynapseError(404, "Event not found.", errcode=Codes.NOT_FOUND) time_now = self.clock.time_msec() - results["events_before"] = await self._event_serializer.serialize_events( - results["events_before"], time_now, bundle_aggregations=True + results["events_before"] = self._event_serializer.serialize_events( + results["events_before"], + time_now, + bundle_aggregations=results["aggregations"], ) - results["event"] = await self._event_serializer.serialize_event( - results["event"], time_now, bundle_aggregations=True + results["event"] = self._event_serializer.serialize_event( + results["event"], time_now, bundle_aggregations=results["aggregations"] ) - results["events_after"] = await self._event_serializer.serialize_events( - results["events_after"], time_now, bundle_aggregations=True + results["events_after"] = self._event_serializer.serialize_events( + results["events_after"], + time_now, + bundle_aggregations=results["aggregations"], ) - results["state"] = await self._event_serializer.serialize_events( + results["state"] = self._event_serializer.serialize_events( results["state"], time_now ) diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index e99a943d0d..a3e57e4b20 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -17,7 +17,6 @@ from collections import defaultdict from typing import ( TYPE_CHECKING, Any, - Awaitable, Callable, Dict, Iterable, @@ -395,7 +394,7 @@ class SyncRestServlet(RestServlet): """ invited = {} for room in rooms: - invite = await self._event_serializer.serialize_event( + invite = self._event_serializer.serialize_event( room.invite, time_now, token_id=token_id, @@ -432,7 +431,7 @@ class SyncRestServlet(RestServlet): """ knocked = {} for room in rooms: - knock = await self._event_serializer.serialize_event( + knock = self._event_serializer.serialize_event( room.knock, time_now, token_id=token_id, @@ -525,21 +524,14 @@ class SyncRestServlet(RestServlet): The room, encoded in our response format """ - def serialize(events: Iterable[EventBase]) -> Awaitable[List[JsonDict]]: + def serialize( + events: Iterable[EventBase], + aggregations: Optional[Dict[str, Dict[str, Any]]] = None, + ) -> List[JsonDict]: return self._event_serializer.serialize_events( events, time_now=time_now, - # Don't bother to bundle aggregations if the timeline is unlimited, - # as clients will have all the necessary information. - # bundle_aggregations=room.timeline.limited, - # - # richvdh 2021-12-15: disable this temporarily as it has too high an - # overhead for initialsyncs. We need to figure out a way that the - # bundling can be done *before* the events are stored in the - # SyncResponseCache so that this part can be synchronous. - # - # Ensure to re-enable the test at tests/rest/client/test_relations.py::RelationsTestCase.test_bundled_aggregations. - bundle_aggregations=False, + bundle_aggregations=aggregations, token_id=token_id, event_format=event_formatter, only_event_fields=only_fields, @@ -561,8 +553,21 @@ class SyncRestServlet(RestServlet): event.room_id, ) - serialized_state = await serialize(state_events) - serialized_timeline = await serialize(timeline_events) + serialized_state = serialize(state_events) + # Don't bother to bundle aggregations if the timeline is unlimited, + # as clients will have all the necessary information. + # bundle_aggregations=room.timeline.limited, + # + # richvdh 2021-12-15: disable this temporarily as it has too high an + # overhead for initialsyncs. We need to figure out a way that the + # bundling can be done *before* the events are stored in the + # SyncResponseCache so that this part can be synchronous. + # + # Ensure to re-enable the test at tests/rest/client/test_relations.py::RelationsTestCase.test_bundled_aggregations. + # if room.timeline.limited: + # aggregations = await self.store.get_bundled_aggregations(timeline_events) + aggregations = None + serialized_timeline = serialize(timeline_events, aggregations) account_data = room.account_data diff --git a/synapse/server.py b/synapse/server.py index 185e40e4da..3032f0b738 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -759,7 +759,7 @@ class HomeServer(metaclass=abc.ABCMeta): @cache_in_self def get_event_client_serializer(self) -> EventClientSerializer: - return EventClientSerializer(self) + return EventClientSerializer() @cache_in_self def get_password_policy_handler(self) -> PasswordPolicyHandler: diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py index 4ff6aed253..c6c4bd18da 100644 --- a/synapse/storage/databases/main/relations.py +++ b/synapse/storage/databases/main/relations.py @@ -13,14 +13,30 @@ # limitations under the License. import logging -from typing import List, Optional, Tuple, Union, cast +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Iterable, + List, + Optional, + Tuple, + Union, + cast, +) import attr +from frozendict import frozendict -from synapse.api.constants import RelationTypes +from synapse.api.constants import EventTypes, RelationTypes from synapse.events import EventBase from synapse.storage._base import SQLBaseStore -from synapse.storage.database import LoggingTransaction, make_in_list_sql_clause +from synapse.storage.database import ( + DatabasePool, + LoggingDatabaseConnection, + LoggingTransaction, + make_in_list_sql_clause, +) from synapse.storage.databases.main.stream import generate_pagination_where_clause from synapse.storage.relations import ( AggregationPaginationToken, @@ -29,10 +45,24 @@ from synapse.storage.relations import ( ) from synapse.util.caches.descriptors import cached +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) class RelationsWorkerStore(SQLBaseStore): + def __init__( + self, + database: DatabasePool, + db_conn: LoggingDatabaseConnection, + hs: "HomeServer", + ): + super().__init__(database, db_conn, hs) + + self._msc1849_enabled = hs.config.experimental.msc1849_enabled + self._msc3440_enabled = hs.config.experimental.msc3440_enabled + @cached(tree=True) async def get_relations_for_event( self, @@ -515,6 +545,98 @@ class RelationsWorkerStore(SQLBaseStore): "get_if_user_has_annotated_event", _get_if_user_has_annotated_event ) + async def _get_bundled_aggregation_for_event( + self, event: EventBase + ) -> Optional[Dict[str, Any]]: + """Generate bundled aggregations for an event. + + Note that this does not use a cache, but depends on cached methods. + + Args: + event: The event to calculate bundled aggregations for. + + Returns: + The bundled aggregations for an event, if bundled aggregations are + enabled and the event can have bundled aggregations. + """ + # State events and redacted events do not get bundled aggregations. + if event.is_state() or event.internal_metadata.is_redacted(): + return None + + # Do not bundle aggregations for an event which represents an edit or an + # annotation. It does not make sense for them to have related events. + relates_to = event.content.get("m.relates_to") + if isinstance(relates_to, (dict, frozendict)): + relation_type = relates_to.get("rel_type") + if relation_type in (RelationTypes.ANNOTATION, RelationTypes.REPLACE): + return None + + event_id = event.event_id + room_id = event.room_id + + # The bundled aggregations to include, a mapping of relation type to a + # type-specific value. Some types include the direct return type here + # while others need more processing during serialization. + aggregations: Dict[str, Any] = {} + + annotations = await self.get_aggregation_groups_for_event(event_id, room_id) + if annotations.chunk: + aggregations[RelationTypes.ANNOTATION] = annotations.to_dict() + + references = await self.get_relations_for_event( + event_id, room_id, RelationTypes.REFERENCE, direction="f" + ) + if references.chunk: + aggregations[RelationTypes.REFERENCE] = references.to_dict() + + edit = None + if event.type == EventTypes.Message: + edit = await self.get_applicable_edit(event_id, room_id) + + if edit: + aggregations[RelationTypes.REPLACE] = edit + + # If this event is the start of a thread, include a summary of the replies. + if self._msc3440_enabled: + ( + thread_count, + latest_thread_event, + ) = await self.get_thread_summary(event_id, room_id) + if latest_thread_event: + aggregations[RelationTypes.THREAD] = { + # Don't bundle aggregations as this could recurse forever. + "latest_event": latest_thread_event, + "count": thread_count, + } + + # Store the bundled aggregations in the event metadata for later use. + return aggregations + + async def get_bundled_aggregations( + self, events: Iterable[EventBase] + ) -> Dict[str, Dict[str, Any]]: + """Generate bundled aggregations for events. + + Args: + events: The iterable of events to calculate bundled aggregations for. + + Returns: + A map of event ID to the bundled aggregation for the event. Not all + events may have bundled aggregations in the results. + """ + # If bundled aggregations are disabled, nothing to do. + if not self._msc1849_enabled: + return {} + + # TODO Parallelize. + results = {} + for event in events: + event_result = await self._get_bundled_aggregation_for_event(event) + if event_result is not None: + results[event.event_id] = event_result + + return results + class RelationsStore(RelationsWorkerStore): pass diff --git a/tests/rest/client/test_retention.py b/tests/rest/client/test_retention.py index b58452195a..fe5b536d97 100644 --- a/tests/rest/client/test_retention.py +++ b/tests/rest/client/test_retention.py @@ -228,7 +228,7 @@ class RetentionTestCase(unittest.HomeserverTestCase): self.assertIsNotNone(event) time_now = self.clock.time_msec() - serialized = self.get_success(self.serializer.serialize_event(event, time_now)) + serialized = self.serializer.serialize_event(event, time_now) return serialized -- cgit 1.5.1 From 2bb4bd126946df46aacf6849f0acb01e78f7d807 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Fri, 7 Jan 2022 16:43:21 +0000 Subject: Test that bans win a join against a race when computing `/sync` response (#11701) --- changelog.d/11701.misc | 1 + tests/handlers/test_sync.py | 97 +++++++++++++++++++++++++++++++++++++++++++-- tests/rest/client/utils.py | 10 +++++ 3 files changed, 105 insertions(+), 3 deletions(-) create mode 100644 changelog.d/11701.misc (limited to 'tests/rest') diff --git a/changelog.d/11701.misc b/changelog.d/11701.misc new file mode 100644 index 0000000000..68905e0412 --- /dev/null +++ b/changelog.d/11701.misc @@ -0,0 +1 @@ +Add a test for [an edge case](https://github.com/matrix-org/synapse/pull/11532#discussion_r769104461) in the `/sync` logic. \ No newline at end of file diff --git a/tests/handlers/test_sync.py b/tests/handlers/test_sync.py index 638186f173..07a760e91a 100644 --- a/tests/handlers/test_sync.py +++ b/tests/handlers/test_sync.py @@ -11,15 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - from typing import Optional -from unittest.mock import Mock +from unittest.mock import MagicMock, Mock, patch from synapse.api.constants import EventTypes, JoinRules from synapse.api.errors import Codes, ResourceLimitError from synapse.api.filtering import Filtering from synapse.api.room_versions import RoomVersions -from synapse.handlers.sync import SyncConfig +from synapse.handlers.sync import SyncConfig, SyncResult from synapse.rest import admin from synapse.rest.client import knock, login, room from synapse.server import HomeServer @@ -27,6 +26,7 @@ from synapse.types import UserID, create_requester import tests.unittest import tests.utils +from tests.test_utils import make_awaitable class SyncTestCase(tests.unittest.HomeserverTestCase): @@ -186,6 +186,97 @@ class SyncTestCase(tests.unittest.HomeserverTestCase): self.assertNotIn(invite_room, [r.room_id for r in result.invited]) self.assertNotIn(knock_room, [r.room_id for r in result.knocked]) + def test_ban_wins_race_with_join(self): + """Rooms shouldn't appear under "joined" if a join loses a race to a ban. + + A complicated edge case. Imagine the following scenario: + + * you attempt to join a room + * racing with that is a ban which comes in over federation, which ends up with + an earlier stream_ordering than the join. + * you get a sync response with a sync token which is _after_ the ban, but before + the join + * now your join lands; it is a valid event because its `prev_event`s predate the + ban, but will not make it into current_state_events (because bans win over + joins in state res, essentially). + * When we do a sync from the incremental sync, the only event in the timeline + is your join ... and yet you aren't joined. + + The ban coming in over federation isn't crucial for this behaviour; the key + requirements are: + 1. the homeserver generates a join event with prev_events that precede the ban + (so that it passes the "are you banned" test) + 2. the join event has a stream_ordering after that of the ban. + + We use monkeypatching to artificially trigger condition (1). + """ + # A local user Alice creates a room. + owner = self.register_user("alice", "password") + owner_tok = self.login(owner, "password") + room_id = self.helper.create_room_as(owner, is_public=True, tok=owner_tok) + + # Do a sync as Alice to get the latest event in the room. + alice_sync_result: SyncResult = self.get_success( + self.sync_handler.wait_for_sync_for_user( + create_requester(owner), generate_sync_config(owner) + ) + ) + self.assertEqual(len(alice_sync_result.joined), 1) + self.assertEqual(alice_sync_result.joined[0].room_id, room_id) + last_room_creation_event_id = ( + alice_sync_result.joined[0].timeline.events[-1].event_id + ) + + # Eve, a ne'er-do-well, registers. + eve = self.register_user("eve", "password") + eve_token = self.login(eve, "password") + + # Alice preemptively bans Eve. + self.helper.ban(room_id, owner, eve, tok=owner_tok) + + # Eve syncs. + eve_requester = create_requester(eve) + eve_sync_config = generate_sync_config(eve) + eve_sync_after_ban: SyncResult = self.get_success( + self.sync_handler.wait_for_sync_for_user(eve_requester, eve_sync_config) + ) + + # Sanity check this sync result. We shouldn't be joined to the room. + self.assertEqual(eve_sync_after_ban.joined, []) + + # Eve tries to join the room. We monkey patch the internal logic which selects + # the prev_events used when creating the join event, such that the ban does not + # precede the join. + mocked_get_prev_events = patch.object( + self.hs.get_datastore(), + "get_prev_events_for_room", + new_callable=MagicMock, + return_value=make_awaitable([last_room_creation_event_id]), + ) + with mocked_get_prev_events: + self.helper.join(room_id, eve, tok=eve_token) + + # Eve makes a second, incremental sync. + eve_incremental_sync_after_join: SyncResult = self.get_success( + self.sync_handler.wait_for_sync_for_user( + eve_requester, + eve_sync_config, + since_token=eve_sync_after_ban.next_batch, + ) + ) + # Eve should not see herself as joined to the room. + self.assertEqual(eve_incremental_sync_after_join.joined, []) + + # If we did a third initial sync, we should _still_ see eve is not joined to the room. + eve_initial_sync_after_join: SyncResult = self.get_success( + self.sync_handler.wait_for_sync_for_user( + eve_requester, + eve_sync_config, + since_token=None, + ) + ) + self.assertEqual(eve_initial_sync_after_join.joined, []) + _request_key = 0 diff --git a/tests/rest/client/utils.py b/tests/rest/client/utils.py index 1af5e5cee5..8424383580 100644 --- a/tests/rest/client/utils.py +++ b/tests/rest/client/utils.py @@ -196,6 +196,16 @@ class RestHelper: expect_code=expect_code, ) + def ban(self, room: str, src: str, targ: str, **kwargs: object): + """A convenience helper: `change_membership` with `membership` preset to "ban".""" + self.change_membership( + room=room, + src=src, + targ=targ, + membership=Membership.BAN, + **kwargs, + ) + def change_membership( self, room: str, -- cgit 1.5.1 From 0c40c619aa8c8240ab75970e195fe73695df978b Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 13 Jan 2022 10:45:28 -0500 Subject: Include bundled aggregations in the sync response cache. (#11659) --- changelog.d/11659.bugfix | 1 + synapse/handlers/sync.py | 10 ++++++++++ synapse/rest/client/sync.py | 17 +++-------------- tests/rest/client/test_relations.py | 10 +++++----- 4 files changed, 19 insertions(+), 19 deletions(-) create mode 100644 changelog.d/11659.bugfix (limited to 'tests/rest') diff --git a/changelog.d/11659.bugfix b/changelog.d/11659.bugfix new file mode 100644 index 0000000000..842f6892fd --- /dev/null +++ b/changelog.d/11659.bugfix @@ -0,0 +1 @@ +Include the bundled aggregations in the `/sync` response, per [MSC2675](https://github.com/matrix-org/matrix-doc/pull/2675). diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 4b3f1ea059..e1df9b3106 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -98,6 +98,9 @@ class TimelineBatch: prev_batch: StreamToken events: List[EventBase] limited: bool + # A mapping of event ID to the bundled aggregations for the above events. + # This is only calculated if limited is true. + bundled_aggregations: Optional[Dict[str, Dict[str, Any]]] = None def __bool__(self) -> bool: """Make the result appear empty if there are no updates. This is used @@ -630,10 +633,17 @@ class SyncHandler: prev_batch_token = now_token.copy_and_replace("room_key", room_key) + # Don't bother to bundle aggregations if the timeline is unlimited, + # as clients will have all the necessary information. + bundled_aggregations = None + if limited or newly_joined_room: + bundled_aggregations = await self.store.get_bundled_aggregations(recents) + return TimelineBatch( events=recents, prev_batch=prev_batch_token, limited=limited or newly_joined_room, + bundled_aggregations=bundled_aggregations, ) async def get_state_after_event( diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index a3e57e4b20..d20ae1421e 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -554,20 +554,9 @@ class SyncRestServlet(RestServlet): ) serialized_state = serialize(state_events) - # Don't bother to bundle aggregations if the timeline is unlimited, - # as clients will have all the necessary information. - # bundle_aggregations=room.timeline.limited, - # - # richvdh 2021-12-15: disable this temporarily as it has too high an - # overhead for initialsyncs. We need to figure out a way that the - # bundling can be done *before* the events are stored in the - # SyncResponseCache so that this part can be synchronous. - # - # Ensure to re-enable the test at tests/rest/client/test_relations.py::RelationsTestCase.test_bundled_aggregations. - # if room.timeline.limited: - # aggregations = await self.store.get_bundled_aggregations(timeline_events) - aggregations = None - serialized_timeline = serialize(timeline_events, aggregations) + serialized_timeline = serialize( + timeline_events, room.timeline.bundled_aggregations + ) account_data = room.account_data diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py index ff4e81d069..ee26751430 100644 --- a/tests/rest/client/test_relations.py +++ b/tests/rest/client/test_relations.py @@ -572,11 +572,11 @@ class RelationsTestCase(unittest.HomeserverTestCase): assert_bundle(channel.json_body["event"]["unsigned"].get("m.relations")) # Request sync. - # channel = self.make_request("GET", "/sync", access_token=self.user_token) - # self.assertEquals(200, channel.code, channel.json_body) - # room_timeline = channel.json_body["rooms"]["join"][self.room]["timeline"] - # self.assertTrue(room_timeline["limited"]) - # _find_and_assert_event(room_timeline["events"]) + channel = self.make_request("GET", "/sync", access_token=self.user_token) + self.assertEquals(200, channel.code, channel.json_body) + room_timeline = channel.json_body["rooms"]["join"][self.room]["timeline"] + self.assertTrue(room_timeline["limited"]) + _find_and_assert_event(room_timeline["events"]) # Note that /relations is tested separately in test_aggregation_get_event_for_thread # since it needs different data configured. -- cgit 1.5.1 From 3e0536cd2afb5a640619bd872fc27b068ec3eb9b Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 13 Jan 2022 19:44:18 -0500 Subject: Replace uses of simple_insert_many with simple_insert_many_values. (#11742) This should be (slightly) more efficient and it is simpler to have a single method for inserting multiple values. --- changelog.d/11742.misc | 1 + synapse/rest/admin/background_updates.py | 44 +++----- synapse/storage/database.py | 54 +-------- synapse/storage/databases/main/account_data.py | 4 +- synapse/storage/databases/main/deviceinbox.py | 30 ++--- synapse/storage/databases/main/devices.py | 37 ++++--- synapse/storage/databases/main/directory.py | 6 +- synapse/storage/databases/main/e2e_room_keys.py | 34 ++++-- synapse/storage/databases/main/end_to_end_keys.py | 42 ++++--- .../storage/databases/main/event_push_actions.py | 21 ++-- synapse/storage/databases/main/events.py | 122 ++++++++++----------- .../storage/databases/main/events_bg_updates.py | 30 ++--- synapse/storage/databases/main/presence.py | 33 ++++-- synapse/storage/databases/main/pusher.py | 8 +- synapse/storage/databases/main/user_directory.py | 12 +- synapse/storage/databases/state/bg_updates.py | 15 +-- synapse/storage/databases/state/store.py | 27 +---- tests/rest/admin/test_registration_tokens.py | 15 +-- tests/storage/test_event_federation.py | 26 +++-- 19 files changed, 263 insertions(+), 298 deletions(-) create mode 100644 changelog.d/11742.misc (limited to 'tests/rest') diff --git a/changelog.d/11742.misc b/changelog.d/11742.misc new file mode 100644 index 0000000000..f65ccdf30a --- /dev/null +++ b/changelog.d/11742.misc @@ -0,0 +1 @@ +Minor efficiency improvements when inserting many values into the database. diff --git a/synapse/rest/admin/background_updates.py b/synapse/rest/admin/background_updates.py index 6ec00ce0b9..e9bce22a34 100644 --- a/synapse/rest/admin/background_updates.py +++ b/synapse/rest/admin/background_updates.py @@ -123,34 +123,25 @@ class BackgroundUpdateStartJobRestServlet(RestServlet): job_name = body["job_name"] if job_name == "populate_stats_process_rooms": - jobs = [ - { - "update_name": "populate_stats_process_rooms", - "progress_json": "{}", - }, - ] + jobs = [("populate_stats_process_rooms", "{}", "")] elif job_name == "regenerate_directory": jobs = [ - { - "update_name": "populate_user_directory_createtables", - "progress_json": "{}", - "depends_on": "", - }, - { - "update_name": "populate_user_directory_process_rooms", - "progress_json": "{}", - "depends_on": "populate_user_directory_createtables", - }, - { - "update_name": "populate_user_directory_process_users", - "progress_json": "{}", - "depends_on": "populate_user_directory_process_rooms", - }, - { - "update_name": "populate_user_directory_cleanup", - "progress_json": "{}", - "depends_on": "populate_user_directory_process_users", - }, + ("populate_user_directory_createtables", "{}", ""), + ( + "populate_user_directory_process_rooms", + "{}", + "populate_user_directory_createtables", + ), + ( + "populate_user_directory_process_users", + "{}", + "populate_user_directory_process_rooms", + ), + ( + "populate_user_directory_cleanup", + "{}", + "populate_user_directory_process_users", + ), ] else: raise SynapseError(HTTPStatus.BAD_REQUEST, "Invalid job_name") @@ -158,6 +149,7 @@ class BackgroundUpdateStartJobRestServlet(RestServlet): try: await self._store.db_pool.simple_insert_many( table="background_updates", + keys=("update_name", "progress_json", "depends_on"), values=jobs, desc=f"admin_api_run_{job_name}", ) diff --git a/synapse/storage/database.py b/synapse/storage/database.py index a27cc3605c..57cc1d76e0 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -934,56 +934,6 @@ class DatabasePool: txn.execute(sql, vals) async def simple_insert_many( - self, table: str, values: List[Dict[str, Any]], desc: str - ) -> None: - """Executes an INSERT query on the named table. - - The input is given as a list of dicts, with one dict per row. - Generally simple_insert_many_values should be preferred for new code. - - Args: - table: string giving the table name - values: dict of new column names and values for them - desc: description of the transaction, for logging and metrics - """ - await self.runInteraction(desc, self.simple_insert_many_txn, table, values) - - @staticmethod - def simple_insert_many_txn( - txn: LoggingTransaction, table: str, values: List[Dict[str, Any]] - ) -> None: - """Executes an INSERT query on the named table. - - The input is given as a list of dicts, with one dict per row. - Generally simple_insert_many_values_txn should be preferred for new code. - - Args: - txn: The transaction to use. - table: string giving the table name - values: dict of new column names and values for them - """ - if not values: - return - - # This is a *slight* abomination to get a list of tuples of key names - # and a list of tuples of value names. - # - # i.e. [{"a": 1, "b": 2}, {"c": 3, "d": 4}] - # => [("a", "b",), ("c", "d",)] and [(1, 2,), (3, 4,)] - # - # The sort is to ensure that we don't rely on dictionary iteration - # order. - keys, vals = zip( - *(zip(*(sorted(i.items(), key=lambda kv: kv[0]))) for i in values if i) - ) - - for k in keys: - if k != keys[0]: - raise RuntimeError("All items must have the same keys") - - return DatabasePool.simple_insert_many_values_txn(txn, table, keys[0], vals) - - async def simple_insert_many_values( self, table: str, keys: Collection[str], @@ -1002,11 +952,11 @@ class DatabasePool: desc: description of the transaction, for logging and metrics """ await self.runInteraction( - desc, self.simple_insert_many_values_txn, table, keys, values + desc, self.simple_insert_many_txn, table, keys, values ) @staticmethod - def simple_insert_many_values_txn( + def simple_insert_many_txn( txn: LoggingTransaction, table: str, keys: Collection[str], diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py index 93db71d1b4..ef475e18c7 100644 --- a/synapse/storage/databases/main/account_data.py +++ b/synapse/storage/databases/main/account_data.py @@ -536,9 +536,9 @@ class AccountDataWorkerStore(CacheInvalidationWorkerStore): self.db_pool.simple_insert_many_txn( txn, table="ignored_users", + keys=("ignorer_user_id", "ignored_user_id"), values=[ - {"ignorer_user_id": user_id, "ignored_user_id": u} - for u in currently_ignored_users - previously_ignored_users + (user_id, u) for u in currently_ignored_users - previously_ignored_users ], ) diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py index 3682cb6a81..4eca97189b 100644 --- a/synapse/storage/databases/main/deviceinbox.py +++ b/synapse/storage/databases/main/deviceinbox.py @@ -432,14 +432,21 @@ class DeviceInboxWorkerStore(SQLBaseStore): self.db_pool.simple_insert_many_txn( txn, table="device_federation_outbox", + keys=( + "destination", + "stream_id", + "queued_ts", + "messages_json", + "instance_name", + ), values=[ - { - "destination": destination, - "stream_id": stream_id, - "queued_ts": now_ms, - "messages_json": json_encoder.encode(edu), - "instance_name": self._instance_name, - } + ( + destination, + stream_id, + now_ms, + json_encoder.encode(edu), + self._instance_name, + ) for destination, edu in remote_messages_by_destination.items() ], ) @@ -571,14 +578,9 @@ class DeviceInboxWorkerStore(SQLBaseStore): self.db_pool.simple_insert_many_txn( txn, table="device_inbox", + keys=("user_id", "device_id", "stream_id", "message_json", "instance_name"), values=[ - { - "user_id": user_id, - "device_id": device_id, - "stream_id": stream_id, - "message_json": message_json, - "instance_name": self._instance_name, - } + (user_id, device_id, stream_id, message_json, self._instance_name) for user_id, messages_by_device in local_by_user_then_device.items() for device_id, message_json in messages_by_device.items() ], diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 52fbf50db6..8748654b55 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1386,12 +1386,9 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): self.db_pool.simple_insert_many_txn( txn, table="device_lists_remote_cache", + keys=("user_id", "device_id", "content"), values=[ - { - "user_id": user_id, - "device_id": content["device_id"], - "content": json_encoder.encode(content), - } + (user_id, content["device_id"], json_encoder.encode(content)) for content in devices ], ) @@ -1479,8 +1476,9 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): self.db_pool.simple_insert_many_txn( txn, table="device_lists_stream", + keys=("stream_id", "user_id", "device_id"), values=[ - {"stream_id": stream_id, "user_id": user_id, "device_id": device_id} + (stream_id, user_id, device_id) for stream_id, device_id in zip(stream_ids, device_ids) ], ) @@ -1507,18 +1505,27 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): self.db_pool.simple_insert_many_txn( txn, table="device_lists_outbound_pokes", + keys=( + "destination", + "stream_id", + "user_id", + "device_id", + "sent", + "ts", + "opentracing_context", + ), values=[ - { - "destination": destination, - "stream_id": next(next_stream_id), - "user_id": user_id, - "device_id": device_id, - "sent": False, - "ts": now, - "opentracing_context": json_encoder.encode(context) + ( + destination, + next(next_stream_id), + user_id, + device_id, + False, + now, + json_encoder.encode(context) if whitelisted_homeserver(destination) else "{}", - } + ) for destination in hosts for device_id in device_ids ], diff --git a/synapse/storage/databases/main/directory.py b/synapse/storage/databases/main/directory.py index f76c6121e8..5903fdaf00 100644 --- a/synapse/storage/databases/main/directory.py +++ b/synapse/storage/databases/main/directory.py @@ -112,10 +112,8 @@ class DirectoryWorkerStore(CacheInvalidationWorkerStore): self.db_pool.simple_insert_many_txn( txn, table="room_alias_servers", - values=[ - {"room_alias": room_alias.to_string(), "server": server} - for server in servers - ], + keys=("room_alias", "server"), + values=[(room_alias.to_string(), server) for server in servers], ) self._invalidate_cache_and_stream( diff --git a/synapse/storage/databases/main/e2e_room_keys.py b/synapse/storage/databases/main/e2e_room_keys.py index 0cb48b9dd7..b789a588a5 100644 --- a/synapse/storage/databases/main/e2e_room_keys.py +++ b/synapse/storage/databases/main/e2e_room_keys.py @@ -110,16 +110,16 @@ class EndToEndRoomKeyStore(SQLBaseStore): values = [] for (room_id, session_id, room_key) in room_keys: values.append( - { - "user_id": user_id, - "version": version_int, - "room_id": room_id, - "session_id": session_id, - "first_message_index": room_key["first_message_index"], - "forwarded_count": room_key["forwarded_count"], - "is_verified": room_key["is_verified"], - "session_data": json_encoder.encode(room_key["session_data"]), - } + ( + user_id, + version_int, + room_id, + session_id, + room_key["first_message_index"], + room_key["forwarded_count"], + room_key["is_verified"], + json_encoder.encode(room_key["session_data"]), + ) ) log_kv( { @@ -131,7 +131,19 @@ class EndToEndRoomKeyStore(SQLBaseStore): ) await self.db_pool.simple_insert_many( - table="e2e_room_keys", values=values, desc="add_e2e_room_keys" + table="e2e_room_keys", + keys=( + "user_id", + "version", + "room_id", + "session_id", + "first_message_index", + "forwarded_count", + "is_verified", + "session_data", + ), + values=values, + desc="add_e2e_room_keys", ) @trace diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index 86cab97563..1f8447b507 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -387,15 +387,16 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker self.db_pool.simple_insert_many_txn( txn, table="e2e_one_time_keys_json", + keys=( + "user_id", + "device_id", + "algorithm", + "key_id", + "ts_added_ms", + "key_json", + ), values=[ - { - "user_id": user_id, - "device_id": device_id, - "algorithm": algorithm, - "key_id": key_id, - "ts_added_ms": time_now, - "key_json": json_bytes, - } + (user_id, device_id, algorithm, key_id, time_now, json_bytes) for algorithm, key_id, json_bytes in new_keys ], ) @@ -1186,15 +1187,22 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): """ await self.db_pool.simple_insert_many( "e2e_cross_signing_signatures", - [ - { - "user_id": user_id, - "key_id": item.signing_key_id, - "target_user_id": item.target_user_id, - "target_device_id": item.target_device_id, - "signature": item.signature, - } + keys=( + "user_id", + "key_id", + "target_user_id", + "target_device_id", + "signature", + ), + values=[ + ( + user_id, + item.signing_key_id, + item.target_user_id, + item.target_device_id, + item.signature, + ) for item in signatures ], - "add_e2e_signing_key", + desc="add_e2e_signing_key", ) diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index a98e6b2593..b7c4c62222 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -875,14 +875,21 @@ class EventPushActionsWorkerStore(SQLBaseStore): self.db_pool.simple_insert_many_txn( txn, table="event_push_summary", + keys=( + "user_id", + "room_id", + "notif_count", + "unread_count", + "stream_ordering", + ), values=[ - { - "user_id": user_id, - "room_id": room_id, - "notif_count": summary.notif_count, - "unread_count": summary.unread_count, - "stream_ordering": summary.stream_ordering, - } + ( + user_id, + room_id, + summary.notif_count, + summary.unread_count, + summary.stream_ordering, + ) for ((user_id, room_id), summary) in summaries.items() if summary.old_user_id is None ], diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index cce2305597..de3b48524b 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -442,12 +442,9 @@ class PersistEventsStore: self.db_pool.simple_insert_many_txn( txn, table="event_auth", + keys=("event_id", "room_id", "auth_id"), values=[ - { - "event_id": event.event_id, - "room_id": event.room_id, - "auth_id": auth_id, - } + (event.event_id, event.room_id, auth_id) for event in events for auth_id in event.auth_event_ids() if event.is_state() @@ -675,8 +672,9 @@ class PersistEventsStore: db_pool.simple_insert_many_txn( txn, table="event_auth_chains", + keys=("event_id", "chain_id", "sequence_number"), values=[ - {"event_id": event_id, "chain_id": c_id, "sequence_number": seq} + (event_id, c_id, seq) for event_id, (c_id, seq) in new_chain_tuples.items() ], ) @@ -782,13 +780,14 @@ class PersistEventsStore: db_pool.simple_insert_many_txn( txn, table="event_auth_chain_links", + keys=( + "origin_chain_id", + "origin_sequence_number", + "target_chain_id", + "target_sequence_number", + ), values=[ - { - "origin_chain_id": source_id, - "origin_sequence_number": source_seq, - "target_chain_id": target_id, - "target_sequence_number": target_seq, - } + (source_id, source_seq, target_id, target_seq) for ( source_id, source_seq, @@ -943,20 +942,28 @@ class PersistEventsStore: txn_id = getattr(event.internal_metadata, "txn_id", None) if token_id and txn_id: to_insert.append( - { - "event_id": event.event_id, - "room_id": event.room_id, - "user_id": event.sender, - "token_id": token_id, - "txn_id": txn_id, - "inserted_ts": self._clock.time_msec(), - } + ( + event.event_id, + event.room_id, + event.sender, + token_id, + txn_id, + self._clock.time_msec(), + ) ) if to_insert: self.db_pool.simple_insert_many_txn( txn, table="event_txn_id", + keys=( + "event_id", + "room_id", + "user_id", + "token_id", + "txn_id", + "inserted_ts", + ), values=to_insert, ) @@ -1161,8 +1168,9 @@ class PersistEventsStore: self.db_pool.simple_insert_many_txn( txn, table="event_forward_extremities", + keys=("event_id", "room_id"), values=[ - {"event_id": ev_id, "room_id": room_id} + (ev_id, room_id) for room_id, new_extrem in new_forward_extremities.items() for ev_id in new_extrem ], @@ -1174,12 +1182,9 @@ class PersistEventsStore: self.db_pool.simple_insert_many_txn( txn, table="stream_ordering_to_exterm", + keys=("room_id", "event_id", "stream_ordering"), values=[ - { - "room_id": room_id, - "event_id": event_id, - "stream_ordering": max_stream_order, - } + (room_id, event_id, max_stream_order) for room_id, new_extrem in new_forward_extremities.items() for event_id in new_extrem ], @@ -1342,7 +1347,7 @@ class PersistEventsStore: d.pop("redacted_because", None) return d - self.db_pool.simple_insert_many_values_txn( + self.db_pool.simple_insert_many_txn( txn, table="event_json", keys=("event_id", "room_id", "internal_metadata", "json", "format_version"), @@ -1358,7 +1363,7 @@ class PersistEventsStore: ), ) - self.db_pool.simple_insert_many_values_txn( + self.db_pool.simple_insert_many_txn( txn, table="events", keys=( @@ -1412,7 +1417,7 @@ class PersistEventsStore: ) txn.execute(sql + clause, [False] + args) - self.db_pool.simple_insert_many_values_txn( + self.db_pool.simple_insert_many_txn( txn, table="state_events", keys=("event_id", "room_id", "type", "state_key"), @@ -1622,14 +1627,9 @@ class PersistEventsStore: return self.db_pool.simple_insert_many_txn( txn=txn, table="event_labels", + keys=("event_id", "label", "room_id", "topological_ordering"), values=[ - { - "event_id": event_id, - "label": label, - "room_id": room_id, - "topological_ordering": topological_ordering, - } - for label in labels + (event_id, label, room_id, topological_ordering) for label in labels ], ) @@ -1657,16 +1657,13 @@ class PersistEventsStore: vals = [] for event in events: ref_alg, ref_hash_bytes = compute_event_reference_hash(event) - vals.append( - { - "event_id": event.event_id, - "algorithm": ref_alg, - "hash": memoryview(ref_hash_bytes), - } - ) + vals.append((event.event_id, ref_alg, memoryview(ref_hash_bytes))) self.db_pool.simple_insert_many_txn( - txn, table="event_reference_hashes", values=vals + txn, + table="event_reference_hashes", + keys=("event_id", "algorithm", "hash"), + values=vals, ) def _store_room_members_txn( @@ -1689,18 +1686,25 @@ class PersistEventsStore: self.db_pool.simple_insert_many_txn( txn, table="room_memberships", + keys=( + "event_id", + "user_id", + "sender", + "room_id", + "membership", + "display_name", + "avatar_url", + ), values=[ - { - "event_id": event.event_id, - "user_id": event.state_key, - "sender": event.user_id, - "room_id": event.room_id, - "membership": event.membership, - "display_name": non_null_str_or_none( - event.content.get("displayname") - ), - "avatar_url": non_null_str_or_none(event.content.get("avatar_url")), - } + ( + event.event_id, + event.state_key, + event.user_id, + event.room_id, + event.membership, + non_null_str_or_none(event.content.get("displayname")), + non_null_str_or_none(event.content.get("avatar_url")), + ) for event in events ], ) @@ -2163,13 +2167,9 @@ class PersistEventsStore: self.db_pool.simple_insert_many_txn( txn, table="event_edges", + keys=("event_id", "prev_event_id", "room_id", "is_state"), values=[ - { - "event_id": ev.event_id, - "prev_event_id": e_id, - "room_id": ev.room_id, - "is_state": False, - } + (ev.event_id, e_id, ev.room_id, False) for ev in events for e_id in ev.prev_event_ids() ], diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 0a96664caf..d5f0059665 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -684,13 +684,14 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): self.db_pool.simple_insert_many_txn( txn=txn, table="event_labels", + keys=("event_id", "label", "room_id", "topological_ordering"), values=[ - { - "event_id": event_id, - "label": label, - "room_id": event_json["room_id"], - "topological_ordering": event_json["depth"], - } + ( + event_id, + label, + event_json["room_id"], + event_json["depth"], + ) for label in event_json["content"].get( EventContentFields.LABELS, [] ) @@ -803,29 +804,19 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): if not has_state: state_events.append( - { - "event_id": event.event_id, - "room_id": event.room_id, - "type": event.type, - "state_key": event.state_key, - } + (event.event_id, event.room_id, event.type, event.state_key) ) if not has_event_auth: # Old, dodgy, events may have duplicate auth events, which we # need to deduplicate as we have a unique constraint. for auth_id in set(event.auth_event_ids()): - auth_events.append( - { - "room_id": event.room_id, - "event_id": event.event_id, - "auth_id": auth_id, - } - ) + auth_events.append((event.event_id, event.room_id, auth_id)) if state_events: await self.db_pool.simple_insert_many( table="state_events", + keys=("event_id", "room_id", "type", "state_key"), values=state_events, desc="_rejected_events_metadata_state_events", ) @@ -833,6 +824,7 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): if auth_events: await self.db_pool.simple_insert_many( table="event_auth", + keys=("event_id", "room_id", "auth_id"), values=auth_events, desc="_rejected_events_metadata_event_auth", ) diff --git a/synapse/storage/databases/main/presence.py b/synapse/storage/databases/main/presence.py index cbf9ec38f7..4f05811a77 100644 --- a/synapse/storage/databases/main/presence.py +++ b/synapse/storage/databases/main/presence.py @@ -129,18 +129,29 @@ class PresenceStore(PresenceBackgroundUpdateStore): self.db_pool.simple_insert_many_txn( txn, table="presence_stream", + keys=( + "stream_id", + "user_id", + "state", + "last_active_ts", + "last_federation_update_ts", + "last_user_sync_ts", + "status_msg", + "currently_active", + "instance_name", + ), values=[ - { - "stream_id": stream_id, - "user_id": state.user_id, - "state": state.state, - "last_active_ts": state.last_active_ts, - "last_federation_update_ts": state.last_federation_update_ts, - "last_user_sync_ts": state.last_user_sync_ts, - "status_msg": state.status_msg, - "currently_active": state.currently_active, - "instance_name": self._instance_name, - } + ( + stream_id, + state.user_id, + state.state, + state.last_active_ts, + state.last_federation_update_ts, + state.last_user_sync_ts, + state.status_msg, + state.currently_active, + self._instance_name, + ) for stream_id, state in zip(stream_orderings, presence_states) ], ) diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py index 747b4f31df..cf64cd63a4 100644 --- a/synapse/storage/databases/main/pusher.py +++ b/synapse/storage/databases/main/pusher.py @@ -561,13 +561,9 @@ class PusherStore(PusherWorkerStore): self.db_pool.simple_insert_many_txn( txn, table="deleted_pushers", + keys=("stream_id", "app_id", "pushkey", "user_id"), values=[ - { - "stream_id": stream_id, - "app_id": pusher.app_id, - "pushkey": pusher.pushkey, - "user_id": user_id, - } + (stream_id, pusher.app_id, pusher.pushkey, user_id) for stream_id, pusher in zip(stream_ids, pushers) ], ) diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index 0f9b8575d3..f7c778bdf2 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -105,8 +105,10 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore): GROUP BY room_id """ txn.execute(sql) - rooms = [{"room_id": x[0], "events": x[1]} for x in txn.fetchall()] - self.db_pool.simple_insert_many_txn(txn, TEMP_TABLE + "_rooms", rooms) + rooms = list(txn.fetchall()) + self.db_pool.simple_insert_many_txn( + txn, TEMP_TABLE + "_rooms", keys=("room_id", "events"), values=rooms + ) del rooms sql = ( @@ -117,9 +119,11 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore): txn.execute(sql) txn.execute("SELECT name FROM users") - users = [{"user_id": x[0]} for x in txn.fetchall()] + users = list(txn.fetchall()) - self.db_pool.simple_insert_many_txn(txn, TEMP_TABLE + "_users", users) + self.db_pool.simple_insert_many_txn( + txn, TEMP_TABLE + "_users", keys=("user_id",), values=users + ) new_pos = await self.get_max_stream_id_in_current_state_deltas() await self.db_pool.runInteraction( diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py index eb1118d2cb..5de70f31d2 100644 --- a/synapse/storage/databases/state/bg_updates.py +++ b/synapse/storage/databases/state/bg_updates.py @@ -327,14 +327,15 @@ class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore): self.db_pool.simple_insert_many_txn( txn, table="state_groups_state", + keys=( + "state_group", + "room_id", + "type", + "state_key", + "event_id", + ), values=[ - { - "state_group": state_group, - "room_id": room_id, - "type": key[0], - "state_key": key[1], - "event_id": state_id, - } + (state_group, room_id, key[0], key[1], state_id) for key, state_id in delta_state.items() ], ) diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index c4c8c0021b..7614d76ac6 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -460,14 +460,9 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): self.db_pool.simple_insert_many_txn( txn, table="state_groups_state", + keys=("state_group", "room_id", "type", "state_key", "event_id"), values=[ - { - "state_group": state_group, - "room_id": room_id, - "type": key[0], - "state_key": key[1], - "event_id": state_id, - } + (state_group, room_id, key[0], key[1], state_id) for key, state_id in delta_ids.items() ], ) @@ -475,14 +470,9 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): self.db_pool.simple_insert_many_txn( txn, table="state_groups_state", + keys=("state_group", "room_id", "type", "state_key", "event_id"), values=[ - { - "state_group": state_group, - "room_id": room_id, - "type": key[0], - "state_key": key[1], - "event_id": state_id, - } + (state_group, room_id, key[0], key[1], state_id) for key, state_id in current_state_ids.items() ], ) @@ -589,14 +579,9 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): self.db_pool.simple_insert_many_txn( txn, table="state_groups_state", + keys=("state_group", "room_id", "type", "state_key", "event_id"), values=[ - { - "state_group": sg, - "room_id": room_id, - "type": key[0], - "state_key": key[1], - "event_id": state_id, - } + (sg, room_id, key[0], key[1], state_id) for key, state_id in curr_state.items() ], ) diff --git a/tests/rest/admin/test_registration_tokens.py b/tests/rest/admin/test_registration_tokens.py index 81f3ac7f04..8513b1d2df 100644 --- a/tests/rest/admin/test_registration_tokens.py +++ b/tests/rest/admin/test_registration_tokens.py @@ -223,20 +223,13 @@ class ManageRegistrationTokensTestCase(unittest.HomeserverTestCase): # Create all possible single character tokens tokens = [] for c in string.ascii_letters + string.digits + "._~-": - tokens.append( - { - "token": c, - "uses_allowed": None, - "pending": 0, - "completed": 0, - "expiry_time": None, - } - ) + tokens.append((c, None, 0, 0, None)) self.get_success( self.store.db_pool.simple_insert_many( "registration_tokens", - tokens, - "create_all_registration_tokens", + keys=("token", "uses_allowed", "pending", "completed", "expiry_time"), + values=tokens, + desc="create_all_registration_tokens", ) ) diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index ecfda7677e..632bbc9de7 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -515,17 +515,23 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase): self.get_success( self.store.db_pool.simple_insert_many( table="federation_inbound_events_staging", + keys=( + "origin", + "room_id", + "received_ts", + "event_id", + "event_json", + "internal_metadata", + ), values=[ - { - "origin": "some_origin", - "room_id": room_id, - "received_ts": 0, - "event_id": f"$fake_event_id_{i + 1}", - "event_json": json_encoder.encode( - {"prev_events": [f"$fake_event_id_{i}"]} - ), - "internal_metadata": "{}", - } + ( + "some_origin", + room_id, + 0, + f"$fake_event_id_{i + 1}", + json_encoder.encode({"prev_events": [f"$fake_event_id_{i}"]}), + "{}", + ) for i in range(500) ], desc="test_prune_inbound_federation_queue", -- cgit 1.5.1 From 18862f20b5495bdc556c54e92fd4b1efdc718ba7 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Fri, 14 Jan 2022 14:53:33 +0000 Subject: Remove the 'password_hash' from the Users Admin API endpoint response dictionary (#11576) --- changelog.d/11576.feature | 1 + docs/admin_api/user_admin_api.md | 9 ++++--- synapse/handlers/admin.py | 56 +++++++++++++++++++++++++++++----------- synapse/rest/admin/users.py | 13 +++++----- tests/rest/admin/test_user.py | 50 +++++++++++++++++++++++------------ 5 files changed, 86 insertions(+), 43 deletions(-) create mode 100644 changelog.d/11576.feature (limited to 'tests/rest') diff --git a/changelog.d/11576.feature b/changelog.d/11576.feature new file mode 100644 index 0000000000..5be836ae02 --- /dev/null +++ b/changelog.d/11576.feature @@ -0,0 +1 @@ +Remove the `"password_hash"` field from the response dictionaries of the [Users Admin API](https://matrix-org.github.io/synapse/latest/admin_api/user_admin_api.html). \ No newline at end of file diff --git a/docs/admin_api/user_admin_api.md b/docs/admin_api/user_admin_api.md index 74933d2fcf..c514cadb9d 100644 --- a/docs/admin_api/user_admin_api.md +++ b/docs/admin_api/user_admin_api.md @@ -15,9 +15,10 @@ server admin: [Admin API](../usage/administration/admin_api) It returns a JSON body like the following: -```json +```jsonc { - "displayname": "User", + "name": "@user:example.com", + "displayname": "User", // can be null if not set "threepids": [ { "medium": "email", @@ -32,11 +33,11 @@ It returns a JSON body like the following: "validated_at": 1586458409743 } ], - "avatar_url": "", + "avatar_url": "", // can be null if not set + "is_guest": 0, "admin": 0, "deactivated": 0, "shadow_banned": 0, - "password_hash": "$2b$12$p9B4GkqYdRTPGD", "creation_ts": 1560432506, "appservice_id": null, "consent_server_notice_sent": null, diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index 85157a138b..00ab5e79bf 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -55,21 +55,47 @@ class AdminHandler: async def get_user(self, user: UserID) -> Optional[JsonDict]: """Function to get user details""" - ret = await self.store.get_user_by_id(user.to_string()) - if ret: - profile = await self.store.get_profileinfo(user.localpart) - threepids = await self.store.user_get_threepids(user.to_string()) - external_ids = [ - ({"auth_provider": auth_provider, "external_id": external_id}) - for auth_provider, external_id in await self.store.get_external_ids_by_user( - user.to_string() - ) - ] - ret["displayname"] = profile.display_name - ret["avatar_url"] = profile.avatar_url - ret["threepids"] = threepids - ret["external_ids"] = external_ids - return ret + user_info_dict = await self.store.get_user_by_id(user.to_string()) + if user_info_dict is None: + return None + + # Restrict returned information to a known set of fields. This prevents additional + # fields added to get_user_by_id from modifying Synapse's external API surface. + user_info_to_return = { + "name", + "admin", + "deactivated", + "shadow_banned", + "creation_ts", + "appservice_id", + "consent_server_notice_sent", + "consent_version", + "user_type", + "is_guest", + } + + # Restrict returned keys to a known set. + user_info_dict = { + key: value + for key, value in user_info_dict.items() + if key in user_info_to_return + } + + # Add additional user metadata + profile = await self.store.get_profileinfo(user.localpart) + threepids = await self.store.user_get_threepids(user.to_string()) + external_ids = [ + ({"auth_provider": auth_provider, "external_id": external_id}) + for auth_provider, external_id in await self.store.get_external_ids_by_user( + user.to_string() + ) + ] + user_info_dict["displayname"] = profile.display_name + user_info_dict["avatar_url"] = profile.avatar_url + user_info_dict["threepids"] = threepids + user_info_dict["external_ids"] = external_ids + + return user_info_dict async def export_user_data(self, user_id: str, writer: "ExfiltrationWriter") -> Any: """Write all data we have on the user to the given writer. diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index 78e795c347..c2617ee30c 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -173,12 +173,11 @@ class UserRestServletV2(RestServlet): if not self.hs.is_mine(target_user): raise SynapseError(HTTPStatus.BAD_REQUEST, "Can only look up local users") - ret = await self.admin_handler.get_user(target_user) - - if not ret: + user_info_dict = await self.admin_handler.get_user(target_user) + if not user_info_dict: raise NotFoundError("User not found") - return HTTPStatus.OK, ret + return HTTPStatus.OK, user_info_dict async def on_PUT( self, request: SynapseRequest, user_id: str @@ -399,10 +398,10 @@ class UserRestServletV2(RestServlet): target_user, requester, body["avatar_url"], True ) - user = await self.admin_handler.get_user(target_user) - assert user is not None + user_info_dict = await self.admin_handler.get_user(target_user) + assert user_info_dict is not None - return 201, user + return HTTPStatus.CREATED, user_info_dict class UserRegisterServlet(RestServlet): diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index e0b9fe8e91..9711405735 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -1181,6 +1181,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): self.other_user, device_id=None, valid_until_ms=None ) ) + self.url_prefix = "/_synapse/admin/v2/users/%s" self.url_other_user = self.url_prefix % self.other_user @@ -1188,7 +1189,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): """ If the user is not a server admin, an error is returned. """ - url = "/_synapse/admin/v2/users/@bob:test" + url = self.url_prefix % "@bob:test" channel = self.make_request( "GET", @@ -1216,7 +1217,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): channel = self.make_request( "GET", - "/_synapse/admin/v2/users/@unknown_person:test", + self.url_prefix % "@unknown_person:test", access_token=self.admin_user_tok, ) @@ -1337,7 +1338,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): """ Check that a new admin user is created successfully. """ - url = "/_synapse/admin/v2/users/@bob:test" + url = self.url_prefix % "@bob:test" # Create user (server admin) body = { @@ -1386,7 +1387,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): """ Check that a new regular user is created successfully. """ - url = "/_synapse/admin/v2/users/@bob:test" + url = self.url_prefix % "@bob:test" # Create user body = { @@ -1478,7 +1479,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): ) # Register new user with admin API - url = "/_synapse/admin/v2/users/@bob:test" + url = self.url_prefix % "@bob:test" # Create user channel = self.make_request( @@ -1515,7 +1516,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): ) # Register new user with admin API - url = "/_synapse/admin/v2/users/@bob:test" + url = self.url_prefix % "@bob:test" # Create user channel = self.make_request( @@ -1545,7 +1546,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): Check that a new regular user is created successfully and got an email pusher. """ - url = "/_synapse/admin/v2/users/@bob:test" + url = self.url_prefix % "@bob:test" # Create user body = { @@ -1588,7 +1589,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): Check that a new regular user is created successfully and got not an email pusher. """ - url = "/_synapse/admin/v2/users/@bob:test" + url = self.url_prefix % "@bob:test" # Create user body = { @@ -2085,10 +2086,13 @@ class UserRestTestCase(unittest.HomeserverTestCase): self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) self.assertEqual("@user:test", channel.json_body["name"]) self.assertTrue(channel.json_body["deactivated"]) - self.assertIsNone(channel.json_body["password_hash"]) self.assertEqual(0, len(channel.json_body["threepids"])) self.assertEqual("mxc://servername/mediaid", channel.json_body["avatar_url"]) self.assertEqual("User", channel.json_body["displayname"]) + + # This key was removed intentionally. Ensure it is not accidentally re-included. + self.assertNotIn("password_hash", channel.json_body) + # the user is deactivated, the threepid will be deleted # Get user @@ -2101,11 +2105,13 @@ class UserRestTestCase(unittest.HomeserverTestCase): self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) self.assertEqual("@user:test", channel.json_body["name"]) self.assertTrue(channel.json_body["deactivated"]) - self.assertIsNone(channel.json_body["password_hash"]) self.assertEqual(0, len(channel.json_body["threepids"])) self.assertEqual("mxc://servername/mediaid", channel.json_body["avatar_url"]) self.assertEqual("User", channel.json_body["displayname"]) + # This key was removed intentionally. Ensure it is not accidentally re-included. + self.assertNotIn("password_hash", channel.json_body) + @override_config({"user_directory": {"enabled": True, "search_all_users": True}}) def test_change_name_deactivate_user_user_directory(self): """ @@ -2177,9 +2183,11 @@ class UserRestTestCase(unittest.HomeserverTestCase): self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) self.assertEqual("@user:test", channel.json_body["name"]) self.assertFalse(channel.json_body["deactivated"]) - self.assertIsNotNone(channel.json_body["password_hash"]) self._is_erased("@user:test", False) + # This key was removed intentionally. Ensure it is not accidentally re-included. + self.assertNotIn("password_hash", channel.json_body) + @override_config({"password_config": {"localdb_enabled": False}}) def test_reactivate_user_localdb_disabled(self): """ @@ -2209,9 +2217,11 @@ class UserRestTestCase(unittest.HomeserverTestCase): self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) self.assertEqual("@user:test", channel.json_body["name"]) self.assertFalse(channel.json_body["deactivated"]) - self.assertIsNone(channel.json_body["password_hash"]) self._is_erased("@user:test", False) + # This key was removed intentionally. Ensure it is not accidentally re-included. + self.assertNotIn("password_hash", channel.json_body) + @override_config({"password_config": {"enabled": False}}) def test_reactivate_user_password_disabled(self): """ @@ -2241,9 +2251,11 @@ class UserRestTestCase(unittest.HomeserverTestCase): self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) self.assertEqual("@user:test", channel.json_body["name"]) self.assertFalse(channel.json_body["deactivated"]) - self.assertIsNone(channel.json_body["password_hash"]) self._is_erased("@user:test", False) + # This key was removed intentionally. Ensure it is not accidentally re-included. + self.assertNotIn("password_hash", channel.json_body) + def test_set_user_as_admin(self): """ Test setting the admin flag on a user. @@ -2328,7 +2340,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): Ensure an account can't accidentally be deactivated by using a str value for the deactivated body parameter """ - url = "/_synapse/admin/v2/users/@bob:test" + url = self.url_prefix % "@bob:test" # Create user channel = self.make_request( @@ -2392,18 +2404,20 @@ class UserRestTestCase(unittest.HomeserverTestCase): # Deactivate the user. channel = self.make_request( "PUT", - "/_synapse/admin/v2/users/%s" % urllib.parse.quote(user_id), + self.url_prefix % urllib.parse.quote(user_id), access_token=self.admin_user_tok, content={"deactivated": True}, ) self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) self.assertTrue(channel.json_body["deactivated"]) - self.assertIsNone(channel.json_body["password_hash"]) self._is_erased(user_id, False) d = self.store.mark_user_erased(user_id) self.assertIsNone(self.get_success(d)) self._is_erased(user_id, True) + # This key was removed intentionally. Ensure it is not accidentally re-included. + self.assertNotIn("password_hash", channel.json_body) + def _check_fields(self, content: JsonDict): """Checks that the expected user attributes are present in content @@ -2416,13 +2430,15 @@ class UserRestTestCase(unittest.HomeserverTestCase): self.assertIn("admin", content) self.assertIn("deactivated", content) self.assertIn("shadow_banned", content) - self.assertIn("password_hash", content) self.assertIn("creation_ts", content) self.assertIn("appservice_id", content) self.assertIn("consent_server_notice_sent", content) self.assertIn("consent_version", content) self.assertIn("external_ids", content) + # This key was removed intentionally. Ensure it is not accidentally re-included. + self.assertNotIn("password_hash", content) + class UserMembershipRestTestCase(unittest.HomeserverTestCase): -- cgit 1.5.1 From 6b241f5286f05d4d8dc0569e90388713a956d038 Mon Sep 17 00:00:00 2001 From: Daniel Sonck Date: Mon, 17 Jan 2022 12:42:51 +0100 Subject: Make pagination of rooms in admin api stable (#11737) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Always add state.room_id after the configurable ORDER BY. Otherwise, for any sort, certain pages can contain results from other pages. (Especially when sorting by creator, since there may be many rooms by the same creator) * Document different order direction of numerical fields "joined_members", "joined_local_members", "version" and "state_events" are ordered in descending direction by default (dir=f). Added a note in tests to explain the differences in ordering. Signed-off-by: DaniĆ«l Sonck --- changelog.d/11737.bugfix | 1 + synapse/storage/databases/main/room.py | 18 ++++++------- tests/rest/admin/test_room.py | 47 ++++++++++++++++++++-------------- 3 files changed, 38 insertions(+), 28 deletions(-) create mode 100644 changelog.d/11737.bugfix (limited to 'tests/rest') diff --git a/changelog.d/11737.bugfix b/changelog.d/11737.bugfix new file mode 100644 index 0000000000..a293d1cfec --- /dev/null +++ b/changelog.d/11737.bugfix @@ -0,0 +1 @@ +Make the list rooms admin api sort stable. Contributed by DaniĆ«l Sonck. \ No newline at end of file diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index c0e837854a..95167116c9 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -551,24 +551,24 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): FROM room_stats_state state INNER JOIN room_stats_current curr USING (room_id) INNER JOIN rooms USING (room_id) - %s - ORDER BY %s %s + {where} + ORDER BY {order_by} {direction}, state.room_id {direction} LIMIT ? OFFSET ? - """ % ( - where_statement, - order_by_column, - "ASC" if order_by_asc else "DESC", + """.format( + where=where_statement, + order_by=order_by_column, + direction="ASC" if order_by_asc else "DESC", ) # Use a nested SELECT statement as SQL can't count(*) with an OFFSET count_sql = """ SELECT count(*) FROM ( SELECT room_id FROM room_stats_state state - %s + {where} ) AS get_room_ids - """ % ( - where_statement, + """.format( + where=where_statement, ) def _get_rooms_paginate_txn( diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py index d2c8781cd4..3495a0366a 100644 --- a/tests/rest/admin/test_room.py +++ b/tests/rest/admin/test_room.py @@ -1089,6 +1089,8 @@ class RoomTestCase(unittest.HomeserverTestCase): ) room_ids.append(room_id) + room_ids.sort() + # Request the list of rooms url = "/_synapse/admin/v1/rooms" channel = self.make_request( @@ -1360,6 +1362,12 @@ class RoomTestCase(unittest.HomeserverTestCase): room_id_2 = self.helper.create_room_as(self.admin_user, tok=self.admin_user_tok) room_id_3 = self.helper.create_room_as(self.admin_user, tok=self.admin_user_tok) + # Also create a list sorted by IDs for properties that are equal (and thus sorted by room_id) + sorted_by_room_id_asc = [room_id_1, room_id_2, room_id_3] + sorted_by_room_id_asc.sort() + sorted_by_room_id_desc = sorted_by_room_id_asc.copy() + sorted_by_room_id_desc.reverse() + # Set room names in alphabetical order. room 1 -> A, 2 -> B, 3 -> C self.helper.send_state( room_id_1, @@ -1405,41 +1413,42 @@ class RoomTestCase(unittest.HomeserverTestCase): _order_test("canonical_alias", [room_id_1, room_id_2, room_id_3]) _order_test("canonical_alias", [room_id_3, room_id_2, room_id_1], reverse=True) + # Note: joined_member counts are sorted in descending order when dir=f _order_test("joined_members", [room_id_3, room_id_2, room_id_1]) _order_test("joined_members", [room_id_1, room_id_2, room_id_3], reverse=True) + # Note: joined_local_member counts are sorted in descending order when dir=f _order_test("joined_local_members", [room_id_3, room_id_2, room_id_1]) _order_test( "joined_local_members", [room_id_1, room_id_2, room_id_3], reverse=True ) - _order_test("version", [room_id_1, room_id_2, room_id_3]) - _order_test("version", [room_id_1, room_id_2, room_id_3], reverse=True) + # Note: versions are sorted in descending order when dir=f + _order_test("version", sorted_by_room_id_asc, reverse=True) + _order_test("version", sorted_by_room_id_desc) - _order_test("creator", [room_id_1, room_id_2, room_id_3]) - _order_test("creator", [room_id_1, room_id_2, room_id_3], reverse=True) + _order_test("creator", sorted_by_room_id_asc) + _order_test("creator", sorted_by_room_id_desc, reverse=True) - _order_test("encryption", [room_id_1, room_id_2, room_id_3]) - _order_test("encryption", [room_id_1, room_id_2, room_id_3], reverse=True) + _order_test("encryption", sorted_by_room_id_asc) + _order_test("encryption", sorted_by_room_id_desc, reverse=True) - _order_test("federatable", [room_id_1, room_id_2, room_id_3]) - _order_test("federatable", [room_id_1, room_id_2, room_id_3], reverse=True) + _order_test("federatable", sorted_by_room_id_asc) + _order_test("federatable", sorted_by_room_id_desc, reverse=True) - _order_test("public", [room_id_1, room_id_2, room_id_3]) - # Different sort order of SQlite and PostreSQL - # _order_test("public", [room_id_3, room_id_2, room_id_1], reverse=True) + _order_test("public", sorted_by_room_id_asc) + _order_test("public", sorted_by_room_id_desc, reverse=True) - _order_test("join_rules", [room_id_1, room_id_2, room_id_3]) - _order_test("join_rules", [room_id_1, room_id_2, room_id_3], reverse=True) + _order_test("join_rules", sorted_by_room_id_asc) + _order_test("join_rules", sorted_by_room_id_desc, reverse=True) - _order_test("guest_access", [room_id_1, room_id_2, room_id_3]) - _order_test("guest_access", [room_id_1, room_id_2, room_id_3], reverse=True) + _order_test("guest_access", sorted_by_room_id_asc) + _order_test("guest_access", sorted_by_room_id_desc, reverse=True) - _order_test("history_visibility", [room_id_1, room_id_2, room_id_3]) - _order_test( - "history_visibility", [room_id_1, room_id_2, room_id_3], reverse=True - ) + _order_test("history_visibility", sorted_by_room_id_asc) + _order_test("history_visibility", sorted_by_room_id_desc, reverse=True) + # Note: state_event counts are sorted in descending order when dir=f _order_test("state_events", [room_id_3, room_id_2, room_id_1]) _order_test("state_events", [room_id_1, room_id_2, room_id_3], reverse=True) -- cgit 1.5.1 From 68acb0a29dcb03a0ecbcebdb95e09c5999598f42 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 18 Jan 2022 11:38:57 -0500 Subject: Include whether the requesting user has participated in a thread. (#11577) Per updates to MSC3440. This is implement as a separate method since it needs to be cached on a per-user basis, instead of a per-thread basis. --- changelog.d/11577.feature | 1 + synapse/handlers/pagination.py | 2 +- synapse/handlers/room.py | 12 ++++-- synapse/handlers/sync.py | 4 +- synapse/rest/client/relations.py | 4 +- synapse/rest/client/room.py | 4 +- synapse/storage/databases/main/events.py | 7 +++ synapse/storage/databases/main/relations.py | 66 ++++++++++++++++++++++++----- tests/rest/client/test_relations.py | 3 ++ 9 files changed, 85 insertions(+), 18 deletions(-) create mode 100644 changelog.d/11577.feature (limited to 'tests/rest') diff --git a/changelog.d/11577.feature b/changelog.d/11577.feature new file mode 100644 index 0000000000..f9c8a0d5f4 --- /dev/null +++ b/changelog.d/11577.feature @@ -0,0 +1 @@ +Include whether the requesting user has participated in a thread when generating a summary for [MSC3440](https://github.com/matrix-org/matrix-doc/pull/3440). diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 472688f045..973f262964 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -537,7 +537,7 @@ class PaginationHandler: state_dict = await self.store.get_events(list(state_ids.values())) state = state_dict.values() - aggregations = await self.store.get_bundled_aggregations(events) + aggregations = await self.store.get_bundled_aggregations(events, user_id) time_now = self.clock.time_msec() diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 3d47163f25..f963078e59 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1182,12 +1182,18 @@ class RoomContextHandler: results["event"] = filtered[0] # Fetch the aggregations. - aggregations = await self.store.get_bundled_aggregations([results["event"]]) + aggregations = await self.store.get_bundled_aggregations( + [results["event"]], user.to_string() + ) aggregations.update( - await self.store.get_bundled_aggregations(results["events_before"]) + await self.store.get_bundled_aggregations( + results["events_before"], user.to_string() + ) ) aggregations.update( - await self.store.get_bundled_aggregations(results["events_after"]) + await self.store.get_bundled_aggregations( + results["events_after"], user.to_string() + ) ) results["aggregations"] = aggregations diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index e1df9b3106..ffc6b748e8 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -637,7 +637,9 @@ class SyncHandler: # as clients will have all the necessary information. bundled_aggregations = None if limited or newly_joined_room: - bundled_aggregations = await self.store.get_bundled_aggregations(recents) + bundled_aggregations = await self.store.get_bundled_aggregations( + recents, sync_config.user.to_string() + ) return TimelineBatch( events=recents, diff --git a/synapse/rest/client/relations.py b/synapse/rest/client/relations.py index 37d949a71e..8cf5ebaa07 100644 --- a/synapse/rest/client/relations.py +++ b/synapse/rest/client/relations.py @@ -118,7 +118,9 @@ class RelationPaginationServlet(RestServlet): ) # The relations returned for the requested event do include their # bundled aggregations. - aggregations = await self.store.get_bundled_aggregations(events) + aggregations = await self.store.get_bundled_aggregations( + events, requester.user.to_string() + ) serialized_events = self._event_serializer.serialize_events( events, now, bundle_aggregations=aggregations ) diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index da6014900a..31fd329a38 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -663,7 +663,9 @@ class RoomEventServlet(RestServlet): if event: # Ensure there are bundled aggregations available. - aggregations = await self._store.get_bundled_aggregations([event]) + aggregations = await self._store.get_bundled_aggregations( + [event], requester.user.to_string() + ) time_now = self.clock.time_msec() event_dict = self._event_serializer.serialize_event( diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 2be36a741a..7278002322 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1793,6 +1793,13 @@ class PersistEventsStore: txn.call_after( self.store.get_thread_summary.invalidate, (parent_id, event.room_id) ) + # It should be safe to only invalidate the cache if the user has not + # previously participated in the thread, but that's difficult (and + # potentially error-prone) so it is always invalidated. + txn.call_after( + self.store.get_thread_participated.invalidate, + (parent_id, event.room_id, event.sender), + ) def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase): """Handles keeping track of insertion events and edges/connections. diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py index c6c4bd18da..2cb5d06c13 100644 --- a/synapse/storage/databases/main/relations.py +++ b/synapse/storage/databases/main/relations.py @@ -384,8 +384,7 @@ class RelationsWorkerStore(SQLBaseStore): async def get_thread_summary( self, event_id: str, room_id: str ) -> Tuple[int, Optional[EventBase]]: - """Get the number of threaded replies, the senders of those replies, and - the latest reply (if any) for the given event. + """Get the number of threaded replies and the latest reply (if any) for the given event. Args: event_id: Summarize the thread related to this event ID. @@ -398,7 +397,7 @@ class RelationsWorkerStore(SQLBaseStore): def _get_thread_summary_txn( txn: LoggingTransaction, ) -> Tuple[int, Optional[str]]: - # Fetch the count of threaded events and the latest event ID. + # Fetch the latest event ID in the thread. # TODO Should this only allow m.room.message events. sql = """ SELECT event_id @@ -419,6 +418,7 @@ class RelationsWorkerStore(SQLBaseStore): latest_event_id = row[0] + # Fetch the number of threaded replies. sql = """ SELECT COUNT(event_id) FROM event_relations @@ -443,6 +443,44 @@ class RelationsWorkerStore(SQLBaseStore): return count, latest_event + @cached() + async def get_thread_participated( + self, event_id: str, room_id: str, user_id: str + ) -> bool: + """Get whether the requesting user participated in a thread. + + This is separate from get_thread_summary since that can be cached across + all users while this value is specific to the requeser. + + Args: + event_id: The thread related to this event ID. + room_id: The room the event belongs to. + user_id: The user requesting the summary. + + Returns: + True if the requesting user participated in the thread, otherwise false. + """ + + def _get_thread_summary_txn(txn: LoggingTransaction) -> bool: + # Fetch whether the requester has participated or not. + sql = """ + SELECT 1 + FROM event_relations + INNER JOIN events USING (event_id) + WHERE + relates_to_id = ? + AND room_id = ? + AND relation_type = ? + AND sender = ? + """ + + txn.execute(sql, (event_id, room_id, RelationTypes.THREAD, user_id)) + return bool(txn.fetchone()) + + return await self.db_pool.runInteraction( + "get_thread_summary", _get_thread_summary_txn + ) + async def events_have_relations( self, parent_ids: List[str], @@ -546,7 +584,7 @@ class RelationsWorkerStore(SQLBaseStore): ) async def _get_bundled_aggregation_for_event( - self, event: EventBase + self, event: EventBase, user_id: str ) -> Optional[Dict[str, Any]]: """Generate bundled aggregations for an event. @@ -554,6 +592,7 @@ class RelationsWorkerStore(SQLBaseStore): Args: event: The event to calculate bundled aggregations for. + user_id: The user requesting the bundled aggregations. Returns: The bundled aggregations for an event, if bundled aggregations are @@ -598,27 +637,32 @@ class RelationsWorkerStore(SQLBaseStore): # If this event is the start of a thread, include a summary of the replies. if self._msc3440_enabled: - ( - thread_count, - latest_thread_event, - ) = await self.get_thread_summary(event_id, room_id) + thread_count, latest_thread_event = await self.get_thread_summary( + event_id, room_id + ) + participated = await self.get_thread_participated( + event_id, room_id, user_id + ) if latest_thread_event: aggregations[RelationTypes.THREAD] = { - # Don't bundle aggregations as this could recurse forever. "latest_event": latest_thread_event, "count": thread_count, + "current_user_participated": participated, } # Store the bundled aggregations in the event metadata for later use. return aggregations async def get_bundled_aggregations( - self, events: Iterable[EventBase] + self, + events: Iterable[EventBase], + user_id: str, ) -> Dict[str, Dict[str, Any]]: """Generate bundled aggregations for events. Args: events: The iterable of events to calculate bundled aggregations for. + user_id: The user requesting the bundled aggregations. Returns: A map of event ID to the bundled aggregation for the event. Not all @@ -631,7 +675,7 @@ class RelationsWorkerStore(SQLBaseStore): # TODO Parallelize. results = {} for event in events: - event_result = await self._get_bundled_aggregation_for_event(event) + event_result = await self._get_bundled_aggregation_for_event(event, user_id) if event_result is not None: results[event.event_id] = event_result diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py index ee26751430..4b20ab0e3e 100644 --- a/tests/rest/client/test_relations.py +++ b/tests/rest/client/test_relations.py @@ -515,6 +515,9 @@ class RelationsTestCase(unittest.HomeserverTestCase): 2, actual[RelationTypes.THREAD].get("count"), ) + self.assertTrue( + actual[RelationTypes.THREAD].get("current_user_participated") + ) # The latest thread event has some fields that don't matter. self.assert_dict( { -- cgit 1.5.1 From 7a11509d17f56b8711a3bda11491b3a5a42a2f67 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 21 Jan 2022 05:31:31 -0500 Subject: Do not try to serialize raw aggregations dict. (#11791) --- changelog.d/11612.bugfix | 1 + changelog.d/11612.misc | 1 - changelog.d/11791.bugfix | 1 + synapse/events/utils.py | 4 +- synapse/rest/admin/rooms.py | 13 ++--- synapse/rest/client/room.py | 11 ++-- tests/rest/client/test_relations.py | 108 ++++++++++++++++++++++++------------ 7 files changed, 85 insertions(+), 54 deletions(-) create mode 100644 changelog.d/11612.bugfix delete mode 100644 changelog.d/11612.misc create mode 100644 changelog.d/11791.bugfix (limited to 'tests/rest') diff --git a/changelog.d/11612.bugfix b/changelog.d/11612.bugfix new file mode 100644 index 0000000000..842f6892fd --- /dev/null +++ b/changelog.d/11612.bugfix @@ -0,0 +1 @@ +Include the bundled aggregations in the `/sync` response, per [MSC2675](https://github.com/matrix-org/matrix-doc/pull/2675). diff --git a/changelog.d/11612.misc b/changelog.d/11612.misc deleted file mode 100644 index 2d886169c5..0000000000 --- a/changelog.d/11612.misc +++ /dev/null @@ -1 +0,0 @@ -Avoid database access in the JSON serialization process. diff --git a/changelog.d/11791.bugfix b/changelog.d/11791.bugfix new file mode 100644 index 0000000000..842f6892fd --- /dev/null +++ b/changelog.d/11791.bugfix @@ -0,0 +1 @@ +Include the bundled aggregations in the `/sync` response, per [MSC2675](https://github.com/matrix-org/matrix-doc/pull/2675). diff --git a/synapse/events/utils.py b/synapse/events/utils.py index de0e0c1731..918adeecf8 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -402,7 +402,7 @@ class EventClientSerializer: if bundle_aggregations: event_aggregations = bundle_aggregations.get(event.event_id) if event_aggregations: - self._injected_bundled_aggregations( + self._inject_bundled_aggregations( event, time_now, bundle_aggregations[event.event_id], @@ -411,7 +411,7 @@ class EventClientSerializer: return serialized_event - def _injected_bundled_aggregations( + def _inject_bundled_aggregations( self, event: EventBase, time_now: int, diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py index 2e714ac87b..efe25fe7eb 100644 --- a/synapse/rest/admin/rooms.py +++ b/synapse/rest/admin/rooms.py @@ -744,20 +744,15 @@ class RoomEventContextServlet(RestServlet): ) time_now = self.clock.time_msec() + aggregations = results.pop("aggregations", None) results["events_before"] = self._event_serializer.serialize_events( - results["events_before"], - time_now, - bundle_aggregations=results["aggregations"], + results["events_before"], time_now, bundle_aggregations=aggregations ) results["event"] = self._event_serializer.serialize_event( - results["event"], - time_now, - bundle_aggregations=results["aggregations"], + results["event"], time_now, bundle_aggregations=aggregations ) results["events_after"] = self._event_serializer.serialize_events( - results["events_after"], - time_now, - bundle_aggregations=results["aggregations"], + results["events_after"], time_now, bundle_aggregations=aggregations ) results["state"] = self._event_serializer.serialize_events( results["state"], time_now diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 31fd329a38..90bb9142a0 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -714,18 +714,15 @@ class RoomEventContextServlet(RestServlet): raise SynapseError(404, "Event not found.", errcode=Codes.NOT_FOUND) time_now = self.clock.time_msec() + aggregations = results.pop("aggregations", None) results["events_before"] = self._event_serializer.serialize_events( - results["events_before"], - time_now, - bundle_aggregations=results["aggregations"], + results["events_before"], time_now, bundle_aggregations=aggregations ) results["event"] = self._event_serializer.serialize_event( - results["event"], time_now, bundle_aggregations=results["aggregations"] + results["event"], time_now, bundle_aggregations=aggregations ) results["events_after"] = self._event_serializer.serialize_events( - results["events_after"], - time_now, - bundle_aggregations=results["aggregations"], + results["events_after"], time_now, bundle_aggregations=aggregations ) results["state"] = self._event_serializer.serialize_events( results["state"], time_now diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py index 4b20ab0e3e..c9b220e73d 100644 --- a/tests/rest/client/test_relations.py +++ b/tests/rest/client/test_relations.py @@ -21,6 +21,7 @@ from unittest.mock import patch from synapse.api.constants import EventTypes, RelationTypes from synapse.rest import admin from synapse.rest.client import login, register, relations, room, sync +from synapse.types import JsonDict from tests import unittest from tests.server import FakeChannel @@ -454,7 +455,14 @@ class RelationsTestCase(unittest.HomeserverTestCase): @unittest.override_config({"experimental_features": {"msc3440_enabled": True}}) def test_bundled_aggregations(self): - """Test that annotations, references, and threads get correctly bundled.""" + """ + Test that annotations, references, and threads get correctly bundled. + + Note that this doesn't test against /relations since only thread relations + get bundled via that API. See test_aggregation_get_event_for_thread. + + See test_edit for a similar test for edits. + """ # Setup by sending a variety of relations. channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a") self.assertEquals(200, channel.code, channel.json_body) @@ -482,12 +490,13 @@ class RelationsTestCase(unittest.HomeserverTestCase): self.assertEquals(200, channel.code, channel.json_body) thread_2 = channel.json_body["event_id"] - def assert_bundle(actual): + def assert_bundle(event_json: JsonDict) -> None: """Assert the expected values of the bundled aggregations.""" + relations_dict = event_json["unsigned"].get("m.relations") # Ensure the fields are as expected. self.assertCountEqual( - actual.keys(), + relations_dict.keys(), ( RelationTypes.ANNOTATION, RelationTypes.REFERENCE, @@ -503,20 +512,20 @@ class RelationsTestCase(unittest.HomeserverTestCase): {"type": "m.reaction", "key": "b", "count": 1}, ] }, - actual[RelationTypes.ANNOTATION], + relations_dict[RelationTypes.ANNOTATION], ) self.assertEquals( {"chunk": [{"event_id": reply_1}, {"event_id": reply_2}]}, - actual[RelationTypes.REFERENCE], + relations_dict[RelationTypes.REFERENCE], ) self.assertEquals( 2, - actual[RelationTypes.THREAD].get("count"), + relations_dict[RelationTypes.THREAD].get("count"), ) self.assertTrue( - actual[RelationTypes.THREAD].get("current_user_participated") + relations_dict[RelationTypes.THREAD].get("current_user_participated") ) # The latest thread event has some fields that don't matter. self.assert_dict( @@ -533,20 +542,9 @@ class RelationsTestCase(unittest.HomeserverTestCase): "type": "m.room.test", "user_id": self.user_id, }, - actual[RelationTypes.THREAD].get("latest_event"), + relations_dict[RelationTypes.THREAD].get("latest_event"), ) - def _find_and_assert_event(events): - """ - Find the parent event in a chunk of events and assert that it has the proper bundled aggregations. - """ - for event in events: - if event["event_id"] == self.parent_id: - break - else: - raise AssertionError(f"Event {self.parent_id} not found in chunk") - assert_bundle(event["unsigned"].get("m.relations")) - # Request the event directly. channel = self.make_request( "GET", @@ -554,7 +552,7 @@ class RelationsTestCase(unittest.HomeserverTestCase): access_token=self.user_token, ) self.assertEquals(200, channel.code, channel.json_body) - assert_bundle(channel.json_body["unsigned"].get("m.relations")) + assert_bundle(channel.json_body) # Request the room messages. channel = self.make_request( @@ -563,7 +561,7 @@ class RelationsTestCase(unittest.HomeserverTestCase): access_token=self.user_token, ) self.assertEquals(200, channel.code, channel.json_body) - _find_and_assert_event(channel.json_body["chunk"]) + assert_bundle(self._find_event_in_chunk(channel.json_body["chunk"])) # Request the room context. channel = self.make_request( @@ -572,17 +570,14 @@ class RelationsTestCase(unittest.HomeserverTestCase): access_token=self.user_token, ) self.assertEquals(200, channel.code, channel.json_body) - assert_bundle(channel.json_body["event"]["unsigned"].get("m.relations")) + assert_bundle(channel.json_body["event"]) # Request sync. channel = self.make_request("GET", "/sync", access_token=self.user_token) self.assertEquals(200, channel.code, channel.json_body) room_timeline = channel.json_body["rooms"]["join"][self.room]["timeline"] self.assertTrue(room_timeline["limited"]) - _find_and_assert_event(room_timeline["events"]) - - # Note that /relations is tested separately in test_aggregation_get_event_for_thread - # since it needs different data configured. + self._find_event_in_chunk(room_timeline["events"]) def test_aggregation_get_event_for_annotation(self): """Test that annotations do not get bundled aggregations included @@ -777,25 +772,58 @@ class RelationsTestCase(unittest.HomeserverTestCase): edit_event_id = channel.json_body["event_id"] + def assert_bundle(event_json: JsonDict) -> None: + """Assert the expected values of the bundled aggregations.""" + relations_dict = event_json["unsigned"].get("m.relations") + self.assertIn(RelationTypes.REPLACE, relations_dict) + + m_replace_dict = relations_dict[RelationTypes.REPLACE] + for key in ["event_id", "sender", "origin_server_ts"]: + self.assertIn(key, m_replace_dict) + + self.assert_dict( + {"event_id": edit_event_id, "sender": self.user_id}, m_replace_dict + ) + channel = self.make_request( "GET", - "/rooms/%s/event/%s" % (self.room, self.parent_id), + f"/rooms/{self.room}/event/{self.parent_id}", access_token=self.user_token, ) self.assertEquals(200, channel.code, channel.json_body) - self.assertEquals(channel.json_body["content"], new_body) + assert_bundle(channel.json_body) - relations_dict = channel.json_body["unsigned"].get("m.relations") - self.assertIn(RelationTypes.REPLACE, relations_dict) + # Request the room messages. + channel = self.make_request( + "GET", + f"/rooms/{self.room}/messages?dir=b", + access_token=self.user_token, + ) + self.assertEquals(200, channel.code, channel.json_body) + assert_bundle(self._find_event_in_chunk(channel.json_body["chunk"])) - m_replace_dict = relations_dict[RelationTypes.REPLACE] - for key in ["event_id", "sender", "origin_server_ts"]: - self.assertIn(key, m_replace_dict) + # Request the room context. + channel = self.make_request( + "GET", + f"/rooms/{self.room}/context/{self.parent_id}", + access_token=self.user_token, + ) + self.assertEquals(200, channel.code, channel.json_body) + assert_bundle(channel.json_body["event"]) - self.assert_dict( - {"event_id": edit_event_id, "sender": self.user_id}, m_replace_dict + # Request sync, but limit the timeline so it becomes limited (and includes + # bundled aggregations). + filter = urllib.parse.quote_plus( + '{"room": {"timeline": {"limit": 2}}}'.encode() + ) + channel = self.make_request( + "GET", f"/sync?filter={filter}", access_token=self.user_token ) + self.assertEquals(200, channel.code, channel.json_body) + room_timeline = channel.json_body["rooms"]["join"][self.room]["timeline"] + self.assertTrue(room_timeline["limited"]) + assert_bundle(self._find_event_in_chunk(room_timeline["events"])) def test_multi_edit(self): """Test that multiple edits, including attempts by people who @@ -1102,6 +1130,16 @@ class RelationsTestCase(unittest.HomeserverTestCase): self.assertEquals(200, channel.code, channel.json_body) self.assertEquals(channel.json_body["chunk"], []) + def _find_event_in_chunk(self, events: List[JsonDict]) -> JsonDict: + """ + Find the parent event in a chunk of events and assert that it has the proper bundled aggregations. + """ + for event in events: + if event["event_id"] == self.parent_id: + return event + + raise AssertionError(f"Event {self.parent_id} not found in chunk") + def _send_relation( self, relation_type: str, -- cgit 1.5.1