From 102677638002b3ef6ae956947333ddcde80680a7 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Mon, 2 Oct 2023 15:22:36 +0100 Subject: mypy plugin to check `@cached` return types (#14911) Co-authored-by: David Robertson Co-authored-by: Patrick Cloke Co-authored-by: Erik Johnston Assert that the return type of callables wrapped in @cached and @cachedList are cachable (aka immutable). --- synapse/storage/databases/main/event_push_actions.py | 7 ++++--- synapse/storage/databases/main/relations.py | 12 +++++++----- synapse/storage/databases/main/roommember.py | 5 +++-- 3 files changed, 14 insertions(+), 10 deletions(-) (limited to 'synapse/storage/databases') diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index ba99e63d26..39556481ff 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -182,6 +182,7 @@ class UserPushAction(EmailPushAction): profile_tag: str +# TODO This is used as a cached value and is mutable. @attr.s(slots=True, auto_attribs=True) class NotifCounts: """ @@ -193,7 +194,7 @@ class NotifCounts: highlight_count: int = 0 -@attr.s(slots=True, auto_attribs=True) +@attr.s(slots=True, frozen=True, auto_attribs=True) class RoomNotifCounts: """ The per-user, per-room count of notifications. Used by sync and push. @@ -201,7 +202,7 @@ class RoomNotifCounts: main_timeline: NotifCounts # Map of thread ID to the notification counts. - threads: Dict[str, NotifCounts] + threads: Mapping[str, NotifCounts] @staticmethod def empty() -> "RoomNotifCounts": @@ -483,7 +484,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas return room_to_count - @cached(tree=True, max_entries=5000, iterable=True) + @cached(tree=True, max_entries=5000, iterable=True) # type: ignore[synapse-@cached-mutable] async def get_unread_event_push_actions_by_room_for_user( self, room_id: str, diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py index b67f780c10..9246b418f5 100644 --- a/synapse/storage/databases/main/relations.py +++ b/synapse/storage/databases/main/relations.py @@ -458,7 +458,7 @@ class RelationsWorkerStore(SQLBaseStore): ) return result is not None - @cached() + @cached() # type: ignore[synapse-@cached-mutable] async def get_references_for_event(self, event_id: str) -> List[JsonDict]: raise NotImplementedError() @@ -512,11 +512,12 @@ class RelationsWorkerStore(SQLBaseStore): "_get_references_for_events_txn", _get_references_for_events_txn ) - @cached() + @cached() # type: ignore[synapse-@cached-mutable] def get_applicable_edit(self, event_id: str) -> Optional[EventBase]: raise NotImplementedError() - @cachedList(cached_method_name="get_applicable_edit", list_name="event_ids") + # TODO: This returns a mutable object, which is generally bad. + @cachedList(cached_method_name="get_applicable_edit", list_name="event_ids") # type: ignore[synapse-@cached-mutable] async def get_applicable_edits( self, event_ids: Collection[str] ) -> Mapping[str, Optional[EventBase]]: @@ -598,11 +599,12 @@ class RelationsWorkerStore(SQLBaseStore): for original_event_id in event_ids } - @cached() + @cached() # type: ignore[synapse-@cached-mutable] def get_thread_summary(self, event_id: str) -> Optional[Tuple[int, EventBase]]: raise NotImplementedError() - @cachedList(cached_method_name="get_thread_summary", list_name="event_ids") + # TODO: This returns a mutable object, which is generally bad. + @cachedList(cached_method_name="get_thread_summary", list_name="event_ids") # type: ignore[synapse-@cached-mutable] async def get_thread_summaries( self, event_ids: Collection[str] ) -> Mapping[str, Optional[Tuple[int, EventBase]]]: diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 3755773faa..e93573f315 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -275,7 +275,7 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore): _get_users_in_room_with_profiles, ) - @cached(max_entries=100000) + @cached(max_entries=100000) # type: ignore[synapse-@cached-mutable] async def get_room_summary(self, room_id: str) -> Mapping[str, MemberSummary]: """Get the details of a room roughly suitable for use by the room summary extension to /sync. Useful when lazy loading room members. @@ -1071,7 +1071,8 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore): ) return {row["event_id"]: row["membership"] for row in rows} - @cached(max_entries=10000) + # TODO This returns a mutable object, which is generally confusing when using a cache. + @cached(max_entries=10000) # type: ignore[synapse-@cached-mutable] def _get_joined_hosts_cache(self, room_id: str) -> "_JoinedHostsCache": return _JoinedHostsCache() -- cgit 1.5.1 From 80ec81dcc54bdb823b95c2f870a919868de9a481 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 4 Oct 2023 18:28:40 +0300 Subject: Some refactors around receipts stream (#16426) --- changelog.d/16426.misc | 1 + synapse/handlers/appservice.py | 4 +- synapse/handlers/push_rules.py | 6 ++- synapse/handlers/receipts.py | 25 +++++------ synapse/notifier.py | 17 ++++--- synapse/push/__init__.py | 2 +- synapse/push/emailpusher.py | 2 +- synapse/push/httppusher.py | 2 +- synapse/push/pusherpool.py | 12 +---- synapse/replication/tcp/client.py | 4 +- synapse/storage/databases/main/e2e_room_keys.py | 2 +- synapse/storage/databases/main/receipts.py | 6 +-- synapse/streams/events.py | 15 ++++--- synapse/types/__init__.py | 59 ++++++++++++++++++------- tests/handlers/test_appservice.py | 8 ++-- tests/handlers/test_typing.py | 26 ++++++++--- 16 files changed, 111 insertions(+), 80 deletions(-) create mode 100644 changelog.d/16426.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/16426.misc b/changelog.d/16426.misc new file mode 100644 index 0000000000..208a007171 --- /dev/null +++ b/changelog.d/16426.misc @@ -0,0 +1 @@ +Refactor some code to simplify and better type receipts stream adjacent code. diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index 7de7bd3289..c200a45f3a 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -216,7 +216,7 @@ class ApplicationServicesHandler: def notify_interested_services_ephemeral( self, - stream_key: str, + stream_key: StreamKeyType, new_token: Union[int, RoomStreamToken], users: Collection[Union[str, UserID]], ) -> None: @@ -326,7 +326,7 @@ class ApplicationServicesHandler: async def _notify_interested_services_ephemeral( self, services: List[ApplicationService], - stream_key: str, + stream_key: StreamKeyType, new_token: int, users: Collection[Union[str, UserID]], ) -> None: diff --git a/synapse/handlers/push_rules.py b/synapse/handlers/push_rules.py index 7ed88a3611..87b428ab1c 100644 --- a/synapse/handlers/push_rules.py +++ b/synapse/handlers/push_rules.py @@ -19,7 +19,7 @@ from synapse.api.errors import SynapseError, UnrecognizedRequestError from synapse.push.clientformat import format_push_rules_for_user from synapse.storage.push_rule import RuleNotFoundException from synapse.synapse_rust.push import get_base_rule_ids -from synapse.types import JsonDict, UserID +from synapse.types import JsonDict, StreamKeyType, UserID if TYPE_CHECKING: from synapse.server import HomeServer @@ -114,7 +114,9 @@ class PushRulesHandler: user_id: the user ID the change is for. """ stream_id = self._main_store.get_max_push_rules_stream_id() - self._notifier.on_new_event("push_rules_key", stream_id, users=[user_id]) + self._notifier.on_new_event( + StreamKeyType.PUSH_RULES, stream_id, users=[user_id] + ) async def push_rules_for_user( self, user: UserID diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index a7a29b758b..69ac468f75 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -130,11 +130,10 @@ class ReceiptsHandler: async def _handle_new_receipts(self, receipts: List[ReadReceipt]) -> bool: """Takes a list of receipts, stores them and informs the notifier.""" - min_batch_id: Optional[int] = None - max_batch_id: Optional[int] = None + receipts_persisted: List[ReadReceipt] = [] for receipt in receipts: - res = await self.store.insert_receipt( + stream_id = await self.store.insert_receipt( receipt.room_id, receipt.receipt_type, receipt.user_id, @@ -143,30 +142,26 @@ class ReceiptsHandler: receipt.data, ) - if not res: - # res will be None if this receipt is 'old' + if stream_id is None: + # stream_id will be None if this receipt is 'old' continue - stream_id, max_persisted_id = res + receipts_persisted.append(receipt) - if min_batch_id is None or stream_id < min_batch_id: - min_batch_id = stream_id - if max_batch_id is None or max_persisted_id > max_batch_id: - max_batch_id = max_persisted_id - - # Either both of these should be None or neither. - if min_batch_id is None or max_batch_id is None: + if not receipts_persisted: # no new receipts return False - affected_room_ids = list({r.room_id for r in receipts}) + max_batch_id = self.store.get_max_receipt_stream_id() + + affected_room_ids = list({r.room_id for r in receipts_persisted}) self.notifier.on_new_event( StreamKeyType.RECEIPT, max_batch_id, rooms=affected_room_ids ) # Note that the min here shouldn't be relied upon to be accurate. await self.hs.get_pusherpool().on_new_receipts( - min_batch_id, max_batch_id, affected_room_ids + {r.user_id for r in receipts_persisted} ) return True diff --git a/synapse/notifier.py b/synapse/notifier.py index fc39e5c963..99e7715896 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -126,7 +126,7 @@ class _NotifierUserStream: def notify( self, - stream_key: str, + stream_key: StreamKeyType, stream_id: Union[int, RoomStreamToken], time_now_ms: int, ) -> None: @@ -454,7 +454,7 @@ class Notifier: def on_new_event( self, - stream_key: str, + stream_key: StreamKeyType, new_token: Union[int, RoomStreamToken], users: Optional[Collection[Union[str, UserID]]] = None, rooms: Optional[StrCollection] = None, @@ -655,30 +655,29 @@ class Notifier: events: List[Union[JsonDict, EventBase]] = [] end_token = from_token - for name, source in self.event_sources.sources.get_sources(): - keyname = "%s_key" % name - before_id = getattr(before_token, keyname) - after_id = getattr(after_token, keyname) + for keyname, source in self.event_sources.sources.get_sources(): + before_id = before_token.get_field(keyname) + after_id = after_token.get_field(keyname) if before_id == after_id: continue new_events, new_key = await source.get_new_events( user=user, - from_key=getattr(from_token, keyname), + from_key=from_token.get_field(keyname), limit=limit, is_guest=is_peeking, room_ids=room_ids, explicit_room_id=explicit_room_id, ) - if name == "room": + if keyname == StreamKeyType.ROOM: new_events = await filter_events_for_client( self._storage_controllers, user.to_string(), new_events, is_peeking=is_peeking, ) - elif name == "presence": + elif keyname == StreamKeyType.PRESENCE: now = self.clock.time_msec() new_events[:] = [ { diff --git a/synapse/push/__init__.py b/synapse/push/__init__.py index 9e3a98741a..9e5eb2a445 100644 --- a/synapse/push/__init__.py +++ b/synapse/push/__init__.py @@ -182,7 +182,7 @@ class Pusher(metaclass=abc.ABCMeta): raise NotImplementedError() @abc.abstractmethod - def on_new_receipts(self, min_stream_id: int, max_stream_id: int) -> None: + def on_new_receipts(self) -> None: raise NotImplementedError() @abc.abstractmethod diff --git a/synapse/push/emailpusher.py b/synapse/push/emailpusher.py index 1710dd51b9..cf45fd09a8 100644 --- a/synapse/push/emailpusher.py +++ b/synapse/push/emailpusher.py @@ -99,7 +99,7 @@ class EmailPusher(Pusher): pass self.timed_call = None - def on_new_receipts(self, min_stream_id: int, max_stream_id: int) -> None: + def on_new_receipts(self) -> None: # We could wake up and cancel the timer but there tend to be quite a # lot of read receipts so it's probably less work to just let the # timer fire diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index 50027680cb..725910a659 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -160,7 +160,7 @@ class HttpPusher(Pusher): if should_check_for_notifs: self._start_processing() - def on_new_receipts(self, min_stream_id: int, max_stream_id: int) -> None: + def on_new_receipts(self) -> None: # Note that the min here shouldn't be relied upon to be accurate. # We could check the receipts are actually m.read receipts here, diff --git a/synapse/push/pusherpool.py b/synapse/push/pusherpool.py index 6517e3566f..15a2cc932f 100644 --- a/synapse/push/pusherpool.py +++ b/synapse/push/pusherpool.py @@ -292,20 +292,12 @@ class PusherPool: except Exception: logger.exception("Exception in pusher on_new_notifications") - async def on_new_receipts( - self, min_stream_id: int, max_stream_id: int, affected_room_ids: Iterable[str] - ) -> None: + async def on_new_receipts(self, users_affected: StrCollection) -> None: if not self.pushers: # nothing to do here. return try: - # Need to subtract 1 from the minimum because the lower bound here - # is not inclusive - users_affected = await self.store.get_users_sent_receipts_between( - min_stream_id - 1, max_stream_id - ) - for u in users_affected: # Don't push if the user account has expired expired = await self._account_validity_handler.is_user_expired(u) @@ -314,7 +306,7 @@ class PusherPool: if u in self.pushers: for p in self.pushers[u].values(): - p.on_new_receipts(min_stream_id, max_stream_id) + p.on_new_receipts() except Exception: logger.exception("Exception in pusher on_new_receipts") diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index f4f2b29e96..d5337fe588 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -129,9 +129,7 @@ class ReplicationDataHandler: self.notifier.on_new_event( StreamKeyType.RECEIPT, token, rooms=[row.room_id for row in rows] ) - await self._pusher_pool.on_new_receipts( - token, token, {row.room_id for row in rows} - ) + await self._pusher_pool.on_new_receipts({row.user_id for row in rows}) elif stream_name == ToDeviceStream.NAME: entities = [row.entity for row in rows if row.entity.startswith("@")] if entities: diff --git a/synapse/storage/databases/main/e2e_room_keys.py b/synapse/storage/databases/main/e2e_room_keys.py index d01f28cc80..bc7c6a6346 100644 --- a/synapse/storage/databases/main/e2e_room_keys.py +++ b/synapse/storage/databases/main/e2e_room_keys.py @@ -208,7 +208,7 @@ class EndToEndRoomKeyStore(EndToEndRoomKeyBackgroundStore): "message": "Set room key", "room_id": room_id, "session_id": session_id, - StreamKeyType.ROOM: room_key, + StreamKeyType.ROOM.value: room_key, } ) diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py index 0231f9407b..3bab1024ea 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py @@ -742,7 +742,7 @@ class ReceiptsWorkerStore(SQLBaseStore): event_ids: List[str], thread_id: Optional[str], data: dict, - ) -> Optional[Tuple[int, int]]: + ) -> Optional[int]: """Insert a receipt, either from local client or remote server. Automatically does conversion between linearized and graph @@ -804,9 +804,7 @@ class ReceiptsWorkerStore(SQLBaseStore): data, ) - max_persisted_id = self._receipts_id_gen.get_current_token() - - return stream_id, max_persisted_id + return stream_id async def _insert_graph_receipt( self, diff --git a/synapse/streams/events.py b/synapse/streams/events.py index d7084d2358..609a0978a9 100644 --- a/synapse/streams/events.py +++ b/synapse/streams/events.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import TYPE_CHECKING, Iterator, Tuple +from typing import TYPE_CHECKING, Sequence, Tuple import attr @@ -23,7 +23,7 @@ from synapse.handlers.room import RoomEventSource from synapse.handlers.typing import TypingNotificationEventSource from synapse.logging.opentracing import trace from synapse.streams import EventSource -from synapse.types import StreamToken +from synapse.types import StreamKeyType, StreamToken if TYPE_CHECKING: from synapse.server import HomeServer @@ -37,9 +37,14 @@ class _EventSourcesInner: receipt: ReceiptEventSource account_data: AccountDataEventSource - def get_sources(self) -> Iterator[Tuple[str, EventSource]]: - for attribute in attr.fields(_EventSourcesInner): - yield attribute.name, getattr(self, attribute.name) + def get_sources(self) -> Sequence[Tuple[StreamKeyType, EventSource]]: + return [ + (StreamKeyType.ROOM, self.room), + (StreamKeyType.PRESENCE, self.presence), + (StreamKeyType.TYPING, self.typing), + (StreamKeyType.RECEIPT, self.receipt), + (StreamKeyType.ACCOUNT_DATA, self.account_data), + ] class EventSources: diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py index 76b0e3e694..406d5b1611 100644 --- a/synapse/types/__init__.py +++ b/synapse/types/__init__.py @@ -22,8 +22,8 @@ from typing import ( Any, ClassVar, Dict, - Final, List, + Literal, Mapping, Match, MutableMapping, @@ -34,6 +34,7 @@ from typing import ( Type, TypeVar, Union, + overload, ) import attr @@ -649,20 +650,20 @@ class RoomStreamToken: return "s%d" % (self.stream,) -class StreamKeyType: +class StreamKeyType(Enum): """Known stream types. A stream is a list of entities ordered by an incrementing "stream token". """ - ROOM: Final = "room_key" - PRESENCE: Final = "presence_key" - TYPING: Final = "typing_key" - RECEIPT: Final = "receipt_key" - ACCOUNT_DATA: Final = "account_data_key" - PUSH_RULES: Final = "push_rules_key" - TO_DEVICE: Final = "to_device_key" - DEVICE_LIST: Final = "device_list_key" + ROOM = "room_key" + PRESENCE = "presence_key" + TYPING = "typing_key" + RECEIPT = "receipt_key" + ACCOUNT_DATA = "account_data_key" + PUSH_RULES = "push_rules_key" + TO_DEVICE = "to_device_key" + DEVICE_LIST = "device_list_key" UN_PARTIAL_STATED_ROOMS = "un_partial_stated_rooms_key" @@ -784,7 +785,7 @@ class StreamToken: def room_stream_id(self) -> int: return self.room_key.stream - def copy_and_advance(self, key: str, new_value: Any) -> "StreamToken": + def copy_and_advance(self, key: StreamKeyType, new_value: Any) -> "StreamToken": """Advance the given key in the token to a new value if and only if the new value is after the old value. @@ -797,16 +798,44 @@ class StreamToken: return new_token new_token = self.copy_and_replace(key, new_value) - new_id = int(getattr(new_token, key)) - old_id = int(getattr(self, key)) + new_id = new_token.get_field(key) + old_id = self.get_field(key) if old_id < new_id: return new_token else: return self - def copy_and_replace(self, key: str, new_value: Any) -> "StreamToken": - return attr.evolve(self, **{key: new_value}) + def copy_and_replace(self, key: StreamKeyType, new_value: Any) -> "StreamToken": + return attr.evolve(self, **{key.value: new_value}) + + @overload + def get_field(self, key: Literal[StreamKeyType.ROOM]) -> RoomStreamToken: + ... + + @overload + def get_field( + self, + key: Literal[ + StreamKeyType.ACCOUNT_DATA, + StreamKeyType.DEVICE_LIST, + StreamKeyType.PRESENCE, + StreamKeyType.PUSH_RULES, + StreamKeyType.RECEIPT, + StreamKeyType.TO_DEVICE, + StreamKeyType.TYPING, + StreamKeyType.UN_PARTIAL_STATED_ROOMS, + ], + ) -> int: + ... + + @overload + def get_field(self, key: StreamKeyType) -> Union[int, RoomStreamToken]: + ... + + def get_field(self, key: StreamKeyType) -> Union[int, RoomStreamToken]: + """Returns the stream ID for the given key.""" + return getattr(self, key.value) StreamToken.START = StreamToken(RoomStreamToken(None, 0), 0, 0, 0, 0, 0, 0, 0, 0, 0) diff --git a/tests/handlers/test_appservice.py b/tests/handlers/test_appservice.py index a7e6cdd66a..8ce6ccf529 100644 --- a/tests/handlers/test_appservice.py +++ b/tests/handlers/test_appservice.py @@ -31,7 +31,7 @@ from synapse.appservice import ( from synapse.handlers.appservice import ApplicationServicesHandler from synapse.rest.client import login, receipts, register, room, sendtodevice from synapse.server import HomeServer -from synapse.types import JsonDict, RoomStreamToken +from synapse.types import JsonDict, RoomStreamToken, StreamKeyType from synapse.util import Clock from synapse.util.stringutils import random_string @@ -304,7 +304,7 @@ class AppServiceHandlerTestCase(unittest.TestCase): ) self.handler.notify_interested_services_ephemeral( - "receipt_key", 580, ["@fakerecipient:example.com"] + StreamKeyType.RECEIPT, 580, ["@fakerecipient:example.com"] ) self.mock_scheduler.enqueue_for_appservice.assert_called_once_with( interested_service, ephemeral=[event] @@ -332,7 +332,7 @@ class AppServiceHandlerTestCase(unittest.TestCase): ) self.handler.notify_interested_services_ephemeral( - "receipt_key", 580, ["@fakerecipient:example.com"] + StreamKeyType.RECEIPT, 580, ["@fakerecipient:example.com"] ) # This method will be called, but with an empty list of events self.mock_scheduler.enqueue_for_appservice.assert_called_once_with( @@ -634,7 +634,7 @@ class ApplicationServicesHandlerSendEventsTestCase(unittest.HomeserverTestCase): self.get_success( self.hs.get_application_service_handler()._notify_interested_services_ephemeral( services=[interested_appservice], - stream_key="receipt_key", + stream_key=StreamKeyType.RECEIPT, new_token=stream_token, users=[self.exclusive_as_user], ) diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index 95106ec8f3..3060bc9744 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -28,7 +28,7 @@ from synapse.federation.transport.server import TransportLayerServer from synapse.handlers.typing import TypingWriterHandler from synapse.http.federation.matrix_federation_agent import MatrixFederationAgent from synapse.server import HomeServer -from synapse.types import JsonDict, Requester, UserID, create_requester +from synapse.types import JsonDict, Requester, StreamKeyType, UserID, create_requester from synapse.util import Clock from tests import unittest @@ -203,7 +203,9 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): ) ) - self.on_new_event.assert_has_calls([call("typing_key", 1, rooms=[ROOM_ID])]) + self.on_new_event.assert_has_calls( + [call(StreamKeyType.TYPING, 1, rooms=[ROOM_ID])] + ) self.assertEqual(self.event_source.get_current_key(), 1) events = self.get_success( @@ -273,7 +275,9 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): ) self.assertEqual(channel.code, 200) - self.on_new_event.assert_has_calls([call("typing_key", 1, rooms=[ROOM_ID])]) + self.on_new_event.assert_has_calls( + [call(StreamKeyType.TYPING, 1, rooms=[ROOM_ID])] + ) self.assertEqual(self.event_source.get_current_key(), 1) events = self.get_success( @@ -349,7 +353,9 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): ) ) - self.on_new_event.assert_has_calls([call("typing_key", 1, rooms=[ROOM_ID])]) + self.on_new_event.assert_has_calls( + [call(StreamKeyType.TYPING, 1, rooms=[ROOM_ID])] + ) self.mock_federation_client.put_json.assert_called_once_with( "farm", @@ -399,7 +405,9 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): ) ) - self.on_new_event.assert_has_calls([call("typing_key", 1, rooms=[ROOM_ID])]) + self.on_new_event.assert_has_calls( + [call(StreamKeyType.TYPING, 1, rooms=[ROOM_ID])] + ) self.on_new_event.reset_mock() self.assertEqual(self.event_source.get_current_key(), 1) @@ -425,7 +433,9 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): self.reactor.pump([16]) - self.on_new_event.assert_has_calls([call("typing_key", 2, rooms=[ROOM_ID])]) + self.on_new_event.assert_has_calls( + [call(StreamKeyType.TYPING, 2, rooms=[ROOM_ID])] + ) self.assertEqual(self.event_source.get_current_key(), 2) events = self.get_success( @@ -459,7 +469,9 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): ) ) - self.on_new_event.assert_has_calls([call("typing_key", 3, rooms=[ROOM_ID])]) + self.on_new_event.assert_has_calls( + [call(StreamKeyType.TYPING, 3, rooms=[ROOM_ID])] + ) self.on_new_event.reset_mock() self.assertEqual(self.event_source.get_current_key(), 3) -- cgit 1.5.1 From 009b47badfed7593cff5f8acbd61e8fddb3ca788 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 5 Oct 2023 12:46:28 +0300 Subject: Factor out `MultiWriter` token from `RoomStreamToken` (#16427) --- changelog.d/16427.misc | 1 + synapse/handlers/admin.py | 4 +- synapse/handlers/initial_sync.py | 3 +- synapse/handlers/room.py | 2 +- synapse/handlers/sync.py | 2 +- synapse/rest/admin/__init__.py | 2 +- synapse/storage/databases/main/stream.py | 22 +++--- synapse/types/__init__.py | 132 +++++++++++++++++++++---------- tests/handlers/test_appservice.py | 8 +- 9 files changed, 115 insertions(+), 61 deletions(-) create mode 100644 changelog.d/16427.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/16427.misc b/changelog.d/16427.misc new file mode 100644 index 0000000000..44f0e0595e --- /dev/null +++ b/changelog.d/16427.misc @@ -0,0 +1 @@ +Factor out `MultiWriter` token from `RoomStreamToken`. diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index ba9704a065..97fd1fd427 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -171,8 +171,8 @@ class AdminHandler: else: stream_ordering = room.stream_ordering - from_key = RoomStreamToken(0, 0) - to_key = RoomStreamToken(None, stream_ordering) + from_key = RoomStreamToken(topological=0, stream=0) + to_key = RoomStreamToken(stream=stream_ordering) # Events that we've processed in this room written_events: Set[str] = set() diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py index 5737f8014d..c34bd7db95 100644 --- a/synapse/handlers/initial_sync.py +++ b/synapse/handlers/initial_sync.py @@ -192,8 +192,7 @@ class InitialSyncHandler: ) elif event.membership == Membership.LEAVE: room_end_token = RoomStreamToken( - None, - event.stream_ordering, + stream=event.stream_ordering, ) deferred_room_state = run_in_background( self._state_storage_controller.get_state_for_events, diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index a0c3b16819..4cdf0a8502 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1708,7 +1708,7 @@ class RoomEventSource(EventSource[RoomStreamToken, EventBase]): if from_key.topological: logger.warning("Stream has topological part!!!! %r", from_key) - from_key = RoomStreamToken(None, from_key.stream) + from_key = RoomStreamToken(stream=from_key.stream) app_service = self.store.get_app_service_by_user_id(user.to_string()) if app_service: diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 7bd42f635f..744e080309 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -2333,7 +2333,7 @@ class SyncHandler: continue leave_token = now_token.copy_and_replace( - StreamKeyType.ROOM, RoomStreamToken(None, event.stream_ordering) + StreamKeyType.ROOM, RoomStreamToken(stream=event.stream_ordering) ) room_entries.append( RoomSyncResultBuilder( diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index e42dade246..9bd0d764f8 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -146,7 +146,7 @@ class PurgeHistoryRestServlet(RestServlet): # RoomStreamToken expects [int] not Optional[int] assert event.internal_metadata.stream_ordering is not None room_token = RoomStreamToken( - event.depth, event.internal_metadata.stream_ordering + topological=event.depth, stream=event.internal_metadata.stream_ordering ) token = await room_token.to_string(self.store) diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 5a3611c415..ea06e4eee0 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -266,7 +266,7 @@ def generate_next_token( # when we are going backwards so we subtract one from the # stream part. last_stream_ordering -= 1 - return RoomStreamToken(last_topo_ordering, last_stream_ordering) + return RoomStreamToken(topological=last_topo_ordering, stream=last_stream_ordering) def _make_generic_sql_bound( @@ -558,7 +558,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): if p > min_pos } - return RoomStreamToken(None, min_pos, immutabledict(positions)) + return RoomStreamToken(stream=min_pos, instance_map=immutabledict(positions)) async def get_room_events_stream_for_rooms( self, @@ -708,7 +708,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ret.reverse() if rows: - key = RoomStreamToken(None, min(r.stream_ordering for r in rows)) + key = RoomStreamToken(stream=min(r.stream_ordering for r in rows)) else: # Assume we didn't get anything because there was nothing to # get. @@ -969,7 +969,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): topo = await self.db_pool.runInteraction( "_get_max_topological_txn", self._get_max_topological_txn, room_id ) - return RoomStreamToken(topo, stream_ordering) + return RoomStreamToken(topological=topo, stream=stream_ordering) @overload def get_stream_id_for_event_txn( @@ -1033,7 +1033,9 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): retcols=("stream_ordering", "topological_ordering"), desc="get_topological_token_for_event", ) - return RoomStreamToken(row["topological_ordering"], row["stream_ordering"]) + return RoomStreamToken( + topological=row["topological_ordering"], stream=row["stream_ordering"] + ) async def get_current_topological_token(self, room_id: str, stream_key: int) -> int: """Gets the topological token in a room after or at the given stream @@ -1114,8 +1116,8 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): else: topo = None internal = event.internal_metadata - internal.before = RoomStreamToken(topo, stream - 1) - internal.after = RoomStreamToken(topo, stream) + internal.before = RoomStreamToken(topological=topo, stream=stream - 1) + internal.after = RoomStreamToken(topological=topo, stream=stream) internal.order = (int(topo) if topo else 0, int(stream)) async def get_events_around( @@ -1191,11 +1193,13 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): # Paginating backwards includes the event at the token, but paginating # forward doesn't. before_token = RoomStreamToken( - results["topological_ordering"] - 1, results["stream_ordering"] + topological=results["topological_ordering"] - 1, + stream=results["stream_ordering"], ) after_token = RoomStreamToken( - results["topological_ordering"], results["stream_ordering"] + topological=results["topological_ordering"], + stream=results["stream_ordering"], ) rows, start_token = self._paginate_room_events_txn( diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py index 406d5b1611..09a88c86a7 100644 --- a/synapse/types/__init__.py +++ b/synapse/types/__init__.py @@ -61,6 +61,8 @@ from synapse.util.cancellation import cancellable from synapse.util.stringutils import parse_and_validate_server_name if TYPE_CHECKING: + from typing_extensions import Self + from synapse.appservice.api import ApplicationService from synapse.storage.databases.main import DataStore, PurgeEventsStore from synapse.storage.databases.main.appservice import ApplicationServiceWorkerStore @@ -437,7 +439,78 @@ def map_username_to_mxid_localpart( @attr.s(frozen=True, slots=True, order=False) -class RoomStreamToken: +class AbstractMultiWriterStreamToken(metaclass=abc.ABCMeta): + """An abstract stream token class for streams that supports multiple + writers. + + This works by keeping track of the stream position of each writer, + represented by a default `stream` attribute and a map of instance name to + stream position of any writers that are ahead of the default stream + position. + """ + + stream: int = attr.ib(validator=attr.validators.instance_of(int), kw_only=True) + + instance_map: "immutabledict[str, int]" = attr.ib( + factory=immutabledict, + validator=attr.validators.deep_mapping( + key_validator=attr.validators.instance_of(str), + value_validator=attr.validators.instance_of(int), + mapping_validator=attr.validators.instance_of(immutabledict), + ), + kw_only=True, + ) + + @classmethod + @abc.abstractmethod + async def parse(cls, store: "DataStore", string: str) -> "Self": + """Parse the string representation of the token.""" + ... + + @abc.abstractmethod + async def to_string(self, store: "DataStore") -> str: + """Serialize the token into its string representation.""" + ... + + def copy_and_advance(self, other: "Self") -> "Self": + """Return a new token such that if an event is after both this token and + the other token, then its after the returned token too. + """ + + max_stream = max(self.stream, other.stream) + + instance_map = { + instance: max( + self.instance_map.get(instance, self.stream), + other.instance_map.get(instance, other.stream), + ) + for instance in set(self.instance_map).union(other.instance_map) + } + + return attr.evolve( + self, stream=max_stream, instance_map=immutabledict(instance_map) + ) + + def get_max_stream_pos(self) -> int: + """Get the maximum stream position referenced in this token. + + The corresponding "min" position is, by definition just `self.stream`. + + This is used to handle tokens that have non-empty `instance_map`, and so + reference stream positions after the `self.stream` position. + """ + return max(self.instance_map.values(), default=self.stream) + + def get_stream_pos_for_instance(self, instance_name: str) -> int: + """Get the stream position that the given writer was at at this token.""" + + # If we don't have an entry for the instance we can assume that it was + # at `self.stream`. + return self.instance_map.get(instance_name, self.stream) + + +@attr.s(frozen=True, slots=True, order=False) +class RoomStreamToken(AbstractMultiWriterStreamToken): """Tokens are positions between events. The token "s1" comes after event 1. s0 s1 @@ -514,16 +587,8 @@ class RoomStreamToken: topological: Optional[int] = attr.ib( validator=attr.validators.optional(attr.validators.instance_of(int)), - ) - stream: int = attr.ib(validator=attr.validators.instance_of(int)) - - instance_map: "immutabledict[str, int]" = attr.ib( - factory=immutabledict, - validator=attr.validators.deep_mapping( - key_validator=attr.validators.instance_of(str), - value_validator=attr.validators.instance_of(int), - mapping_validator=attr.validators.instance_of(immutabledict), - ), + kw_only=True, + default=None, ) def __attrs_post_init__(self) -> None: @@ -583,17 +648,7 @@ class RoomStreamToken: if self.topological or other.topological: raise Exception("Can't advance topological tokens") - max_stream = max(self.stream, other.stream) - - instance_map = { - instance: max( - self.instance_map.get(instance, self.stream), - other.instance_map.get(instance, other.stream), - ) - for instance in set(self.instance_map).union(other.instance_map) - } - - return RoomStreamToken(None, max_stream, immutabledict(instance_map)) + return super().copy_and_advance(other) def as_historical_tuple(self) -> Tuple[int, int]: """Returns a tuple of `(topological, stream)` for historical tokens. @@ -619,16 +674,6 @@ class RoomStreamToken: # at `self.stream`. return self.instance_map.get(instance_name, self.stream) - def get_max_stream_pos(self) -> int: - """Get the maximum stream position referenced in this token. - - The corresponding "min" position is, by definition just `self.stream`. - - This is used to handle tokens that have non-empty `instance_map`, and so - reference stream positions after the `self.stream` position. - """ - return max(self.instance_map.values(), default=self.stream) - async def to_string(self, store: "DataStore") -> str: if self.topological is not None: return "t%d-%d" % (self.topological, self.stream) @@ -838,23 +883,28 @@ class StreamToken: return getattr(self, key.value) -StreamToken.START = StreamToken(RoomStreamToken(None, 0), 0, 0, 0, 0, 0, 0, 0, 0, 0) +StreamToken.START = StreamToken(RoomStreamToken(stream=0), 0, 0, 0, 0, 0, 0, 0, 0, 0) @attr.s(slots=True, frozen=True, auto_attribs=True) -class PersistedEventPosition: - """Position of a newly persisted event with instance that persisted it. - - This can be used to test whether the event is persisted before or after a - RoomStreamToken. - """ +class PersistedPosition: + """Position of a newly persisted row with instance that persisted it.""" instance_name: str stream: int - def persisted_after(self, token: RoomStreamToken) -> bool: + def persisted_after(self, token: AbstractMultiWriterStreamToken) -> bool: return token.get_stream_pos_for_instance(self.instance_name) < self.stream + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class PersistedEventPosition(PersistedPosition): + """Position of a newly persisted event with instance that persisted it. + + This can be used to test whether the event is persisted before or after a + RoomStreamToken. + """ + def to_room_stream_token(self) -> RoomStreamToken: """Converts the position to a room stream token such that events persisted in the same room after this position will be after the @@ -865,7 +915,7 @@ class PersistedEventPosition: """ # Doing the naive thing satisfies the desired properties described in # the docstring. - return RoomStreamToken(None, self.stream) + return RoomStreamToken(stream=self.stream) @attr.s(slots=True, frozen=True, auto_attribs=True) diff --git a/tests/handlers/test_appservice.py b/tests/handlers/test_appservice.py index 8ce6ccf529..867dbd6001 100644 --- a/tests/handlers/test_appservice.py +++ b/tests/handlers/test_appservice.py @@ -86,7 +86,7 @@ class AppServiceHandlerTestCase(unittest.TestCase): [event], ] ) - self.handler.notify_interested_services(RoomStreamToken(None, 1)) + self.handler.notify_interested_services(RoomStreamToken(stream=1)) self.mock_scheduler.enqueue_for_appservice.assert_called_once_with( interested_service, events=[event] @@ -107,7 +107,7 @@ class AppServiceHandlerTestCase(unittest.TestCase): ] ) self.mock_store.get_events_as_list = AsyncMock(side_effect=[[event]]) - self.handler.notify_interested_services(RoomStreamToken(None, 0)) + self.handler.notify_interested_services(RoomStreamToken(stream=0)) self.mock_as_api.query_user.assert_called_once_with(services[0], user_id) @@ -126,7 +126,7 @@ class AppServiceHandlerTestCase(unittest.TestCase): ] ) - self.handler.notify_interested_services(RoomStreamToken(None, 0)) + self.handler.notify_interested_services(RoomStreamToken(stream=0)) self.assertFalse( self.mock_as_api.query_user.called, @@ -441,7 +441,7 @@ class ApplicationServicesHandlerSendEventsTestCase(unittest.HomeserverTestCase): self.get_success( self.hs.get_application_service_handler()._notify_interested_services( RoomStreamToken( - None, self.hs.get_application_service_handler().current_max + stream=self.hs.get_application_service_handler().current_max ) ) ) -- cgit 1.5.1 From fa907025f4b263d27c2b338fb0fe86d257d74fa8 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 5 Oct 2023 11:07:38 -0400 Subject: Remove manys calls to cursor_to_dict (#16431) This avoids calling cursor_to_dict and then immediately unpacking the values in the dict for other users. By not creating the intermediate dictionary we can avoid allocating the dictionary and strings for the keys, which should generally be more performant. Additionally this improves type hints by avoid Dict[str, Any] dictionaries coming out of the database layer. --- changelog.d/16429.misc | 2 +- changelog.d/16431.misc | 1 + synapse/push/__init__.py | 2 +- synapse/storage/databases/main/account_data.py | 11 +- synapse/storage/databases/main/appservice.py | 29 ++--- synapse/storage/databases/main/devices.py | 12 +- synapse/storage/databases/main/end_to_end_keys.py | 22 +--- synapse/storage/databases/main/events.py | 9 +- synapse/storage/databases/main/presence.py | 18 ++- synapse/storage/databases/main/pusher.py | 121 +++++++++++++++----- synapse/storage/databases/main/receipts.py | 72 ++++++------ synapse/storage/databases/main/registration.py | 133 +++++++++++++--------- synapse/storage/databases/main/room.py | 42 ++++--- synapse/storage/databases/main/roommember.py | 10 +- synapse/storage/databases/main/search.py | 20 ++-- synapse/storage/databases/main/task_scheduler.py | 44 +++++-- 16 files changed, 320 insertions(+), 228 deletions(-) create mode 100644 changelog.d/16431.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/16429.misc b/changelog.d/16429.misc index 4241e143be..bd7cdd42af 100644 --- a/changelog.d/16429.misc +++ b/changelog.d/16429.misc @@ -1 +1 @@ -Reduce the size of each replication command instance. +Reduce memory allocations. diff --git a/changelog.d/16431.misc b/changelog.d/16431.misc new file mode 100644 index 0000000000..bd7cdd42af --- /dev/null +++ b/changelog.d/16431.misc @@ -0,0 +1 @@ +Reduce memory allocations. diff --git a/synapse/push/__init__.py b/synapse/push/__init__.py index 9e5eb2a445..4d405f2a0c 100644 --- a/synapse/push/__init__.py +++ b/synapse/push/__init__.py @@ -101,7 +101,7 @@ if TYPE_CHECKING: class PusherConfig: """Parameters necessary to configure a pusher.""" - id: Optional[str] + id: Optional[int] user_name: str profile_tag: str diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py index 80f146dd53..16c284807a 100644 --- a/synapse/storage/databases/main/account_data.py +++ b/synapse/storage/databases/main/account_data.py @@ -151,10 +151,10 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) sql += " AND content != '{}'" txn.execute(sql, (user_id,)) - rows = self.db_pool.cursor_to_dict(txn) return { - row["account_data_type"]: db_to_json(row["content"]) for row in rows + account_data_type: db_to_json(content) + for account_data_type, content in txn } return await self.db_pool.runInteraction( @@ -196,13 +196,12 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) sql += " AND content != '{}'" txn.execute(sql, (user_id,)) - rows = self.db_pool.cursor_to_dict(txn) by_room: Dict[str, Dict[str, JsonDict]] = {} - for row in rows: - room_data = by_room.setdefault(row["room_id"], {}) + for room_id, account_data_type, content in txn: + room_data = by_room.setdefault(room_id, {}) - room_data[row["account_data_type"]] = db_to_json(row["content"]) + room_data[account_data_type] = db_to_json(content) return by_room diff --git a/synapse/storage/databases/main/appservice.py b/synapse/storage/databases/main/appservice.py index 0553a0621a..073a99cd84 100644 --- a/synapse/storage/databases/main/appservice.py +++ b/synapse/storage/databases/main/appservice.py @@ -14,17 +14,7 @@ # limitations under the License. import logging import re -from typing import ( - TYPE_CHECKING, - Any, - Dict, - List, - Optional, - Pattern, - Sequence, - Tuple, - cast, -) +from typing import TYPE_CHECKING, List, Optional, Pattern, Sequence, Tuple, cast from synapse.appservice import ( ApplicationService, @@ -353,21 +343,15 @@ class ApplicationServiceTransactionWorkerStore( def _get_oldest_unsent_txn( txn: LoggingTransaction, - ) -> Optional[Dict[str, Any]]: + ) -> Optional[Tuple[int, str]]: # Monotonically increasing txn ids, so just select the smallest # one in the txns table (we delete them when they are sent) txn.execute( - "SELECT * FROM application_services_txns WHERE as_id=?" + "SELECT txn_id, event_ids FROM application_services_txns WHERE as_id=?" " ORDER BY txn_id ASC LIMIT 1", (service.id,), ) - rows = self.db_pool.cursor_to_dict(txn) - if not rows: - return None - - entry = rows[0] - - return entry + return cast(Optional[Tuple[int, str]], txn.fetchone()) entry = await self.db_pool.runInteraction( "get_oldest_unsent_appservice_txn", _get_oldest_unsent_txn @@ -376,8 +360,9 @@ class ApplicationServiceTransactionWorkerStore( if not entry: return None - event_ids = db_to_json(entry["event_ids"]) + txn_id, event_ids_str = entry + event_ids = db_to_json(event_ids_str) events = await self.get_events_as_list(event_ids) # TODO: to-device messages, one-time key counts, device list summaries and unused @@ -385,7 +370,7 @@ class ApplicationServiceTransactionWorkerStore( # We likely want to populate those for reliability. return AppServiceTransaction( service=service, - id=entry["txn_id"], + id=txn_id, events=events, ephemeral=[], to_device_messages=[], diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index df596f35f9..9f3804a504 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1413,13 +1413,13 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): def get_devices_not_accessed_since_txn( txn: LoggingTransaction, - ) -> List[Dict[str, str]]: + ) -> List[Tuple[str, str]]: sql = """ SELECT user_id, device_id FROM devices WHERE last_seen < ? AND hidden = FALSE """ txn.execute(sql, (since_ms,)) - return self.db_pool.cursor_to_dict(txn) + return cast(List[Tuple[str, str]], txn.fetchall()) rows = await self.db_pool.runInteraction( "get_devices_not_accessed_since", @@ -1427,11 +1427,11 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): ) devices: Dict[str, List[str]] = {} - for row in rows: + for user_id, device_id in rows: # Remote devices are never stale from our point of view. - if self.hs.is_mine_id(row["user_id"]): - user_devices = devices.setdefault(row["user_id"], []) - user_devices.append(row["device_id"]) + if self.hs.is_mine_id(user_id): + user_devices = devices.setdefault(user_id, []) + user_devices.append(device_id) return devices diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index 89fac23f93..749ae54e20 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -921,14 +921,10 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker } txn.execute(sql, params) - rows = self.db_pool.cursor_to_dict(txn) - for row in rows: - user_id = row["user_id"] - key_type = row["keytype"] - key = db_to_json(row["keydata"]) + for user_id, key_type, key_data, _ in txn: user_keys = result.setdefault(user_id, {}) - user_keys[key_type] = key + user_keys[key_type] = db_to_json(key_data) return result @@ -988,13 +984,9 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker query_params.extend(item) txn.execute(sql, query_params) - rows = self.db_pool.cursor_to_dict(txn) # and add the signatures to the appropriate keys - for row in rows: - key_id: str = row["key_id"] - target_user_id: str = row["target_user_id"] - target_device_id: str = row["target_device_id"] + for target_user_id, target_device_id, key_id, signature in txn: key_type = devices[(target_user_id, target_device_id)] # We need to copy everything, because the result may have come # from the cache. dict.copy only does a shallow copy, so we @@ -1012,13 +1004,11 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker ].copy() if from_user_id in signatures: user_sigs = signatures[from_user_id] = signatures[from_user_id] - user_sigs[key_id] = row["signature"] + user_sigs[key_id] = signature else: - signatures[from_user_id] = {key_id: row["signature"]} + signatures[from_user_id] = {key_id: signature} else: - target_user_key["signatures"] = { - from_user_id: {key_id: row["signature"]} - } + target_user_key["signatures"] = {from_user_id: {key_id: signature}} return keys diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 790d058c43..d4dcdb898c 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1654,8 +1654,6 @@ class PersistEventsStore: ) -> None: to_prefill = [] - rows = [] - ev_map = {e.event_id: e for e, _ in events_and_contexts} if not ev_map: return @@ -1676,10 +1674,9 @@ class PersistEventsStore: ) txn.execute(sql + clause, args) - rows = self.db_pool.cursor_to_dict(txn) - for row in rows: - event = ev_map[row["event_id"]] - if not row["rejects"] and not row["redacts"]: + for event_id, redacts, rejects in txn: + event = ev_map[event_id] + if not rejects and not redacts: to_prefill.append(EventCacheEntry(event=event, redacted_event=None)) async def external_prefill() -> None: diff --git a/synapse/storage/databases/main/presence.py b/synapse/storage/databases/main/presence.py index 194b4e031f..805c23f89f 100644 --- a/synapse/storage/databases/main/presence.py +++ b/synapse/storage/databases/main/presence.py @@ -434,13 +434,21 @@ class PresenceStore(PresenceBackgroundUpdateStore, CacheInvalidationWorkerStore) txn = db_conn.cursor() txn.execute(sql, (PresenceState.OFFLINE,)) - rows = self.db_pool.cursor_to_dict(txn) + rows = txn.fetchall() txn.close() - for row in rows: - row["currently_active"] = bool(row["currently_active"]) - - return [UserPresenceState(**row) for row in rows] + return [ + UserPresenceState( + user_id=user_id, + state=state, + last_active_ts=last_active_ts, + last_federation_update_ts=last_federation_update_ts, + last_user_sync_ts=last_user_sync_ts, + status_msg=status_msg, + currently_active=bool(currently_active), + ) + for user_id, state, last_active_ts, last_federation_update_ts, last_user_sync_ts, status_msg, currently_active in rows + ] def take_presence_startup_info(self) -> List[UserPresenceState]: active_on_startup = self._presence_on_startup diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py index 87e28e22d3..c7eb7fc478 100644 --- a/synapse/storage/databases/main/pusher.py +++ b/synapse/storage/databases/main/pusher.py @@ -47,6 +47,27 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) +# The type of a row in the pushers table. +PusherRow = Tuple[ + int, # id + str, # user_name + Optional[int], # access_token + str, # profile_tag + str, # kind + str, # app_id + str, # app_display_name + str, # device_display_name + str, # pushkey + int, # ts + str, # lang + str, # data + int, # last_stream_ordering + int, # last_success + int, # failing_since + bool, # enabled + str, # device_id +] + class PusherWorkerStore(SQLBaseStore): def __init__( @@ -83,30 +104,66 @@ class PusherWorkerStore(SQLBaseStore): self._remove_deleted_email_pushers, ) - def _decode_pushers_rows(self, rows: Iterable[dict]) -> Iterator[PusherConfig]: + def _decode_pushers_rows( + self, + rows: Iterable[PusherRow], + ) -> Iterator[PusherConfig]: """JSON-decode the data in the rows returned from the `pushers` table Drops any rows whose data cannot be decoded """ - for r in rows: - data_json = r["data"] + for ( + id, + user_name, + access_token, + profile_tag, + kind, + app_id, + app_display_name, + device_display_name, + pushkey, + ts, + lang, + data, + last_stream_ordering, + last_success, + failing_since, + enabled, + device_id, + ) in rows: try: - r["data"] = db_to_json(data_json) + data_json = db_to_json(data) except Exception as e: logger.warning( "Invalid JSON in data for pusher %d: %s, %s", - r["id"], - data_json, + id, + data, e.args[0], ) continue - # If we're using SQLite, then boolean values are integers. This is - # troublesome since some code using the return value of this method might - # expect it to be a boolean, or will expose it to clients (in responses). - r["enabled"] = bool(r["enabled"]) - - yield PusherConfig(**r) + yield PusherConfig( + id=id, + user_name=user_name, + profile_tag=profile_tag, + kind=kind, + app_id=app_id, + app_display_name=app_display_name, + device_display_name=device_display_name, + pushkey=pushkey, + ts=ts, + lang=lang, + data=data_json, + last_stream_ordering=last_stream_ordering, + last_success=last_success, + failing_since=failing_since, + # If we're using SQLite, then boolean values are integers. This is + # troublesome since some code using the return value of this method might + # expect it to be a boolean, or will expose it to clients (in responses). + enabled=bool(enabled), + device_id=device_id, + access_token=access_token, + ) def get_pushers_stream_token(self) -> int: return self._pushers_id_gen.get_current_token() @@ -136,7 +193,7 @@ class PusherWorkerStore(SQLBaseStore): The pushers for which the given columns have the given values. """ - def get_pushers_by_txn(txn: LoggingTransaction) -> List[Dict[str, Any]]: + def get_pushers_by_txn(txn: LoggingTransaction) -> List[PusherRow]: # We could technically use simple_select_list here, but we need to call # COALESCE on the 'enabled' column. While it is technically possible to give # simple_select_list the whole `COALESCE(...) AS ...` as a column name, it @@ -154,7 +211,7 @@ class PusherWorkerStore(SQLBaseStore): txn.execute(sql, list(keyvalues.values())) - return self.db_pool.cursor_to_dict(txn) + return cast(List[PusherRow], txn.fetchall()) ret = await self.db_pool.runInteraction( desc="get_pushers_by", @@ -164,14 +221,22 @@ class PusherWorkerStore(SQLBaseStore): return self._decode_pushers_rows(ret) async def get_enabled_pushers(self) -> Iterator[PusherConfig]: - def get_enabled_pushers_txn(txn: LoggingTransaction) -> Iterator[PusherConfig]: - txn.execute("SELECT * FROM pushers WHERE COALESCE(enabled, TRUE)") - rows = self.db_pool.cursor_to_dict(txn) - - return self._decode_pushers_rows(rows) + def get_enabled_pushers_txn(txn: LoggingTransaction) -> List[PusherRow]: + txn.execute( + """ + SELECT id, user_name, access_token, profile_tag, kind, app_id, + app_display_name, device_display_name, pushkey, ts, lang, data, + last_stream_ordering, last_success, failing_since, + enabled, device_id + FROM pushers WHERE COALESCE(enabled, TRUE) + """ + ) + return cast(List[PusherRow], txn.fetchall()) - return await self.db_pool.runInteraction( - "get_enabled_pushers", get_enabled_pushers_txn + return self._decode_pushers_rows( + await self.db_pool.runInteraction( + "get_enabled_pushers", get_enabled_pushers_txn + ) ) async def get_all_updated_pushers_rows( @@ -304,7 +369,7 @@ class PusherWorkerStore(SQLBaseStore): ) async def get_throttle_params_by_room( - self, pusher_id: str + self, pusher_id: int ) -> Dict[str, ThrottleParams]: res = await self.db_pool.simple_select_list( "pusher_throttle", @@ -323,7 +388,7 @@ class PusherWorkerStore(SQLBaseStore): return params_by_room async def set_throttle_params( - self, pusher_id: str, room_id: str, params: ThrottleParams + self, pusher_id: int, room_id: str, params: ThrottleParams ) -> None: await self.db_pool.simple_upsert( "pusher_throttle", @@ -534,7 +599,7 @@ class PusherBackgroundUpdatesStore(SQLBaseStore): (last_pusher_id, batch_size), ) - rows = self.db_pool.cursor_to_dict(txn) + rows = txn.fetchall() if len(rows) == 0: return 0 @@ -550,19 +615,19 @@ class PusherBackgroundUpdatesStore(SQLBaseStore): txn=txn, table="pushers", key_names=("id",), - key_values=[(row["pusher_id"],) for row in rows], + key_values=[row[0] for row in rows], value_names=("device_id", "access_token"), # If there was already a device_id on the pusher, we only want to clear # the access_token column, so we keep the existing device_id. Otherwise, # we set the device_id we got from joining the access_tokens table. value_values=[ - (row["pusher_device_id"] or row["token_device_id"], None) - for row in rows + (pusher_device_id or token_device_id, None) + for _, pusher_device_id, token_device_id in rows ], ) self.db_pool.updates._background_update_progress_txn( - txn, "set_device_id_for_pushers", {"pusher_id": rows[-1]["pusher_id"]} + txn, "set_device_id_for_pushers", {"pusher_id": rows[-1][0]} ) return len(rows) diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py index 3bab1024ea..b2645ab43c 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py @@ -313,25 +313,25 @@ class ReceiptsWorkerStore(SQLBaseStore): ) -> Sequence[JsonMapping]: """See get_linearized_receipts_for_room""" - def f(txn: LoggingTransaction) -> List[Dict[str, Any]]: + def f(txn: LoggingTransaction) -> List[Tuple[str, str, str, str]]: if from_key: sql = ( - "SELECT * FROM receipts_linearized WHERE" + "SELECT receipt_type, user_id, event_id, data" + " FROM receipts_linearized WHERE" " room_id = ? AND stream_id > ? AND stream_id <= ?" ) txn.execute(sql, (room_id, from_key, to_key)) else: sql = ( - "SELECT * FROM receipts_linearized WHERE" + "SELECT receipt_type, user_id, event_id, data" + " FROM receipts_linearized WHERE" " room_id = ? AND stream_id <= ?" ) txn.execute(sql, (room_id, to_key)) - rows = self.db_pool.cursor_to_dict(txn) - - return rows + return cast(List[Tuple[str, str, str, str]], txn.fetchall()) rows = await self.db_pool.runInteraction("get_linearized_receipts_for_room", f) @@ -339,10 +339,10 @@ class ReceiptsWorkerStore(SQLBaseStore): return [] content: JsonDict = {} - for row in rows: - content.setdefault(row["event_id"], {}).setdefault(row["receipt_type"], {})[ - row["user_id"] - ] = db_to_json(row["data"]) + for receipt_type, user_id, event_id, data in rows: + content.setdefault(event_id, {}).setdefault(receipt_type, {})[ + user_id + ] = db_to_json(data) return [{"type": EduTypes.RECEIPT, "room_id": room_id, "content": content}] @@ -357,10 +357,13 @@ class ReceiptsWorkerStore(SQLBaseStore): if not room_ids: return {} - def f(txn: LoggingTransaction) -> List[Dict[str, Any]]: + def f( + txn: LoggingTransaction, + ) -> List[Tuple[str, str, str, str, Optional[str], str]]: if from_key: sql = """ - SELECT * FROM receipts_linearized WHERE + SELECT room_id, receipt_type, user_id, event_id, thread_id, data + FROM receipts_linearized WHERE stream_id > ? AND stream_id <= ? AND """ clause, args = make_in_list_sql_clause( @@ -370,7 +373,8 @@ class ReceiptsWorkerStore(SQLBaseStore): txn.execute(sql + clause, [from_key, to_key] + list(args)) else: sql = """ - SELECT * FROM receipts_linearized WHERE + SELECT room_id, receipt_type, user_id, event_id, thread_id, data + FROM receipts_linearized WHERE stream_id <= ? AND """ @@ -380,29 +384,31 @@ class ReceiptsWorkerStore(SQLBaseStore): txn.execute(sql + clause, [to_key] + list(args)) - return self.db_pool.cursor_to_dict(txn) + return cast( + List[Tuple[str, str, str, str, Optional[str], str]], txn.fetchall() + ) txn_results = await self.db_pool.runInteraction( "_get_linearized_receipts_for_rooms", f ) results: JsonDict = {} - for row in txn_results: + for room_id, receipt_type, user_id, event_id, thread_id, data in txn_results: # We want a single event per room, since we want to batch the # receipts by room, event and type. room_event = results.setdefault( - row["room_id"], - {"type": EduTypes.RECEIPT, "room_id": row["room_id"], "content": {}}, + room_id, + {"type": EduTypes.RECEIPT, "room_id": room_id, "content": {}}, ) # The content is of the form: # {"$foo:bar": { "read": { "@user:host": }, .. }, .. } - event_entry = room_event["content"].setdefault(row["event_id"], {}) - receipt_type = event_entry.setdefault(row["receipt_type"], {}) + event_entry = room_event["content"].setdefault(event_id, {}) + receipt_type_dict = event_entry.setdefault(receipt_type, {}) - receipt_type[row["user_id"]] = db_to_json(row["data"]) - if row["thread_id"]: - receipt_type[row["user_id"]]["thread_id"] = row["thread_id"] + receipt_type_dict[user_id] = db_to_json(data) + if thread_id: + receipt_type_dict[user_id]["thread_id"] = thread_id results = { room_id: [results[room_id]] if room_id in results else [] @@ -428,10 +434,11 @@ class ReceiptsWorkerStore(SQLBaseStore): A dictionary of roomids to a list of receipts. """ - def f(txn: LoggingTransaction) -> List[Dict[str, Any]]: + def f(txn: LoggingTransaction) -> List[Tuple[str, str, str, str, str]]: if from_key: sql = """ - SELECT * FROM receipts_linearized WHERE + SELECT room_id, receipt_type, user_id, event_id, data + FROM receipts_linearized WHERE stream_id > ? AND stream_id <= ? ORDER BY stream_id DESC LIMIT 100 @@ -439,7 +446,8 @@ class ReceiptsWorkerStore(SQLBaseStore): txn.execute(sql, [from_key, to_key]) else: sql = """ - SELECT * FROM receipts_linearized WHERE + SELECT room_id, receipt_type, user_id, event_id, data + FROM receipts_linearized WHERE stream_id <= ? ORDER BY stream_id DESC LIMIT 100 @@ -447,27 +455,27 @@ class ReceiptsWorkerStore(SQLBaseStore): txn.execute(sql, [to_key]) - return self.db_pool.cursor_to_dict(txn) + return cast(List[Tuple[str, str, str, str, str]], txn.fetchall()) txn_results = await self.db_pool.runInteraction( "get_linearized_receipts_for_all_rooms", f ) results: JsonDict = {} - for row in txn_results: + for room_id, receipt_type, user_id, event_id, data in txn_results: # We want a single event per room, since we want to batch the # receipts by room, event and type. room_event = results.setdefault( - row["room_id"], - {"type": EduTypes.RECEIPT, "room_id": row["room_id"], "content": {}}, + room_id, + {"type": EduTypes.RECEIPT, "room_id": room_id, "content": {}}, ) # The content is of the form: # {"$foo:bar": { "read": { "@user:host": }, .. }, .. } - event_entry = room_event["content"].setdefault(row["event_id"], {}) - receipt_type = event_entry.setdefault(row["receipt_type"], {}) + event_entry = room_event["content"].setdefault(event_id, {}) + receipt_type_dict = event_entry.setdefault(receipt_type, {}) - receipt_type[row["user_id"]] = db_to_json(row["data"]) + receipt_type_dict[user_id] = db_to_json(data) return results diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index cc964604e2..64a2c31a5d 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -195,7 +195,7 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): async def get_user_by_id(self, user_id: str) -> Optional[UserInfo]: """Returns info about the user account, if it exists.""" - def get_user_by_id_txn(txn: LoggingTransaction) -> Optional[Dict[str, Any]]: + def get_user_by_id_txn(txn: LoggingTransaction) -> Optional[UserInfo]: # We could technically use simple_select_one here, but it would not perform # the COALESCEs (unless hacked into the column names), which could yield # confusing results. @@ -213,35 +213,46 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): (user_id,), ) - rows = self.db_pool.cursor_to_dict(txn) - - if len(rows) == 0: + row = txn.fetchone() + if not row: return None - return rows[0] + ( + name, + is_guest, + admin, + consent_version, + consent_ts, + consent_server_notice_sent, + appservice_id, + creation_ts, + user_type, + deactivated, + shadow_banned, + approved, + locked, + ) = row + + return UserInfo( + appservice_id=appservice_id, + consent_server_notice_sent=consent_server_notice_sent, + consent_version=consent_version, + consent_ts=consent_ts, + creation_ts=creation_ts, + is_admin=bool(admin), + is_deactivated=bool(deactivated), + is_guest=bool(is_guest), + is_shadow_banned=bool(shadow_banned), + user_id=UserID.from_string(name), + user_type=user_type, + approved=bool(approved), + locked=bool(locked), + ) - row = await self.db_pool.runInteraction( + return await self.db_pool.runInteraction( desc="get_user_by_id", func=get_user_by_id_txn, ) - if row is None: - return None - - return UserInfo( - appservice_id=row["appservice_id"], - consent_server_notice_sent=row["consent_server_notice_sent"], - consent_version=row["consent_version"], - consent_ts=row["consent_ts"], - creation_ts=row["creation_ts"], - is_admin=bool(row["admin"]), - is_deactivated=bool(row["deactivated"]), - is_guest=bool(row["is_guest"]), - is_shadow_banned=bool(row["shadow_banned"]), - user_id=UserID.from_string(row["name"]), - user_type=row["user_type"], - approved=bool(row["approved"]), - locked=bool(row["locked"]), - ) async def is_trial_user(self, user_id: str) -> bool: """Checks if user is in the "trial" period, i.e. within the first @@ -579,16 +590,31 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): """ txn.execute(sql, (token,)) - rows = self.db_pool.cursor_to_dict(txn) - - if rows: - row = rows[0] - - # This field is nullable, ensure it comes out as a boolean - if row["token_used"] is None: - row["token_used"] = False + row = txn.fetchone() - return TokenLookupResult(**row) + if row: + ( + user_id, + is_guest, + shadow_banned, + token_id, + device_id, + valid_until_ms, + token_owner, + token_used, + ) = row + + return TokenLookupResult( + user_id=user_id, + is_guest=is_guest, + shadow_banned=shadow_banned, + token_id=token_id, + device_id=device_id, + valid_until_ms=valid_until_ms, + token_owner=token_owner, + # This field is nullable, ensure it comes out as a boolean + token_used=bool(token_used), + ) return None @@ -833,11 +859,10 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): """Counts all users registered on the homeserver.""" def _count_users(txn: LoggingTransaction) -> int: - txn.execute("SELECT COUNT(*) AS users FROM users") - rows = self.db_pool.cursor_to_dict(txn) - if rows: - return rows[0]["users"] - return 0 + txn.execute("SELECT COUNT(*) FROM users") + row = txn.fetchone() + assert row is not None + return row[0] return await self.db_pool.runInteraction("count_users", _count_users) @@ -891,11 +916,10 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): """Counts all users without a special user_type registered on the homeserver.""" def _count_users(txn: LoggingTransaction) -> int: - txn.execute("SELECT COUNT(*) AS users FROM users where user_type is null") - rows = self.db_pool.cursor_to_dict(txn) - if rows: - return rows[0]["users"] - return 0 + txn.execute("SELECT COUNT(*) FROM users where user_type is null") + row = txn.fetchone() + assert row is not None + return row[0] return await self.db_pool.runInteraction("count_real_users", _count_users) @@ -1252,12 +1276,8 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): ) txn.execute(sql, []) - res = self.db_pool.cursor_to_dict(txn) - if res: - for user in res: - self.set_expiration_date_for_user_txn( - txn, user["name"], use_delta=True - ) + for (name,) in txn.fetchall(): + self.set_expiration_date_for_user_txn(txn, name, use_delta=True) await self.db_pool.runInteraction( "get_users_with_no_expiration_date", @@ -1963,11 +1983,12 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): (user_id,), ) - rows = self.db_pool.cursor_to_dict(txn) + row = txn.fetchone() + assert row is not None # We cast to bool because the value returned by the database engine might # be an integer if we're using SQLite. - return bool(rows[0]["approved"]) + return bool(row[0]) return await self.db_pool.runInteraction( desc="is_user_pending_approval", @@ -2045,22 +2066,22 @@ class RegistrationBackgroundUpdateStore(RegistrationWorkerStore): (last_user, batch_size), ) - rows = self.db_pool.cursor_to_dict(txn) + rows = txn.fetchall() if not rows: return True, 0 rows_processed_nb = 0 - for user in rows: - if not user["count_tokens"] and not user["count_threepids"]: - self.set_user_deactivated_status_txn(txn, user["name"], True) + for name, count_tokens, count_threepids in rows: + if not count_tokens and not count_threepids: + self.set_user_deactivated_status_txn(txn, name, True) rows_processed_nb += 1 logger.info("Marked %d rows as deactivated", rows_processed_nb) self.db_pool.updates._background_update_progress_txn( - txn, "users_set_deactivated_flag", {"user_id": rows[-1]["name"]} + txn, "users_set_deactivated_flag", {"user_id": rows[-1][0]} ) if batch_size > len(rows): diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 719e11aea6..1d4d99932b 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -831,7 +831,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): def get_retention_policy_for_room_txn( txn: LoggingTransaction, - ) -> List[Dict[str, Optional[int]]]: + ) -> Optional[Tuple[Optional[int], Optional[int]]]: txn.execute( """ SELECT min_lifetime, max_lifetime FROM room_retention @@ -841,7 +841,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): (room_id,), ) - return self.db_pool.cursor_to_dict(txn) + return cast(Optional[Tuple[Optional[int], Optional[int]]], txn.fetchone()) ret = await self.db_pool.runInteraction( "get_retention_policy_for_room", @@ -856,8 +856,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): max_lifetime=self.config.retention.retention_default_max_lifetime, ) - min_lifetime = ret[0]["min_lifetime"] - max_lifetime = ret[0]["max_lifetime"] + min_lifetime, max_lifetime = ret # If one of the room's policy's attributes isn't defined, use the matching # attribute from the default policy. @@ -1162,14 +1161,13 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): txn.execute(sql, args) - rows = self.db_pool.cursor_to_dict(txn) - rooms_dict = {} - - for row in rows: - rooms_dict[row["room_id"]] = RetentionPolicy( - min_lifetime=row["min_lifetime"], - max_lifetime=row["max_lifetime"], + rooms_dict = { + room_id: RetentionPolicy( + min_lifetime=min_lifetime, + max_lifetime=max_lifetime, ) + for room_id, min_lifetime, max_lifetime in txn + } if include_null: # If required, do a second query that retrieves all of the rooms we know @@ -1178,13 +1176,11 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): txn.execute(sql) - rows = self.db_pool.cursor_to_dict(txn) - # If a room isn't already in the dict (i.e. it doesn't have a retention # policy in its state), add it with a null policy. - for row in rows: - if row["room_id"] not in rooms_dict: - rooms_dict[row["room_id"]] = RetentionPolicy() + for (room_id,) in txn: + if room_id not in rooms_dict: + rooms_dict[room_id] = RetentionPolicy() return rooms_dict @@ -1703,24 +1699,24 @@ class RoomBackgroundUpdateStore(SQLBaseStore): (last_room, batch_size), ) - rows = self.db_pool.cursor_to_dict(txn) + rows = txn.fetchall() if not rows: return True - for row in rows: - if not row["json"]: + for room_id, event_id, json in rows: + if not json: retention_policy = {} else: - ev = db_to_json(row["json"]) + ev = db_to_json(json) retention_policy = ev["content"] self.db_pool.simple_insert_txn( txn=txn, table="room_retention", values={ - "room_id": row["room_id"], - "event_id": row["event_id"], + "room_id": room_id, + "event_id": event_id, "min_lifetime": retention_policy.get("min_lifetime"), "max_lifetime": retention_policy.get("max_lifetime"), }, @@ -1729,7 +1725,7 @@ class RoomBackgroundUpdateStore(SQLBaseStore): logger.info("Inserted %d rows into room_retention", len(rows)) self.db_pool.updates._background_update_progress_txn( - txn, "insert_room_retention", {"room_id": rows[-1]["room_id"]} + txn, "insert_room_retention", {"room_id": rows[-1][0]} ) if batch_size > len(rows): diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index e93573f315..bbe08368db 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -1349,18 +1349,16 @@ class RoomMemberBackgroundUpdateStore(SQLBaseStore): txn.execute(sql, (target_min_stream_id, max_stream_id, batch_size)) - rows = self.db_pool.cursor_to_dict(txn) + rows = txn.fetchall() if not rows: return 0 - min_stream_id = rows[-1]["stream_ordering"] + min_stream_id = rows[-1][0] to_update = [] - for row in rows: - event_id = row["event_id"] - room_id = row["room_id"] + for _, event_id, room_id, json in rows: try: - event_json = db_to_json(row["json"]) + event_json = db_to_json(json) content = event_json["content"] except Exception: continue diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py index a7aae661d8..1d69c4a5f0 100644 --- a/synapse/storage/databases/main/search.py +++ b/synapse/storage/databases/main/search.py @@ -179,22 +179,24 @@ class SearchBackgroundUpdateStore(SearchWorkerStore): # store_search_entries_txn with a generator function, but that # would mean having two cursors open on the database at once. # Instead we just build a list of results. - rows = self.db_pool.cursor_to_dict(txn) + rows = txn.fetchall() if not rows: return 0 - min_stream_id = rows[-1]["stream_ordering"] + min_stream_id = rows[-1][0] event_search_rows = [] - for row in rows: + for ( + stream_ordering, + event_id, + room_id, + etype, + json, + origin_server_ts, + ) in rows: try: - event_id = row["event_id"] - room_id = row["room_id"] - etype = row["type"] - stream_ordering = row["stream_ordering"] - origin_server_ts = row["origin_server_ts"] try: - event_json = db_to_json(row["json"]) + event_json = db_to_json(json) content = event_json["content"] except Exception: continue diff --git a/synapse/storage/databases/main/task_scheduler.py b/synapse/storage/databases/main/task_scheduler.py index 5c5372a825..5555b53575 100644 --- a/synapse/storage/databases/main/task_scheduler.py +++ b/synapse/storage/databases/main/task_scheduler.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import TYPE_CHECKING, Any, Dict, List, Optional +from typing import TYPE_CHECKING, Any, List, Optional, Tuple, cast from synapse.storage._base import SQLBaseStore, db_to_json from synapse.storage.database import ( @@ -27,6 +27,8 @@ from synapse.util import json_encoder if TYPE_CHECKING: from synapse.server import HomeServer +ScheduledTaskRow = Tuple[str, str, str, int, str, str, str, str] + class TaskSchedulerWorkerStore(SQLBaseStore): def __init__( @@ -38,13 +40,18 @@ class TaskSchedulerWorkerStore(SQLBaseStore): super().__init__(database, db_conn, hs) @staticmethod - def _convert_row_to_task(row: Dict[str, Any]) -> ScheduledTask: - row["status"] = TaskStatus(row["status"]) - if row["params"] is not None: - row["params"] = db_to_json(row["params"]) - if row["result"] is not None: - row["result"] = db_to_json(row["result"]) - return ScheduledTask(**row) + def _convert_row_to_task(row: ScheduledTaskRow) -> ScheduledTask: + task_id, action, status, timestamp, resource_id, params, result, error = row + return ScheduledTask( + id=task_id, + action=action, + status=TaskStatus(status), + timestamp=timestamp, + resource_id=resource_id, + params=db_to_json(params) if params is not None else None, + result=db_to_json(result) if result is not None else None, + error=error, + ) async def get_scheduled_tasks( self, @@ -68,7 +75,7 @@ class TaskSchedulerWorkerStore(SQLBaseStore): Returns: a list of `ScheduledTask`, ordered by increasing timestamps """ - def get_scheduled_tasks_txn(txn: LoggingTransaction) -> List[Dict[str, Any]]: + def get_scheduled_tasks_txn(txn: LoggingTransaction) -> List[ScheduledTaskRow]: clauses: List[str] = [] args: List[Any] = [] if resource_id: @@ -101,7 +108,7 @@ class TaskSchedulerWorkerStore(SQLBaseStore): args.append(limit) txn.execute(sql, args) - return self.db_pool.cursor_to_dict(txn) + return cast(List[ScheduledTaskRow], txn.fetchall()) rows = await self.db_pool.runInteraction( "get_scheduled_tasks", get_scheduled_tasks_txn @@ -193,7 +200,22 @@ class TaskSchedulerWorkerStore(SQLBaseStore): desc="get_scheduled_task", ) - return TaskSchedulerWorkerStore._convert_row_to_task(row) if row else None + return ( + TaskSchedulerWorkerStore._convert_row_to_task( + ( + row["id"], + row["action"], + row["status"], + row["timestamp"], + row["resource_id"], + row["params"], + row["result"], + row["error"], + ) + ) + if row + else None + ) async def delete_scheduled_task(self, id: str) -> None: """Delete a specific task from its id. -- cgit 1.5.1 From 3555790b27a923f29283dbb01fed6844086edcd1 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 5 Oct 2023 17:42:44 -0400 Subject: Remove unused method. (#16435) --- changelog.d/16435.misc | 1 + synapse/storage/databases/main/__init__.py | 20 -------------------- 2 files changed, 1 insertion(+), 20 deletions(-) create mode 100644 changelog.d/16435.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/16435.misc b/changelog.d/16435.misc new file mode 100644 index 0000000000..e541607161 --- /dev/null +++ b/changelog.d/16435.misc @@ -0,0 +1 @@ +Remove unused method. diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py index 101403578c..dfcbf0a175 100644 --- a/synapse/storage/databases/main/__init__.py +++ b/synapse/storage/databases/main/__init__.py @@ -142,26 +142,6 @@ class DataStore( super().__init__(database, db_conn, hs) - async def get_users(self) -> List[JsonDict]: - """Function to retrieve a list of users in users table. - - Returns: - A list of dictionaries representing users. - """ - return await self.db_pool.simple_select_list( - table="users", - keyvalues={}, - retcols=[ - "name", - "password_hash", - "is_guest", - "admin", - "user_type", - "deactivated", - ], - desc="get_users", - ) - async def get_users_paginate( self, start: int, -- cgit 1.5.1 From 7615e2bf48d7bed3da7235d60f84a3c847ac78f5 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 6 Oct 2023 10:12:43 -0400 Subject: Return ThumbnailInfo in more places (#16438) Improves type hints by using concrete types instead of dictionaries. --- changelog.d/16438.misc | 1 + synapse/media/_base.py | 2 +- synapse/media/media_repository.py | 3 + synapse/rest/media/thumbnail_resource.py | 98 ++++++++++------------ synapse/storage/databases/main/media_repository.py | 30 +++++-- tests/media/test_media_storage.py | 36 ++++---- 6 files changed, 90 insertions(+), 80 deletions(-) create mode 100644 changelog.d/16438.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/16438.misc b/changelog.d/16438.misc new file mode 100644 index 0000000000..bd7cdd42af --- /dev/null +++ b/changelog.d/16438.misc @@ -0,0 +1 @@ +Reduce memory allocations. diff --git a/synapse/media/_base.py b/synapse/media/_base.py index d103b43449..13345acf75 100644 --- a/synapse/media/_base.py +++ b/synapse/media/_base.py @@ -332,7 +332,7 @@ class ThumbnailInfo: # Content type of thumbnail, e.g. image/png type: str # The size of the media file, in bytes. - length: Optional[int] = None + length: int @attr.s(slots=True, frozen=True, auto_attribs=True) diff --git a/synapse/media/media_repository.py b/synapse/media/media_repository.py index d11c2ff4ee..7fd46901f7 100644 --- a/synapse/media/media_repository.py +++ b/synapse/media/media_repository.py @@ -624,6 +624,7 @@ class MediaRepository: height=t_height, method=t_method, type=t_type, + length=t_byte_source.tell(), ), ) @@ -694,6 +695,7 @@ class MediaRepository: height=t_height, method=t_method, type=t_type, + length=t_byte_source.tell(), ), ) @@ -839,6 +841,7 @@ class MediaRepository: height=t_height, method=t_method, type=t_type, + length=t_byte_source.tell(), ), ) diff --git a/synapse/rest/media/thumbnail_resource.py b/synapse/rest/media/thumbnail_resource.py index f9cd773f77..85b6bdbe72 100644 --- a/synapse/rest/media/thumbnail_resource.py +++ b/synapse/rest/media/thumbnail_resource.py @@ -15,7 +15,7 @@ import logging import re -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple +from typing import TYPE_CHECKING, List, Optional, Tuple from synapse.api.errors import Codes, SynapseError, cs_error from synapse.config.repository import THUMBNAIL_SUPPORTED_MEDIA_FORMAT_MAP @@ -159,30 +159,24 @@ class ThumbnailResource(RestServlet): thumbnail_infos = await self.store.get_local_media_thumbnails(media_id) for info in thumbnail_infos: - t_w = info["thumbnail_width"] == desired_width - t_h = info["thumbnail_height"] == desired_height - t_method = info["thumbnail_method"] == desired_method - t_type = info["thumbnail_type"] == desired_type + t_w = info.width == desired_width + t_h = info.height == desired_height + t_method = info.method == desired_method + t_type = info.type == desired_type if t_w and t_h and t_method and t_type: file_info = FileInfo( server_name=None, file_id=media_id, url_cache=media_info["url_cache"], - thumbnail=ThumbnailInfo( - width=info["thumbnail_width"], - height=info["thumbnail_height"], - type=info["thumbnail_type"], - method=info["thumbnail_method"], - ), + thumbnail=info, ) - t_type = file_info.thumbnail_type - t_length = info["thumbnail_length"] - responder = await self.media_storage.fetch_media(file_info) if responder: - await respond_with_responder(request, responder, t_type, t_length) + await respond_with_responder( + request, responder, info.type, info.length + ) return logger.debug("We don't have a thumbnail of that size. Generating") @@ -222,29 +216,23 @@ class ThumbnailResource(RestServlet): file_id = media_info["filesystem_id"] for info in thumbnail_infos: - t_w = info["thumbnail_width"] == desired_width - t_h = info["thumbnail_height"] == desired_height - t_method = info["thumbnail_method"] == desired_method - t_type = info["thumbnail_type"] == desired_type + t_w = info.width == desired_width + t_h = info.height == desired_height + t_method = info.method == desired_method + t_type = info.type == desired_type if t_w and t_h and t_method and t_type: file_info = FileInfo( server_name=server_name, file_id=media_info["filesystem_id"], - thumbnail=ThumbnailInfo( - width=info["thumbnail_width"], - height=info["thumbnail_height"], - type=info["thumbnail_type"], - method=info["thumbnail_method"], - ), + thumbnail=info, ) - t_type = file_info.thumbnail_type - t_length = info["thumbnail_length"] - responder = await self.media_storage.fetch_media(file_info) if responder: - await respond_with_responder(request, responder, t_type, t_length) + await respond_with_responder( + request, responder, info.type, info.length + ) return logger.debug("We don't have a thumbnail of that size. Generating") @@ -304,7 +292,7 @@ class ThumbnailResource(RestServlet): desired_height: int, desired_method: str, desired_type: str, - thumbnail_infos: List[Dict[str, Any]], + thumbnail_infos: List[ThumbnailInfo], media_id: str, file_id: str, url_cache: bool, @@ -319,7 +307,7 @@ class ThumbnailResource(RestServlet): desired_height: The desired height, the returned thumbnail may be larger than this. desired_method: The desired method used to generate the thumbnail. desired_type: The desired content-type of the thumbnail. - thumbnail_infos: A list of dictionaries of candidate thumbnails. + thumbnail_infos: A list of thumbnail info of candidate thumbnails. file_id: The ID of the media that a thumbnail is being requested for. url_cache: True if this is from a URL cache. server_name: The server name, if this is a remote thumbnail. @@ -443,7 +431,7 @@ class ThumbnailResource(RestServlet): desired_height: int, desired_method: str, desired_type: str, - thumbnail_infos: List[Dict[str, Any]], + thumbnail_infos: List[ThumbnailInfo], file_id: str, url_cache: bool, server_name: Optional[str], @@ -456,7 +444,7 @@ class ThumbnailResource(RestServlet): desired_height: The desired height, the returned thumbnail may be larger than this. desired_method: The desired method used to generate the thumbnail. desired_type: The desired content-type of the thumbnail. - thumbnail_infos: A list of dictionaries of candidate thumbnails. + thumbnail_infos: A list of thumbnail infos of candidate thumbnails. file_id: The ID of the media that a thumbnail is being requested for. url_cache: True if this is from a URL cache. server_name: The server name, if this is a remote thumbnail. @@ -474,21 +462,25 @@ class ThumbnailResource(RestServlet): if desired_method == "crop": # Thumbnails that match equal or larger sizes of desired width/height. - crop_info_list: List[Tuple[int, int, int, bool, int, Dict[str, Any]]] = [] + crop_info_list: List[ + Tuple[int, int, int, bool, Optional[int], ThumbnailInfo] + ] = [] # Other thumbnails. - crop_info_list2: List[Tuple[int, int, int, bool, int, Dict[str, Any]]] = [] + crop_info_list2: List[ + Tuple[int, int, int, bool, Optional[int], ThumbnailInfo] + ] = [] for info in thumbnail_infos: # Skip thumbnails generated with different methods. - if info["thumbnail_method"] != "crop": + if info.method != "crop": continue - t_w = info["thumbnail_width"] - t_h = info["thumbnail_height"] + t_w = info.width + t_h = info.height aspect_quality = abs(d_w * t_h - d_h * t_w) min_quality = 0 if d_w <= t_w and d_h <= t_h else 1 size_quality = abs((d_w - t_w) * (d_h - t_h)) - type_quality = desired_type != info["thumbnail_type"] - length_quality = info["thumbnail_length"] + type_quality = desired_type != info.type + length_quality = info.length if t_w >= d_w or t_h >= d_h: crop_info_list.append( ( @@ -513,7 +505,7 @@ class ThumbnailResource(RestServlet): ) # Pick the most appropriate thumbnail. Some values of `desired_width` and # `desired_height` may result in a tie, in which case we avoid comparing on - # the thumbnail info dictionary and pick the thumbnail that appears earlier + # the thumbnail info and pick the thumbnail that appears earlier # in the list of candidates. if crop_info_list: thumbnail_info = min(crop_info_list, key=lambda t: t[:-1])[-1] @@ -521,20 +513,20 @@ class ThumbnailResource(RestServlet): thumbnail_info = min(crop_info_list2, key=lambda t: t[:-1])[-1] elif desired_method == "scale": # Thumbnails that match equal or larger sizes of desired width/height. - info_list: List[Tuple[int, bool, int, Dict[str, Any]]] = [] + info_list: List[Tuple[int, bool, int, ThumbnailInfo]] = [] # Other thumbnails. - info_list2: List[Tuple[int, bool, int, Dict[str, Any]]] = [] + info_list2: List[Tuple[int, bool, int, ThumbnailInfo]] = [] for info in thumbnail_infos: # Skip thumbnails generated with different methods. - if info["thumbnail_method"] != "scale": + if info.method != "scale": continue - t_w = info["thumbnail_width"] - t_h = info["thumbnail_height"] + t_w = info.width + t_h = info.height size_quality = abs((d_w - t_w) * (d_h - t_h)) - type_quality = desired_type != info["thumbnail_type"] - length_quality = info["thumbnail_length"] + type_quality = desired_type != info.type + length_quality = info.length if t_w >= d_w or t_h >= d_h: info_list.append((size_quality, type_quality, length_quality, info)) else: @@ -543,7 +535,7 @@ class ThumbnailResource(RestServlet): ) # Pick the most appropriate thumbnail. Some values of `desired_width` and # `desired_height` may result in a tie, in which case we avoid comparing on - # the thumbnail info dictionary and pick the thumbnail that appears earlier + # the thumbnail info and pick the thumbnail that appears earlier # in the list of candidates. if info_list: thumbnail_info = min(info_list, key=lambda t: t[:-1])[-1] @@ -555,13 +547,7 @@ class ThumbnailResource(RestServlet): file_id=file_id, url_cache=url_cache, server_name=server_name, - thumbnail=ThumbnailInfo( - width=thumbnail_info["thumbnail_width"], - height=thumbnail_info["thumbnail_height"], - type=thumbnail_info["thumbnail_type"], - method=thumbnail_info["thumbnail_method"], - length=thumbnail_info["thumbnail_length"], - ), + thumbnail=thumbnail_info, ) # No matching thumbnail was found. diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py index 8cebeb5189..2e6b176bd2 100644 --- a/synapse/storage/databases/main/media_repository.py +++ b/synapse/storage/databases/main/media_repository.py @@ -28,6 +28,7 @@ from typing import ( from synapse.api.constants import Direction from synapse.logging.opentracing import trace +from synapse.media._base import ThumbnailInfo from synapse.storage._base import SQLBaseStore from synapse.storage.database import ( DatabasePool, @@ -435,8 +436,8 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): desc="store_url_cache", ) - async def get_local_media_thumbnails(self, media_id: str) -> List[Dict[str, Any]]: - return await self.db_pool.simple_select_list( + async def get_local_media_thumbnails(self, media_id: str) -> List[ThumbnailInfo]: + rows = await self.db_pool.simple_select_list( "local_media_repository_thumbnails", {"media_id": media_id}, ( @@ -448,6 +449,16 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): ), desc="get_local_media_thumbnails", ) + return [ + ThumbnailInfo( + width=row["thumbnail_width"], + height=row["thumbnail_height"], + method=row["thumbnail_method"], + type=row["thumbnail_type"], + length=row["thumbnail_length"], + ) + for row in rows + ] @trace async def store_local_thumbnail( @@ -556,8 +567,8 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): async def get_remote_media_thumbnails( self, origin: str, media_id: str - ) -> List[Dict[str, Any]]: - return await self.db_pool.simple_select_list( + ) -> List[ThumbnailInfo]: + rows = await self.db_pool.simple_select_list( "remote_media_cache_thumbnails", {"media_origin": origin, "media_id": media_id}, ( @@ -566,10 +577,19 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): "thumbnail_method", "thumbnail_type", "thumbnail_length", - "filesystem_id", ), desc="get_remote_media_thumbnails", ) + return [ + ThumbnailInfo( + width=row["thumbnail_width"], + height=row["thumbnail_height"], + method=row["thumbnail_method"], + type=row["thumbnail_type"], + length=row["thumbnail_length"], + ) + for row in rows + ] @trace async def get_remote_media_thumbnail( diff --git a/tests/media/test_media_storage.py b/tests/media/test_media_storage.py index ba00e35a9e..15f5d644e4 100644 --- a/tests/media/test_media_storage.py +++ b/tests/media/test_media_storage.py @@ -34,7 +34,7 @@ from synapse.api.errors import Codes from synapse.events import EventBase from synapse.http.types import QueryParams from synapse.logging.context import make_deferred_yieldable -from synapse.media._base import FileInfo +from synapse.media._base import FileInfo, ThumbnailInfo from synapse.media.filepath import MediaFilePaths from synapse.media.media_storage import MediaStorage, ReadableFileWrapper from synapse.media.storage_provider import FileStorageProviderBackend @@ -605,6 +605,8 @@ class MediaRepoTests(unittest.HomeserverTestCase): """Test that choosing between thumbnails with the same quality rating succeeds. We are not particular about which thumbnail is chosen.""" + + content_type = self.test_image.content_type.decode() media_repo = self.hs.get_media_repository() thumbnail_resouce = ThumbnailResource( self.hs, media_repo, media_repo.media_storage @@ -615,26 +617,24 @@ class MediaRepoTests(unittest.HomeserverTestCase): desired_width=desired_size, desired_height=desired_size, desired_method=method, - desired_type=self.test_image.content_type, # type: ignore[arg-type] + desired_type=content_type, # Provide two identical thumbnails which are guaranteed to have the same # quality rating. thumbnail_infos=[ - { - "thumbnail_width": 32, - "thumbnail_height": 32, - "thumbnail_method": method, - "thumbnail_type": self.test_image.content_type, - "thumbnail_length": 256, - "filesystem_id": f"thumbnail1{self.test_image.extension.decode()}", - }, - { - "thumbnail_width": 32, - "thumbnail_height": 32, - "thumbnail_method": method, - "thumbnail_type": self.test_image.content_type, - "thumbnail_length": 256, - "filesystem_id": f"thumbnail2{self.test_image.extension.decode()}", - }, + ThumbnailInfo( + width=32, + height=32, + method=method, + type=content_type, + length=256, + ), + ThumbnailInfo( + width=32, + height=32, + method=method, + type=content_type, + length=256, + ), ], file_id=f"image{self.test_image.extension.decode()}", url_cache=False, -- cgit 1.5.1 From 06bbf1029cf2558213646d3b692621bed5178066 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 6 Oct 2023 11:41:57 -0400 Subject: Convert simple_select_list_paginate_txn to return tuples. (#16433) --- changelog.d/16433.misc | 1 + synapse/api/presence.py | 4 -- synapse/federation/send_queue.py | 2 +- synapse/rest/admin/federation.py | 8 +++- synapse/storage/database.py | 6 +-- synapse/storage/databases/main/presence.py | 58 +++++++++++++++++--------- synapse/storage/databases/main/transactions.py | 27 +++++++----- 7 files changed, 67 insertions(+), 39 deletions(-) create mode 100644 changelog.d/16433.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/16433.misc b/changelog.d/16433.misc new file mode 100644 index 0000000000..bd7cdd42af --- /dev/null +++ b/changelog.d/16433.misc @@ -0,0 +1 @@ +Reduce memory allocations. diff --git a/synapse/api/presence.py b/synapse/api/presence.py index b78f419994..afef6712e1 100644 --- a/synapse/api/presence.py +++ b/synapse/api/presence.py @@ -80,10 +80,6 @@ class UserPresenceState: def as_dict(self) -> JsonDict: return attr.asdict(self) - @staticmethod - def from_dict(d: JsonDict) -> "UserPresenceState": - return UserPresenceState(**d) - def copy_and_replace(self, **kwargs: Any) -> "UserPresenceState": return attr.evolve(self, **kwargs) diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py index 6520795635..525968bcba 100644 --- a/synapse/federation/send_queue.py +++ b/synapse/federation/send_queue.py @@ -395,7 +395,7 @@ class PresenceDestinationsRow(BaseFederationRow): @staticmethod def from_data(data: JsonDict) -> "PresenceDestinationsRow": return PresenceDestinationsRow( - state=UserPresenceState.from_dict(data["state"]), destinations=data["dests"] + state=UserPresenceState(**data["state"]), destinations=data["dests"] ) def to_data(self) -> JsonDict: diff --git a/synapse/rest/admin/federation.py b/synapse/rest/admin/federation.py index e0ee55bd0e..8a617af599 100644 --- a/synapse/rest/admin/federation.py +++ b/synapse/rest/admin/federation.py @@ -198,7 +198,13 @@ class DestinationMembershipRestServlet(RestServlet): rooms, total = await self._store.get_destination_rooms_paginate( destination, start, limit, direction ) - response = {"rooms": rooms, "total": total} + response = { + "rooms": [ + {"room_id": room_id, "stream_ordering": stream_ordering} + for room_id, stream_ordering in rooms + ], + "total": total, + } if (start + limit) < total: response["next_token"] = str(start + len(rooms)) diff --git a/synapse/storage/database.py b/synapse/storage/database.py index ca894edd5a..7d8af5c610 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -2418,7 +2418,7 @@ class DatabasePool: keyvalues: Optional[Dict[str, Any]] = None, exclude_keyvalues: Optional[Dict[str, Any]] = None, order_direction: str = "ASC", - ) -> List[Dict[str, Any]]: + ) -> List[Tuple[Any, ...]]: """ Executes a SELECT query on the named table with start and limit, of row numbers, which may return zero or number of rows from start to limit, @@ -2447,7 +2447,7 @@ class DatabasePool: order_direction: Whether the results should be ordered "ASC" or "DESC". Returns: - The result as a list of dictionaries. + The result as a list of tuples. """ if order_direction not in ["ASC", "DESC"]: raise ValueError("order_direction must be one of 'ASC' or 'DESC'.") @@ -2474,7 +2474,7 @@ class DatabasePool: ) txn.execute(sql, arg_list + [limit, start]) - return cls.cursor_to_dict(txn) + return txn.fetchall() async def simple_search_list( self, diff --git a/synapse/storage/databases/main/presence.py b/synapse/storage/databases/main/presence.py index 805c23f89f..519f05fb60 100644 --- a/synapse/storage/databases/main/presence.py +++ b/synapse/storage/databases/main/presence.py @@ -20,6 +20,7 @@ from typing import ( Mapping, Optional, Tuple, + Union, cast, ) @@ -385,28 +386,47 @@ class PresenceStore(PresenceBackgroundUpdateStore, CacheInvalidationWorkerStore) limit = 100 offset = 0 while True: - rows = await self.db_pool.runInteraction( - "get_presence_for_all_users", - self.db_pool.simple_select_list_paginate_txn, - "presence_stream", - orderby="stream_id", - start=offset, - limit=limit, - exclude_keyvalues=exclude_keyvalues, - retcols=( - "user_id", - "state", - "last_active_ts", - "last_federation_update_ts", - "last_user_sync_ts", - "status_msg", - "currently_active", + rows = cast( + List[Tuple[str, str, int, int, int, Optional[str], Union[int, bool]]], + await self.db_pool.runInteraction( + "get_presence_for_all_users", + self.db_pool.simple_select_list_paginate_txn, + "presence_stream", + orderby="stream_id", + start=offset, + limit=limit, + exclude_keyvalues=exclude_keyvalues, + retcols=( + "user_id", + "state", + "last_active_ts", + "last_federation_update_ts", + "last_user_sync_ts", + "status_msg", + "currently_active", + ), + order_direction="ASC", ), - order_direction="ASC", ) - for row in rows: - users_to_state[row["user_id"]] = UserPresenceState(**row) + for ( + user_id, + state, + last_active_ts, + last_federation_update_ts, + last_user_sync_ts, + status_msg, + currently_active, + ) in rows: + users_to_state[user_id] = UserPresenceState( + user_id=user_id, + state=state, + last_active_ts=last_active_ts, + last_federation_update_ts=last_federation_update_ts, + last_user_sync_ts=last_user_sync_ts, + status_msg=status_msg, + currently_active=bool(currently_active), + ) # We've run out of updates to query if len(rows) < limit: diff --git a/synapse/storage/databases/main/transactions.py b/synapse/storage/databases/main/transactions.py index 8f70eff809..f35757280d 100644 --- a/synapse/storage/databases/main/transactions.py +++ b/synapse/storage/databases/main/transactions.py @@ -526,7 +526,7 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore): start: int, limit: int, direction: Direction = Direction.FORWARDS, - ) -> Tuple[List[JsonDict], int]: + ) -> Tuple[List[Tuple[str, int]], int]: """Function to retrieve a paginated list of destination's rooms. This will return a json list of rooms and the total number of rooms. @@ -537,12 +537,14 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore): limit: number of rows to retrieve direction: sort ascending or descending by room_id Returns: - A tuple of a dict of rooms and a count of total rooms. + A tuple of a list of room tuples and a count of total rooms. + + Each room tuple is room_id, stream_ordering. """ def get_destination_rooms_paginate_txn( txn: LoggingTransaction, - ) -> Tuple[List[JsonDict], int]: + ) -> Tuple[List[Tuple[str, int]], int]: if direction == Direction.BACKWARDS: order = "DESC" else: @@ -556,14 +558,17 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore): txn.execute(sql, [destination]) count = cast(Tuple[int], txn.fetchone())[0] - rooms = self.db_pool.simple_select_list_paginate_txn( - txn=txn, - table="destination_rooms", - orderby="room_id", - start=start, - limit=limit, - retcols=("room_id", "stream_ordering"), - order_direction=order, + rooms = cast( + List[Tuple[str, int]], + self.db_pool.simple_select_list_paginate_txn( + txn=txn, + table="destination_rooms", + orderby="room_id", + start=start, + limit=limit, + retcols=("room_id", "stream_ordering"), + order_direction=order, + ), ) return rooms, count -- cgit 1.5.1 From 8902b3031d1437b6a8779cc4831e3f17d732a6a6 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Mon, 9 Oct 2023 14:41:17 +0000 Subject: Disable statement timeout whilst purging rooms (#16455) * Disable statement timeout whilst purging rooms * Newsfile Signed-off-by: Olivier Wilkinson (reivilibre) * Note the introduction version --------- Signed-off-by: Olivier Wilkinson (reivilibre) --- changelog.d/16455.bugfix | 1 + synapse/storage/databases/main/purge_events.py | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 changelog.d/16455.bugfix (limited to 'synapse/storage/databases') diff --git a/changelog.d/16455.bugfix b/changelog.d/16455.bugfix new file mode 100644 index 0000000000..653a25d3b6 --- /dev/null +++ b/changelog.d/16455.bugfix @@ -0,0 +1 @@ +Prevent the purging of large rooms from timing out when Postgres is in use. The timeout which causes this issue was introduced in Synapse 1.88.0. diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py index dea0e0458c..1e11bf2706 100644 --- a/synapse/storage/databases/main/purge_events.py +++ b/synapse/storage/databases/main/purge_events.py @@ -89,6 +89,11 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore): # furthermore, we might already have the table from a previous (failed) # purge attempt, so let's drop the table first. + if isinstance(self.database_engine, PostgresEngine): + # Disable statement timeouts for this transaction; purging rooms can + # take a while! + txn.execute("SET LOCAL statement_timeout = 0") + txn.execute("DROP TABLE IF EXISTS events_to_purge") txn.execute( -- cgit 1.5.1 From 28fd28e92e9743ae98833fdb2a233aee64568a06 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 10 Oct 2023 10:33:39 +0100 Subject: Add DB indices to speed up purging rooms (#16457) --- changelog.d/16457.bugfix | 1 + synapse/storage/databases/main/account_data.py | 7 +++++++ synapse/storage/databases/main/e2e_room_keys.py | 7 +++++++ .../delta/82/04_add_indices_for_purging_rooms.sql | 20 ++++++++++++++++++++ 4 files changed, 35 insertions(+) create mode 100644 changelog.d/16457.bugfix create mode 100644 synapse/storage/schema/main/delta/82/04_add_indices_for_purging_rooms.sql (limited to 'synapse/storage/databases') diff --git a/changelog.d/16457.bugfix b/changelog.d/16457.bugfix new file mode 100644 index 0000000000..b9a95cc510 --- /dev/null +++ b/changelog.d/16457.bugfix @@ -0,0 +1 @@ +Improve the performance of purging rooms, particularly encrypted rooms. diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py index 16c284807a..39498d52c6 100644 --- a/synapse/storage/databases/main/account_data.py +++ b/synapse/storage/databases/main/account_data.py @@ -103,6 +103,13 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) "AccountDataAndTagsChangeCache", account_max ) + self.db_pool.updates.register_background_index_update( + update_name="room_account_data_index_room_id", + index_name="room_account_data_room_id", + table="room_account_data", + columns=("room_id",), + ) + self.db_pool.updates.register_background_update_handler( "delete_account_data_for_deactivated_users", self._delete_account_data_for_deactivated_users, diff --git a/synapse/storage/databases/main/e2e_room_keys.py b/synapse/storage/databases/main/e2e_room_keys.py index bc7c6a6346..aac4cfb054 100644 --- a/synapse/storage/databases/main/e2e_room_keys.py +++ b/synapse/storage/databases/main/e2e_room_keys.py @@ -53,6 +53,13 @@ class EndToEndRoomKeyBackgroundStore(SQLBaseStore): ): super().__init__(database, db_conn, hs) + self.db_pool.updates.register_background_index_update( + update_name="e2e_room_keys_index_room_id", + index_name="e2e_room_keys_room_id", + table="e2e_room_keys", + columns=("room_id",), + ) + self.db_pool.updates.register_background_update_handler( "delete_e2e_backup_keys_for_deactivated_users", self._delete_e2e_backup_keys_for_deactivated_users, diff --git a/synapse/storage/schema/main/delta/82/04_add_indices_for_purging_rooms.sql b/synapse/storage/schema/main/delta/82/04_add_indices_for_purging_rooms.sql new file mode 100644 index 0000000000..fc948166e6 --- /dev/null +++ b/synapse/storage/schema/main/delta/82/04_add_indices_for_purging_rooms.sql @@ -0,0 +1,20 @@ +/* Copyright 2023 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (8204, 'e2e_room_keys_index_room_id', '{}'); + +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (8204, 'room_account_data_index_room_id', '{}'); -- cgit 1.5.1 From f1e43018b7d526f3e969796bf882c1848b663449 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 10 Oct 2023 12:16:36 -0400 Subject: Inline simple_search_list/simple_search_list_txn. (#16434) This only has a single use and is over abstracted. Inline it so that we can improve type hints. --- changelog.d/16434.misc | 1 + synapse/rest/admin/users.py | 13 ++++++- synapse/storage/database.py | 62 ------------------------------ synapse/storage/databases/main/__init__.py | 46 +++++++++++++++++----- 4 files changed, 49 insertions(+), 73 deletions(-) create mode 100644 changelog.d/16434.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/16434.misc b/changelog.d/16434.misc new file mode 100644 index 0000000000..bd7cdd42af --- /dev/null +++ b/changelog.d/16434.misc @@ -0,0 +1 @@ +Reduce memory allocations. diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index 5b743a1d03..cd995e8dbb 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -842,7 +842,18 @@ class SearchUsersRestServlet(RestServlet): logger.info("term: %s ", term) ret = await self.store.search_users(term) - return HTTPStatus.OK, ret + results = [ + { + "name": name, + "password_hash": password_hash, + "is_guest": bool(is_guest), + "admin": bool(admin), + "user_type": user_type, + } + for name, password_hash, is_guest, admin, user_type in ret + ] + + return HTTPStatus.OK, results class UserAdminServlet(RestServlet): diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 7d8af5c610..7714ec2bf9 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -2476,68 +2476,6 @@ class DatabasePool: return txn.fetchall() - async def simple_search_list( - self, - table: str, - term: Optional[str], - col: str, - retcols: Collection[str], - desc: str = "simple_search_list", - ) -> Optional[List[Dict[str, Any]]]: - """Executes a SELECT query on the named table, which may return zero or - more rows, returning the result as a list of dicts. - - Args: - table: the table name - term: term for searching the table matched to a column. - col: column to query term should be matched to - retcols: the names of the columns to return - - Returns: - A list of dictionaries or None. - """ - - return await self.runInteraction( - desc, - self.simple_search_list_txn, - table, - term, - col, - retcols, - db_autocommit=True, - ) - - @classmethod - def simple_search_list_txn( - cls, - txn: LoggingTransaction, - table: str, - term: Optional[str], - col: str, - retcols: Iterable[str], - ) -> Optional[List[Dict[str, Any]]]: - """Executes a SELECT query on the named table, which may return zero or - more rows, returning the result as a list of dicts. - - Args: - txn: Transaction object - table: the table name - term: term for searching the table matched to a column. - col: column to query term should be matched to - retcols: the names of the columns to return - - Returns: - None if no term is given, otherwise a list of dictionaries. - """ - if term: - sql = "SELECT %s FROM %s WHERE %s LIKE ?" % (", ".join(retcols), table, col) - termvalues = ["%%" + term + "%%"] - txn.execute(sql, termvalues) - else: - return None - - return cls.cursor_to_dict(txn) - def make_in_list_sql_clause( database_engine: BaseDatabaseEngine, column: str, iterable: Collection[Any] diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py index dfcbf0a175..840d725114 100644 --- a/synapse/storage/databases/main/__init__.py +++ b/synapse/storage/databases/main/__init__.py @@ -15,7 +15,7 @@ # limitations under the License. import logging -from typing import TYPE_CHECKING, List, Optional, Tuple, cast +from typing import TYPE_CHECKING, List, Optional, Tuple, Union, cast from synapse.api.constants import Direction from synapse.config.homeserver import HomeServerConfig @@ -296,7 +296,11 @@ class DataStore( "get_users_paginate_txn", get_users_paginate_txn ) - async def search_users(self, term: str) -> Optional[List[JsonDict]]: + async def search_users( + self, term: str + ) -> List[ + Tuple[str, Optional[str], Union[int, bool], Union[int, bool], Optional[str]] + ]: """Function to search users list for one or more users with the matched term. @@ -304,15 +308,37 @@ class DataStore( term: search term Returns: - A list of dictionaries or None. + A list of tuples of name, password_hash, is_guest, admin, user_type or None. """ - return await self.db_pool.simple_search_list( - table="users", - term=term, - col="name", - retcols=["name", "password_hash", "is_guest", "admin", "user_type"], - desc="search_users", - ) + + def search_users( + txn: LoggingTransaction, + ) -> List[ + Tuple[str, Optional[str], Union[int, bool], Union[int, bool], Optional[str]] + ]: + search_term = "%%" + term + "%%" + + sql = """ + SELECT name, password_hash, is_guest, admin, user_type + FROM users + WHERE name LIKE ? + """ + txn.execute(sql, (search_term,)) + + return cast( + List[ + Tuple[ + str, + Optional[str], + Union[int, bool], + Union[int, bool], + Optional[str], + ] + ], + txn.fetchall(), + ) + + return await self.db_pool.runInteraction("search_users", search_users) def check_database_before_upgrade( -- cgit 1.5.1 From a4904dcb04b31ce8ed0deaa2c5c80657780f6618 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 11 Oct 2023 13:24:56 -0400 Subject: Convert simple_select_many_batch, simple_select_many_txn to tuples. (#16444) --- changelog.d/16444.misc | 1 + synapse/storage/database.py | 18 ++-- synapse/storage/databases/main/deviceinbox.py | 42 ++++---- synapse/storage/databases/main/devices.py | 49 ++++++---- synapse/storage/databases/main/end_to_end_keys.py | 19 ++-- synapse/storage/databases/main/event_federation.py | 107 +++++++++++---------- synapse/storage/databases/main/events.py | 79 ++++++++------- .../storage/databases/main/events_bg_updates.py | 62 ++++++------ synapse/storage/databases/main/events_worker.py | 36 ++++--- synapse/storage/databases/main/keys.py | 46 +++++---- synapse/storage/databases/main/presence.py | 51 ++++++---- synapse/storage/databases/main/push_rule.py | 97 +++++++++++++------ synapse/storage/databases/main/relations.py | 19 ++-- synapse/storage/databases/main/room.py | 19 ++-- synapse/storage/databases/main/roommember.py | 78 ++++++++------- synapse/storage/databases/main/state.py | 62 +++++++----- synapse/storage/databases/main/stats.py | 37 +++---- synapse/storage/databases/main/transactions.py | 28 ++++-- synapse/storage/databases/main/ui_auth.py | 41 ++++---- synapse/storage/databases/main/user_directory.py | 54 ++++++----- .../storage/databases/main/user_erasure_store.py | 19 ++-- synapse/storage/databases/state/store.py | 54 +++++++---- tests/storage/test_event_chain.py | 64 +++++++----- 23 files changed, 640 insertions(+), 442 deletions(-) create mode 100644 changelog.d/16444.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/16444.misc b/changelog.d/16444.misc new file mode 100644 index 0000000000..bd7cdd42af --- /dev/null +++ b/changelog.d/16444.misc @@ -0,0 +1 @@ +Reduce memory allocations. diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 7714ec2bf9..81f661160c 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -1874,9 +1874,9 @@ class DatabasePool: keyvalues: Optional[Dict[str, Any]] = None, desc: str = "simple_select_many_batch", batch_size: int = 100, - ) -> List[Dict[str, Any]]: + ) -> List[Tuple[Any, ...]]: """Executes a SELECT query on the named table, which may return zero or - more rows, returning the result as a list of dicts. + more rows. Filters rows by whether the value of `column` is in `iterable`. @@ -1888,10 +1888,13 @@ class DatabasePool: keyvalues: dict of column names and values to select the rows with desc: description of the transaction, for logging and metrics batch_size: the number of rows for each select query + + Returns: + The results as a list of tuples. """ keyvalues = keyvalues or {} - results: List[Dict[str, Any]] = [] + results: List[Tuple[Any, ...]] = [] for chunk in batch_iter(iterable, batch_size): rows = await self.runInteraction( @@ -1918,9 +1921,9 @@ class DatabasePool: iterable: Collection[Any], keyvalues: Dict[str, Any], retcols: Iterable[str], - ) -> List[Dict[str, Any]]: + ) -> List[Tuple[Any, ...]]: """Executes a SELECT query on the named table, which may return zero or - more rows, returning the result as a list of dicts. + more rows. Filters rows by whether the value of `column` is in `iterable`. @@ -1931,6 +1934,9 @@ class DatabasePool: iterable: list keyvalues: dict of column names and values to select the rows with retcols: list of strings giving the names of the columns to return + + Returns: + The results as a list of tuples. """ if not iterable: return [] @@ -1949,7 +1955,7 @@ class DatabasePool: ) txn.execute(sql, values) - return cls.cursor_to_dict(txn) + return txn.fetchall() async def simple_update( self, diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py index 744e98c6d0..1cf649d371 100644 --- a/synapse/storage/databases/main/deviceinbox.py +++ b/synapse/storage/databases/main/deviceinbox.py @@ -344,18 +344,19 @@ class DeviceInboxWorkerStore(SQLBaseStore): # Note that this is more efficient than just dropping `device_id` from the query, # since device_inbox has an index on `(user_id, device_id, stream_id)` if not device_ids_to_query: - user_device_dicts = self.db_pool.simple_select_many_txn( - txn, - table="devices", - column="user_id", - iterable=user_ids_to_query, - keyvalues={"hidden": False}, - retcols=("device_id",), + user_device_dicts = cast( + List[Tuple[str]], + self.db_pool.simple_select_many_txn( + txn, + table="devices", + column="user_id", + iterable=user_ids_to_query, + keyvalues={"hidden": False}, + retcols=("device_id",), + ), ) - device_ids_to_query.update( - {row["device_id"] for row in user_device_dicts} - ) + device_ids_to_query.update({row[0] for row in user_device_dicts}) if not device_ids_to_query: # We've ended up with no devices to query. @@ -845,20 +846,21 @@ class DeviceInboxWorkerStore(SQLBaseStore): # We exclude hidden devices (such as cross-signing keys) here as they are # not expected to receive to-device messages. - rows = self.db_pool.simple_select_many_txn( - txn, - table="devices", - keyvalues={"user_id": user_id, "hidden": False}, - column="device_id", - iterable=devices, - retcols=("device_id",), + rows = cast( + List[Tuple[str]], + self.db_pool.simple_select_many_txn( + txn, + table="devices", + keyvalues={"user_id": user_id, "hidden": False}, + column="device_id", + iterable=devices, + retcols=("device_id",), + ), ) - for row in rows: + for (device_id,) in rows: # Only insert into the local inbox if the device exists on # this server - device_id = row["device_id"] - with start_active_span("serialise_to_device_message"): msg = messages_by_device[device_id] set_tag(SynapseTags.TO_DEVICE_TYPE, msg["type"]) diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 9f3804a504..fc23d18eba 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1052,16 +1052,19 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): async def get_device_list_last_stream_id_for_remotes( self, user_ids: Iterable[str] ) -> Mapping[str, Optional[str]]: - rows = await self.db_pool.simple_select_many_batch( - table="device_lists_remote_extremeties", - column="user_id", - iterable=user_ids, - retcols=("user_id", "stream_id"), - desc="get_device_list_last_stream_id_for_remotes", + rows = cast( + List[Tuple[str, str]], + await self.db_pool.simple_select_many_batch( + table="device_lists_remote_extremeties", + column="user_id", + iterable=user_ids, + retcols=("user_id", "stream_id"), + desc="get_device_list_last_stream_id_for_remotes", + ), ) results: Dict[str, Optional[str]] = {user_id: None for user_id in user_ids} - results.update({row["user_id"]: row["stream_id"] for row in rows}) + results.update(rows) return results @@ -1077,22 +1080,30 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): The IDs of users whose device lists need resync. """ if user_ids: - rows = await self.db_pool.simple_select_many_batch( - table="device_lists_remote_resync", - column="user_id", - iterable=user_ids, - retcols=("user_id",), - desc="get_user_ids_requiring_device_list_resync_with_iterable", + row_tuples = cast( + List[Tuple[str]], + await self.db_pool.simple_select_many_batch( + table="device_lists_remote_resync", + column="user_id", + iterable=user_ids, + retcols=("user_id",), + desc="get_user_ids_requiring_device_list_resync_with_iterable", + ), ) + + return {row[0] for row in row_tuples} else: - rows = await self.db_pool.simple_select_list( - table="device_lists_remote_resync", - keyvalues=None, - retcols=("user_id",), - desc="get_user_ids_requiring_device_list_resync", + rows = cast( + List[Dict[str, str]], + await self.db_pool.simple_select_list( + table="device_lists_remote_resync", + keyvalues=None, + retcols=("user_id",), + desc="get_user_ids_requiring_device_list_resync", + ), ) - return {row["user_id"] for row in rows} + return {row["user_id"] for row in rows} async def mark_remote_users_device_caches_as_stale( self, user_ids: StrCollection diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index 749ae54e20..f13d776b0d 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -493,15 +493,18 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker A map from (algorithm, key_id) to json string for key """ - rows = await self.db_pool.simple_select_many_batch( - table="e2e_one_time_keys_json", - column="key_id", - iterable=key_ids, - retcols=("algorithm", "key_id", "key_json"), - keyvalues={"user_id": user_id, "device_id": device_id}, - desc="add_e2e_one_time_keys_check", + rows = cast( + List[Tuple[str, str, str]], + await self.db_pool.simple_select_many_batch( + table="e2e_one_time_keys_json", + column="key_id", + iterable=key_ids, + retcols=("algorithm", "key_id", "key_json"), + keyvalues={"user_id": user_id, "device_id": device_id}, + desc="add_e2e_one_time_keys_check", + ), ) - result = {(row["algorithm"], row["key_id"]): row["key_json"] for row in rows} + result = {(algorithm, key_id): key_json for algorithm, key_id, key_json in rows} log_kv({"message": "Fetched one time keys for user", "one_time_keys": result}) return result diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index afffa54985..4f80ce75cc 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1049,15 +1049,18 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas Args: event_ids: The event IDs to calculate the max depth of. """ - rows = await self.db_pool.simple_select_many_batch( - table="events", - column="event_id", - iterable=event_ids, - retcols=( - "event_id", - "depth", + rows = cast( + List[Tuple[str, int]], + await self.db_pool.simple_select_many_batch( + table="events", + column="event_id", + iterable=event_ids, + retcols=( + "event_id", + "depth", + ), + desc="get_max_depth_of", ), - desc="get_max_depth_of", ) if not rows: @@ -1065,10 +1068,10 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas else: max_depth_event_id = "" current_max_depth = 0 - for row in rows: - if row["depth"] > current_max_depth: - max_depth_event_id = row["event_id"] - current_max_depth = row["depth"] + for event_id, depth in rows: + if depth > current_max_depth: + max_depth_event_id = event_id + current_max_depth = depth return max_depth_event_id, current_max_depth @@ -1078,15 +1081,18 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas Args: event_ids: The event IDs to calculate the max depth of. """ - rows = await self.db_pool.simple_select_many_batch( - table="events", - column="event_id", - iterable=event_ids, - retcols=( - "event_id", - "depth", + rows = cast( + List[Tuple[str, int]], + await self.db_pool.simple_select_many_batch( + table="events", + column="event_id", + iterable=event_ids, + retcols=( + "event_id", + "depth", + ), + desc="get_min_depth_of", ), - desc="get_min_depth_of", ) if not rows: @@ -1094,10 +1100,10 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas else: min_depth_event_id = "" current_min_depth = MAX_DEPTH - for row in rows: - if row["depth"] < current_min_depth: - min_depth_event_id = row["event_id"] - current_min_depth = row["depth"] + for event_id, depth in rows: + if depth < current_min_depth: + min_depth_event_id = event_id + current_min_depth = depth return min_depth_event_id, current_min_depth @@ -1553,19 +1559,18 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas A filtered down list of `event_ids` that have previous failed pull attempts. """ - rows = await self.db_pool.simple_select_many_batch( - table="event_failed_pull_attempts", - column="event_id", - iterable=event_ids, - keyvalues={}, - retcols=("event_id",), - desc="get_event_ids_with_failed_pull_attempts", + rows = cast( + List[Tuple[str]], + await self.db_pool.simple_select_many_batch( + table="event_failed_pull_attempts", + column="event_id", + iterable=event_ids, + keyvalues={}, + retcols=("event_id",), + desc="get_event_ids_with_failed_pull_attempts", + ), ) - event_ids_with_failed_pull_attempts: Set[str] = { - row["event_id"] for row in rows - } - - return event_ids_with_failed_pull_attempts + return {row[0] for row in rows} @trace async def get_event_ids_to_not_pull_from_backoff( @@ -1585,32 +1590,34 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas A dictionary of event_ids that should not be attempted to be pulled and the next timestamp at which we may try pulling them again. """ - event_failed_pull_attempts = await self.db_pool.simple_select_many_batch( - table="event_failed_pull_attempts", - column="event_id", - iterable=event_ids, - keyvalues={}, - retcols=( - "event_id", - "last_attempt_ts", - "num_attempts", + event_failed_pull_attempts = cast( + List[Tuple[str, int, int]], + await self.db_pool.simple_select_many_batch( + table="event_failed_pull_attempts", + column="event_id", + iterable=event_ids, + keyvalues={}, + retcols=( + "event_id", + "last_attempt_ts", + "num_attempts", + ), + desc="get_event_ids_to_not_pull_from_backoff", ), - desc="get_event_ids_to_not_pull_from_backoff", ) current_time = self._clock.time_msec() event_ids_with_backoff = {} - for event_failed_pull_attempt in event_failed_pull_attempts: - event_id = event_failed_pull_attempt["event_id"] + for event_id, last_attempt_ts, num_attempts in event_failed_pull_attempts: # Exponential back-off (up to the upper bound) so we don't try to # pull the same event over and over. ex. 2hr, 4hr, 8hr, 16hr, etc. backoff_end_time = ( - event_failed_pull_attempt["last_attempt_ts"] + last_attempt_ts + ( 2 ** min( - event_failed_pull_attempt["num_attempts"], + num_attempts, BACKFILL_EVENT_EXPONENTIAL_BACKOFF_MAXIMUM_DOUBLING_STEPS, ) ) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index d4dcdb898c..ef6766b5e0 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -27,6 +27,7 @@ from typing import ( Optional, Set, Tuple, + Union, cast, ) @@ -501,16 +502,19 @@ class PersistEventsStore: # We ignore legacy rooms that we aren't filling the chain cover index # for. - rows = self.db_pool.simple_select_many_txn( - txn, - table="rooms", - column="room_id", - iterable={event.room_id for event in events if event.is_state()}, - keyvalues={}, - retcols=("room_id", "has_auth_chain_index"), + rows = cast( + List[Tuple[str, Optional[Union[int, bool]]]], + self.db_pool.simple_select_many_txn( + txn, + table="rooms", + column="room_id", + iterable={event.room_id for event in events if event.is_state()}, + keyvalues={}, + retcols=("room_id", "has_auth_chain_index"), + ), ) rooms_using_chain_index = { - row["room_id"] for row in rows if row["has_auth_chain_index"] + room_id for room_id, has_auth_chain_index in rows if has_auth_chain_index } state_events = { @@ -571,19 +575,18 @@ class PersistEventsStore: # We check if there are any events that need to be handled in the rooms # we're looking at. These should just be out of band memberships, where # we didn't have the auth chain when we first persisted. - rows = db_pool.simple_select_many_txn( - txn, - table="event_auth_chain_to_calculate", - keyvalues={}, - column="room_id", - iterable=set(event_to_room_id.values()), - retcols=("event_id", "type", "state_key"), + auth_chain_to_calc_rows = cast( + List[Tuple[str, str, str]], + db_pool.simple_select_many_txn( + txn, + table="event_auth_chain_to_calculate", + keyvalues={}, + column="room_id", + iterable=set(event_to_room_id.values()), + retcols=("event_id", "type", "state_key"), + ), ) - for row in rows: - event_id = row["event_id"] - event_type = row["type"] - state_key = row["state_key"] - + for event_id, event_type, state_key in auth_chain_to_calc_rows: # (We could pull out the auth events for all rows at once using # simple_select_many, but this case happens rarely and almost always # with a single row.) @@ -753,23 +756,31 @@ class PersistEventsStore: # Step 1, fetch all existing links from all the chains we've seen # referenced. chain_links = _LinkMap() - rows = db_pool.simple_select_many_txn( - txn, - table="event_auth_chain_links", - column="origin_chain_id", - iterable={chain_id for chain_id, _ in chain_map.values()}, - keyvalues={}, - retcols=( - "origin_chain_id", - "origin_sequence_number", - "target_chain_id", - "target_sequence_number", + auth_chain_rows = cast( + List[Tuple[int, int, int, int]], + db_pool.simple_select_many_txn( + txn, + table="event_auth_chain_links", + column="origin_chain_id", + iterable={chain_id for chain_id, _ in chain_map.values()}, + keyvalues={}, + retcols=( + "origin_chain_id", + "origin_sequence_number", + "target_chain_id", + "target_sequence_number", + ), ), ) - for row in rows: + for ( + origin_chain_id, + origin_sequence_number, + target_chain_id, + target_sequence_number, + ) in auth_chain_rows: chain_links.add_link( - (row["origin_chain_id"], row["origin_sequence_number"]), - (row["target_chain_id"], row["target_sequence_number"]), + (origin_chain_id, origin_sequence_number), + (target_chain_id, target_sequence_number), new=False, ) diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index daef3685b0..c5fce1c82b 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -369,18 +369,20 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): chunks = [event_ids[i : i + 100] for i in range(0, len(event_ids), 100)] for chunk in chunks: - ev_rows = self.db_pool.simple_select_many_txn( - txn, - table="event_json", - column="event_id", - iterable=chunk, - retcols=["event_id", "json"], - keyvalues={}, + ev_rows = cast( + List[Tuple[str, str]], + self.db_pool.simple_select_many_txn( + txn, + table="event_json", + column="event_id", + iterable=chunk, + retcols=["event_id", "json"], + keyvalues={}, + ), ) - for row in ev_rows: - event_id = row["event_id"] - event_json = db_to_json(row["json"]) + for event_id, json in ev_rows: + event_json = db_to_json(json) try: origin_server_ts = event_json["origin_server_ts"] except (KeyError, AttributeError): @@ -563,15 +565,18 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): if deleted: # We now need to invalidate the caches of these rooms - rows = self.db_pool.simple_select_many_txn( - txn, - table="events", - column="event_id", - iterable=to_delete, - keyvalues={}, - retcols=("room_id",), + rows = cast( + List[Tuple[str]], + self.db_pool.simple_select_many_txn( + txn, + table="events", + column="event_id", + iterable=to_delete, + keyvalues={}, + retcols=("room_id",), + ), ) - room_ids = {row["room_id"] for row in rows} + room_ids = {row[0] for row in rows} for room_id in room_ids: txn.call_after( self.get_latest_event_ids_in_room.invalidate, (room_id,) # type: ignore[attr-defined] @@ -1038,18 +1043,21 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): count = len(rows) # We also need to fetch the auth events for them. - auth_events = self.db_pool.simple_select_many_txn( - txn, - table="event_auth", - column="event_id", - iterable=event_to_room_id, - keyvalues={}, - retcols=("event_id", "auth_id"), + auth_events = cast( + List[Tuple[str, str]], + self.db_pool.simple_select_many_txn( + txn, + table="event_auth", + column="event_id", + iterable=event_to_room_id, + keyvalues={}, + retcols=("event_id", "auth_id"), + ), ) event_to_auth_chain: Dict[str, List[str]] = {} - for row in auth_events: - event_to_auth_chain.setdefault(row["event_id"], []).append(row["auth_id"]) + for event_id, auth_id in auth_events: + event_to_auth_chain.setdefault(event_id, []).append(auth_id) # Calculate and persist the chain cover index for this set of events. # diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index b788d70fc5..8af638d60f 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -1584,16 +1584,19 @@ class EventsWorkerStore(SQLBaseStore): """Given a list of event ids, check if we have already processed and stored them as non outliers. """ - rows = await self.db_pool.simple_select_many_batch( - table="events", - retcols=("event_id",), - column="event_id", - iterable=list(event_ids), - keyvalues={"outlier": False}, - desc="have_events_in_timeline", + rows = cast( + List[Tuple[str]], + await self.db_pool.simple_select_many_batch( + table="events", + retcols=("event_id",), + column="event_id", + iterable=list(event_ids), + keyvalues={"outlier": False}, + desc="have_events_in_timeline", + ), ) - return {r["event_id"] for r in rows} + return {r[0] for r in rows} @trace @tag_args @@ -2336,15 +2339,18 @@ class EventsWorkerStore(SQLBaseStore): a dict mapping from event id to partial-stateness. We return True for any of the events which are unknown (or are outliers). """ - result = await self.db_pool.simple_select_many_batch( - table="partial_state_events", - column="event_id", - iterable=event_ids, - retcols=["event_id"], - desc="get_partial_state_events", + result = cast( + List[Tuple[str]], + await self.db_pool.simple_select_many_batch( + table="partial_state_events", + column="event_id", + iterable=event_ids, + retcols=["event_id"], + desc="get_partial_state_events", + ), ) # convert the result to a dict, to make @cachedList work - partial = {r["event_id"] for r in result} + partial = {r[0] for r in result} return {e_id: e_id in partial for e_id in event_ids} @cached() diff --git a/synapse/storage/databases/main/keys.py b/synapse/storage/databases/main/keys.py index 889c578b9c..ea797864b9 100644 --- a/synapse/storage/databases/main/keys.py +++ b/synapse/storage/databases/main/keys.py @@ -16,7 +16,7 @@ import itertools import json import logging -from typing import Dict, Iterable, Mapping, Optional, Tuple +from typing import Dict, Iterable, List, Mapping, Optional, Tuple, Union, cast from canonicaljson import encode_canonical_json from signedjson.key import decode_verify_key_bytes @@ -205,35 +205,39 @@ class KeyStore(CacheInvalidationWorkerStore): If we have multiple entries for a given key ID, returns the most recent. """ - rows = await self.db_pool.simple_select_many_batch( - table="server_keys_json", - column="key_id", - iterable=key_ids, - keyvalues={"server_name": server_name}, - retcols=( - "key_id", - "from_server", - "ts_added_ms", - "ts_valid_until_ms", - "key_json", + rows = cast( + List[Tuple[str, str, int, int, Union[bytes, memoryview]]], + await self.db_pool.simple_select_many_batch( + table="server_keys_json", + column="key_id", + iterable=key_ids, + keyvalues={"server_name": server_name}, + retcols=( + "key_id", + "from_server", + "ts_added_ms", + "ts_valid_until_ms", + "key_json", + ), + desc="get_server_keys_json_for_remote", ), - desc="get_server_keys_json_for_remote", ) if not rows: return {} - # We sort the rows so that the most recently added entry is picked up. - rows.sort(key=lambda r: r["ts_added_ms"]) + # We sort the rows by ts_added_ms so that the most recently added entry + # will stomp over older entries in the dictionary. + rows.sort(key=lambda r: r[2]) return { - row["key_id"]: FetchKeyResultForRemote( + key_id: FetchKeyResultForRemote( # Cast to bytes since postgresql returns a memoryview. - key_json=bytes(row["key_json"]), - valid_until_ts=row["ts_valid_until_ms"], - added_ts=row["ts_added_ms"], + key_json=bytes(key_json), + valid_until_ts=ts_valid_until_ms, + added_ts=ts_added_ms, ) - for row in rows + for key_id, from_server, ts_added_ms, ts_valid_until_ms, key_json in rows } async def get_all_server_keys_json_for_remote( @@ -260,6 +264,8 @@ class KeyStore(CacheInvalidationWorkerStore): if not rows: return {} + # We sort the rows by ts_added_ms so that the most recently added entry + # will stomp over older entries in the dictionary. rows.sort(key=lambda r: r["ts_added_ms"]) return { diff --git a/synapse/storage/databases/main/presence.py b/synapse/storage/databases/main/presence.py index 519f05fb60..3b444d2d07 100644 --- a/synapse/storage/databases/main/presence.py +++ b/synapse/storage/databases/main/presence.py @@ -261,27 +261,40 @@ class PresenceStore(PresenceBackgroundUpdateStore, CacheInvalidationWorkerStore) async def get_presence_for_users( self, user_ids: Iterable[str] ) -> Mapping[str, UserPresenceState]: - rows = await self.db_pool.simple_select_many_batch( - table="presence_stream", - column="user_id", - iterable=user_ids, - keyvalues={}, - retcols=( - "user_id", - "state", - "last_active_ts", - "last_federation_update_ts", - "last_user_sync_ts", - "status_msg", - "currently_active", + # TODO All these columns are nullable, but we don't expect that: + # https://github.com/matrix-org/synapse/issues/16467 + rows = cast( + List[Tuple[str, str, int, int, int, Optional[str], Union[int, bool]]], + await self.db_pool.simple_select_many_batch( + table="presence_stream", + column="user_id", + iterable=user_ids, + keyvalues={}, + retcols=( + "user_id", + "state", + "last_active_ts", + "last_federation_update_ts", + "last_user_sync_ts", + "status_msg", + "currently_active", + ), + desc="get_presence_for_users", ), - desc="get_presence_for_users", ) - for row in rows: - row["currently_active"] = bool(row["currently_active"]) - - return {row["user_id"]: UserPresenceState(**row) for row in rows} + return { + user_id: UserPresenceState( + user_id=user_id, + state=state, + last_active_ts=last_active_ts, + last_federation_update_ts=last_federation_update_ts, + last_user_sync_ts=last_user_sync_ts, + status_msg=status_msg, + currently_active=bool(currently_active), + ) + for user_id, state, last_active_ts, last_federation_update_ts, last_user_sync_ts, status_msg, currently_active in rows + } async def should_user_receive_full_presence_with_token( self, @@ -386,6 +399,8 @@ class PresenceStore(PresenceBackgroundUpdateStore, CacheInvalidationWorkerStore) limit = 100 offset = 0 while True: + # TODO All these columns are nullable, but we don't expect that: + # https://github.com/matrix-org/synapse/issues/16467 rows = cast( List[Tuple[str, str, int, int, int, Optional[str], Union[int, bool]]], await self.db_pool.runInteraction( diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py index 923166974c..f5356e7f80 100644 --- a/synapse/storage/databases/main/push_rule.py +++ b/synapse/storage/databases/main/push_rule.py @@ -62,20 +62,34 @@ logger = logging.getLogger(__name__) def _load_rules( - rawrules: List[JsonDict], + rawrules: List[Tuple[str, int, str, str]], enabled_map: Dict[str, bool], experimental_config: ExperimentalConfig, ) -> FilteredPushRules: """Take the DB rows returned from the DB and convert them into a full `FilteredPushRules` object. + + Args: + rawrules: List of tuples of: + * rule ID + * Priority lass + * Conditions (as serialized JSON) + * Actions (as serialized JSON) + enabled_map: A dictionary of rule ID to a boolean of whether the rule is + enabled. This might not include all rule IDs from rawrules. + experimental_config: The `experimental_features` section of the Synapse + config. (Used to check if various features are enabled.) + + Returns: + A new FilteredPushRules object. """ ruleslist = [ PushRule.from_db( - rule_id=rawrule["rule_id"], - priority_class=rawrule["priority_class"], - conditions=rawrule["conditions"], - actions=rawrule["actions"], + rule_id=rawrule[0], + priority_class=rawrule[1], + conditions=rawrule[2], + actions=rawrule[3], ) for rawrule in rawrules ] @@ -183,7 +197,19 @@ class PushRulesWorkerStore( enabled_map = await self.get_push_rules_enabled_for_user(user_id) - return _load_rules(rows, enabled_map, self.hs.config.experimental) + return _load_rules( + [ + ( + row["rule_id"], + row["priority_class"], + row["conditions"], + row["actions"], + ) + for row in rows + ], + enabled_map, + self.hs.config.experimental, + ) async def get_push_rules_enabled_for_user(self, user_id: str) -> Dict[str, bool]: results = await self.db_pool.simple_select_list( @@ -221,21 +247,36 @@ class PushRulesWorkerStore( if not user_ids: return {} - raw_rules: Dict[str, List[JsonDict]] = {user_id: [] for user_id in user_ids} + raw_rules: Dict[str, List[Tuple[str, int, str, str]]] = { + user_id: [] for user_id in user_ids + } - rows = await self.db_pool.simple_select_many_batch( - table="push_rules", - column="user_name", - iterable=user_ids, - retcols=("*",), - desc="bulk_get_push_rules", - batch_size=1000, + rows = cast( + List[Tuple[str, str, int, int, str, str]], + await self.db_pool.simple_select_many_batch( + table="push_rules", + column="user_name", + iterable=user_ids, + retcols=( + "user_name", + "rule_id", + "priority_class", + "priority", + "conditions", + "actions", + ), + desc="bulk_get_push_rules", + batch_size=1000, + ), ) - rows.sort(key=lambda row: (-int(row["priority_class"]), -int(row["priority"]))) + # Sort by highest priority_class, then highest priority. + rows.sort(key=lambda row: (-int(row[2]), -int(row[3]))) - for row in rows: - raw_rules.setdefault(row["user_name"], []).append(row) + for user_name, rule_id, priority_class, _, conditions, actions in rows: + raw_rules.setdefault(user_name, []).append( + (rule_id, priority_class, conditions, actions) + ) enabled_map_by_user = await self.bulk_get_push_rules_enabled(user_ids) @@ -256,17 +297,19 @@ class PushRulesWorkerStore( results: Dict[str, Dict[str, bool]] = {user_id: {} for user_id in user_ids} - rows = await self.db_pool.simple_select_many_batch( - table="push_rules_enable", - column="user_name", - iterable=user_ids, - retcols=("user_name", "rule_id", "enabled"), - desc="bulk_get_push_rules_enabled", - batch_size=1000, + rows = cast( + List[Tuple[str, str, Optional[int]]], + await self.db_pool.simple_select_many_batch( + table="push_rules_enable", + column="user_name", + iterable=user_ids, + retcols=("user_name", "rule_id", "enabled"), + desc="bulk_get_push_rules_enabled", + batch_size=1000, + ), ) - for row in rows: - enabled = bool(row["enabled"]) - results.setdefault(row["user_name"], {})[row["rule_id"]] = enabled + for user_name, rule_id, enabled in rows: + results.setdefault(user_name, {})[rule_id] = bool(enabled) return results async def get_all_push_rule_updates( diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py index 9246b418f5..7f40e2c446 100644 --- a/synapse/storage/databases/main/relations.py +++ b/synapse/storage/databases/main/relations.py @@ -349,16 +349,19 @@ class RelationsWorkerStore(SQLBaseStore): def get_all_relation_ids_for_event_with_types_txn( txn: LoggingTransaction, ) -> List[str]: - rows = self.db_pool.simple_select_many_txn( - txn=txn, - table="event_relations", - column="relation_type", - iterable=relation_types, - keyvalues={"relates_to_id": event_id}, - retcols=["event_id"], + rows = cast( + List[Tuple[str]], + self.db_pool.simple_select_many_txn( + txn=txn, + table="event_relations", + column="relation_type", + iterable=relation_types, + keyvalues={"relates_to_id": event_id}, + retcols=["event_id"], + ), ) - return [row["event_id"] for row in rows] + return [row[0] for row in rows] return await self.db_pool.runInteraction( desc="get_all_relation_ids_for_event_with_types", diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 1d4d99932b..9d24d2c347 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1296,14 +1296,17 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): complete. """ - rows: List[Dict[str, str]] = await self.db_pool.simple_select_many_batch( - table="partial_state_rooms", - column="room_id", - iterable=room_ids, - retcols=("room_id",), - desc="is_partial_state_room_batched", - ) - partial_state_rooms = {row_dict["room_id"] for row_dict in rows} + rows = cast( + List[Tuple[str]], + await self.db_pool.simple_select_many_batch( + table="partial_state_rooms", + column="room_id", + iterable=room_ids, + retcols=("room_id",), + desc="is_partial_state_room_batched", + ), + ) + partial_state_rooms = {row[0] for row in rows} return {room_id: room_id in partial_state_rooms for room_id in room_ids} async def get_join_event_id_and_device_lists_stream_id_for_partial_state( diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index bbe08368db..3a87eba430 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -27,6 +27,7 @@ from typing import ( Set, Tuple, Union, + cast, ) import attr @@ -683,25 +684,28 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore): Map from user_id to set of rooms that is currently in. """ - rows = await self.db_pool.simple_select_many_batch( - table="current_state_events", - column="state_key", - iterable=user_ids, - retcols=( - "state_key", - "room_id", + rows = cast( + List[Tuple[str, str]], + await self.db_pool.simple_select_many_batch( + table="current_state_events", + column="state_key", + iterable=user_ids, + retcols=( + "state_key", + "room_id", + ), + keyvalues={ + "type": EventTypes.Member, + "membership": Membership.JOIN, + }, + desc="get_rooms_for_users", ), - keyvalues={ - "type": EventTypes.Member, - "membership": Membership.JOIN, - }, - desc="get_rooms_for_users", ) user_rooms: Dict[str, Set[str]] = {user_id: set() for user_id in user_ids} - for row in rows: - user_rooms[row["state_key"]].add(row["room_id"]) + for state_key, room_id in rows: + user_rooms[state_key].add(room_id) return {key: frozenset(rooms) for key, rooms in user_rooms.items()} @@ -892,17 +896,20 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore): Map from event ID to `user_id`, or None if event is not a join. """ - rows = await self.db_pool.simple_select_many_batch( - table="room_memberships", - column="event_id", - iterable=event_ids, - retcols=("user_id", "event_id"), - keyvalues={"membership": Membership.JOIN}, - batch_size=1000, - desc="_get_user_ids_from_membership_event_ids", + rows = cast( + List[Tuple[str, str]], + await self.db_pool.simple_select_many_batch( + table="room_memberships", + column="event_id", + iterable=event_ids, + retcols=("event_id", "user_id"), + keyvalues={"membership": Membership.JOIN}, + batch_size=1000, + desc="_get_user_ids_from_membership_event_ids", + ), ) - return {row["event_id"]: row["user_id"] for row in rows} + return dict(rows) @cached(max_entries=10000) async def is_host_joined(self, room_id: str, host: str) -> bool: @@ -1202,21 +1209,22 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore): membership event, otherwise the value is None. """ - rows = await self.db_pool.simple_select_many_batch( - table="room_memberships", - column="event_id", - iterable=member_event_ids, - retcols=("user_id", "membership", "event_id"), - keyvalues={}, - batch_size=500, - desc="get_membership_from_event_ids", + rows = cast( + List[Tuple[str, str, str]], + await self.db_pool.simple_select_many_batch( + table="room_memberships", + column="event_id", + iterable=member_event_ids, + retcols=("user_id", "membership", "event_id"), + keyvalues={}, + batch_size=500, + desc="get_membership_from_event_ids", + ), ) return { - row["event_id"]: EventIdMembership( - membership=row["membership"], user_id=row["user_id"] - ) - for row in rows + event_id: EventIdMembership(membership=membership, user_id=user_id) + for user_id, membership, event_id in rows } async def is_local_host_in_room_ignoring_users( diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index 5eaaff5b68..598025dd91 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -20,10 +20,12 @@ from typing import ( Collection, Dict, Iterable, + List, Mapping, Optional, Set, Tuple, + cast, ) import attr @@ -388,16 +390,19 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): Raises: RuntimeError if the state is unknown at any of the given events """ - rows = await self.db_pool.simple_select_many_batch( - table="event_to_state_groups", - column="event_id", - iterable=event_ids, - keyvalues={}, - retcols=("event_id", "state_group"), - desc="_get_state_group_for_events", + rows = cast( + List[Tuple[str, int]], + await self.db_pool.simple_select_many_batch( + table="event_to_state_groups", + column="event_id", + iterable=event_ids, + keyvalues={}, + retcols=("event_id", "state_group"), + desc="_get_state_group_for_events", + ), ) - res = {row["event_id"]: row["state_group"] for row in rows} + res = dict(rows) for e in event_ids: if e not in res: raise RuntimeError("No state group for unknown or outlier event %s" % e) @@ -415,16 +420,19 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): The subset of state groups that are referenced. """ - rows = await self.db_pool.simple_select_many_batch( - table="event_to_state_groups", - column="state_group", - iterable=state_groups, - keyvalues={}, - retcols=("DISTINCT state_group",), - desc="get_referenced_state_groups", + rows = cast( + List[Tuple[int]], + await self.db_pool.simple_select_many_batch( + table="event_to_state_groups", + column="state_group", + iterable=state_groups, + keyvalues={}, + retcols=("DISTINCT state_group",), + desc="get_referenced_state_groups", + ), ) - return {row["state_group"] for row in rows} + return {row[0] for row in rows} async def update_state_for_partial_state_event( self, @@ -624,16 +632,22 @@ class MainStateBackgroundUpdateStore(RoomMemberWorkerStore): # potentially stale, since there may have been a period where the # server didn't share a room with the remote user and therefore may # have missed any device updates. - rows = self.db_pool.simple_select_many_txn( - txn, - table="current_state_events", - column="room_id", - iterable=to_delete, - keyvalues={"type": EventTypes.Member, "membership": Membership.JOIN}, - retcols=("state_key",), + rows = cast( + List[Tuple[str]], + self.db_pool.simple_select_many_txn( + txn, + table="current_state_events", + column="room_id", + iterable=to_delete, + keyvalues={ + "type": EventTypes.Member, + "membership": Membership.JOIN, + }, + retcols=("state_key",), + ), ) - potentially_left_users = {row["state_key"] for row in rows} + potentially_left_users = {row[0] for row in rows} # Now lets actually delete the rooms from the DB. self.db_pool.simple_delete_many_txn( diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py index 9d403919e4..5b2d0ba870 100644 --- a/synapse/storage/databases/main/stats.py +++ b/synapse/storage/databases/main/stats.py @@ -506,25 +506,28 @@ class StatsStore(StateDeltasStore): ) -> Tuple[List[str], Dict[str, int], int, List[str], int]: pos = self.get_room_max_stream_ordering() # type: ignore[attr-defined] - rows = self.db_pool.simple_select_many_txn( - txn, - table="current_state_events", - column="type", - iterable=[ - EventTypes.Create, - EventTypes.JoinRules, - EventTypes.RoomHistoryVisibility, - EventTypes.RoomEncryption, - EventTypes.Name, - EventTypes.Topic, - EventTypes.RoomAvatar, - EventTypes.CanonicalAlias, - ], - keyvalues={"room_id": room_id, "state_key": ""}, - retcols=["event_id"], + rows = cast( + List[Tuple[str]], + self.db_pool.simple_select_many_txn( + txn, + table="current_state_events", + column="type", + iterable=[ + EventTypes.Create, + EventTypes.JoinRules, + EventTypes.RoomHistoryVisibility, + EventTypes.RoomEncryption, + EventTypes.Name, + EventTypes.Topic, + EventTypes.RoomAvatar, + EventTypes.CanonicalAlias, + ], + keyvalues={"room_id": room_id, "state_key": ""}, + retcols=["event_id"], + ), ) - event_ids = cast(List[str], [row["event_id"] for row in rows]) + event_ids = [row[0] for row in rows] txn.execute( """ diff --git a/synapse/storage/databases/main/transactions.py b/synapse/storage/databases/main/transactions.py index f35757280d..c4a6475060 100644 --- a/synapse/storage/databases/main/transactions.py +++ b/synapse/storage/databases/main/transactions.py @@ -211,18 +211,28 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore): async def get_destination_retry_timings_batch( self, destinations: StrCollection ) -> Mapping[str, Optional[DestinationRetryTimings]]: - rows = await self.db_pool.simple_select_many_batch( - table="destinations", - iterable=destinations, - column="destination", - retcols=("destination", "failure_ts", "retry_last_ts", "retry_interval"), - desc="get_destination_retry_timings_batch", + rows = cast( + List[Tuple[str, Optional[int], Optional[int], Optional[int]]], + await self.db_pool.simple_select_many_batch( + table="destinations", + iterable=destinations, + column="destination", + retcols=( + "destination", + "failure_ts", + "retry_last_ts", + "retry_interval", + ), + desc="get_destination_retry_timings_batch", + ), ) return { - row.pop("destination"): DestinationRetryTimings(**row) - for row in rows - if row["retry_last_ts"] and row["failure_ts"] and row["retry_interval"] + destination: DestinationRetryTimings( + failure_ts, retry_last_ts, retry_interval + ) + for destination, failure_ts, retry_last_ts, retry_interval in rows + if retry_last_ts and failure_ts and retry_interval } async def set_destination_retry_timings( diff --git a/synapse/storage/databases/main/ui_auth.py b/synapse/storage/databases/main/ui_auth.py index f38bedbbcd..919c66f553 100644 --- a/synapse/storage/databases/main/ui_auth.py +++ b/synapse/storage/databases/main/ui_auth.py @@ -337,13 +337,16 @@ class UIAuthWorkerStore(SQLBaseStore): # If a registration token was used, decrement the pending counter # before deleting the session. - rows = self.db_pool.simple_select_many_txn( - txn, - table="ui_auth_sessions_credentials", - column="session_id", - iterable=session_ids, - keyvalues={"stage_type": LoginType.REGISTRATION_TOKEN}, - retcols=["result"], + rows = cast( + List[Tuple[str]], + self.db_pool.simple_select_many_txn( + txn, + table="ui_auth_sessions_credentials", + column="session_id", + iterable=session_ids, + keyvalues={"stage_type": LoginType.REGISTRATION_TOKEN}, + retcols=["result"], + ), ) # Get the tokens used and how much pending needs to be decremented by. @@ -353,23 +356,25 @@ class UIAuthWorkerStore(SQLBaseStore): # registration token stage for that session will be True. # If a token was used to authenticate, but registration was # never completed, the result will be the token used. - token = db_to_json(r["result"]) + token = db_to_json(r[0]) if isinstance(token, str): token_counts[token] = token_counts.get(token, 0) + 1 # Update the `pending` counters. if len(token_counts) > 0: - token_rows = self.db_pool.simple_select_many_txn( - txn, - table="registration_tokens", - column="token", - iterable=list(token_counts.keys()), - keyvalues={}, - retcols=["token", "pending"], + token_rows = cast( + List[Tuple[str, int]], + self.db_pool.simple_select_many_txn( + txn, + table="registration_tokens", + column="token", + iterable=list(token_counts.keys()), + keyvalues={}, + retcols=["token", "pending"], + ), ) - for token_row in token_rows: - token = token_row["token"] - new_pending = token_row["pending"] - token_counts[token] + for token, pending in token_rows: + new_pending = pending - token_counts[token] self.db_pool.simple_update_one_txn( txn, table="registration_tokens", diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index f0dc31fee6..23eb92c514 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -410,25 +410,24 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore): ) # Next fetch their profiles. Note that not all users have profiles. - profile_rows = self.db_pool.simple_select_many_txn( - txn, - table="profiles", - column="full_user_id", - iterable=list(users_to_insert), - retcols=( - "full_user_id", - "displayname", - "avatar_url", + profile_rows = cast( + List[Tuple[str, Optional[str], Optional[str]]], + self.db_pool.simple_select_many_txn( + txn, + table="profiles", + column="full_user_id", + iterable=list(users_to_insert), + retcols=( + "full_user_id", + "displayname", + "avatar_url", + ), + keyvalues={}, ), - keyvalues={}, ) profiles = { - row["full_user_id"]: _UserDirProfile( - row["full_user_id"], - row["displayname"], - row["avatar_url"], - ) - for row in profile_rows + full_user_id: _UserDirProfile(full_user_id, displayname, avatar_url) + for full_user_id, displayname, avatar_url in profile_rows } profiles_to_insert = [ @@ -517,18 +516,21 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore): and not self.get_if_app_services_interested_in_user(user) # type: ignore[attr-defined] ] - rows = self.db_pool.simple_select_many_txn( - txn, - table="users", - column="name", - iterable=users, - keyvalues={ - "deactivated": 0, - }, - retcols=("name", "user_type"), + rows = cast( + List[Tuple[str, Optional[str]]], + self.db_pool.simple_select_many_txn( + txn, + table="users", + column="name", + iterable=users, + keyvalues={ + "deactivated": 0, + }, + retcols=("name", "user_type"), + ), ) - return [row["name"] for row in rows if row["user_type"] != UserTypes.SUPPORT] + return [name for name, user_type in rows if user_type != UserTypes.SUPPORT] async def is_room_world_readable_or_publicly_joinable(self, room_id: str) -> bool: """Check if the room is either world_readable or publically joinable""" diff --git a/synapse/storage/databases/main/user_erasure_store.py b/synapse/storage/databases/main/user_erasure_store.py index 06fcbe5e54..8bd58c6e3d 100644 --- a/synapse/storage/databases/main/user_erasure_store.py +++ b/synapse/storage/databases/main/user_erasure_store.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Iterable, Mapping +from typing import Iterable, List, Mapping, Tuple, cast from synapse.storage.database import LoggingTransaction from synapse.storage.databases.main import CacheInvalidationWorkerStore @@ -50,14 +50,17 @@ class UserErasureWorkerStore(CacheInvalidationWorkerStore): Returns: for each user, whether the user has requested erasure. """ - rows = await self.db_pool.simple_select_many_batch( - table="erased_users", - column="user_id", - iterable=user_ids, - retcols=("user_id",), - desc="are_users_erased", + rows = cast( + List[Tuple[str]], + await self.db_pool.simple_select_many_batch( + table="erased_users", + column="user_id", + iterable=user_ids, + retcols=("user_id",), + desc="are_users_erased", + ), ) - erased_users = {row["user_id"] for row in rows} + erased_users = {row[0] for row in rows} return {u: u in erased_users for u in user_ids} diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index 6984d11352..09d2a8c5b3 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -13,7 +13,17 @@ # limitations under the License. import logging -from typing import TYPE_CHECKING, Collection, Dict, Iterable, List, Optional, Set, Tuple +from typing import ( + TYPE_CHECKING, + Collection, + Dict, + Iterable, + List, + Optional, + Set, + Tuple, + cast, +) import attr @@ -730,19 +740,22 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): "[purge] found %i state groups to delete", len(state_groups_to_delete) ) - rows = self.db_pool.simple_select_many_txn( - txn, - table="state_group_edges", - column="prev_state_group", - iterable=state_groups_to_delete, - keyvalues={}, - retcols=("state_group",), + rows = cast( + List[Tuple[int]], + self.db_pool.simple_select_many_txn( + txn, + table="state_group_edges", + column="prev_state_group", + iterable=state_groups_to_delete, + keyvalues={}, + retcols=("state_group",), + ), ) remaining_state_groups = { - row["state_group"] - for row in rows - if row["state_group"] not in state_groups_to_delete + state_group + for state_group, in rows + if state_group not in state_groups_to_delete } logger.info( @@ -799,16 +812,19 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): A mapping from state group to previous state group. """ - rows = await self.db_pool.simple_select_many_batch( - table="state_group_edges", - column="prev_state_group", - iterable=state_groups, - keyvalues={}, - retcols=("prev_state_group", "state_group"), - desc="get_previous_state_groups", + rows = cast( + List[Tuple[int, int]], + await self.db_pool.simple_select_many_batch( + table="state_group_edges", + column="prev_state_group", + iterable=state_groups, + keyvalues={}, + retcols=("state_group", "prev_state_group"), + desc="get_previous_state_groups", + ), ) - return {row["state_group"]: row["prev_state_group"] for row in rows} + return dict(rows) async def purge_room_state( self, room_id: str, state_groups_to_delete: Collection[int] diff --git a/tests/storage/test_event_chain.py b/tests/storage/test_event_chain.py index b55dd07f14..2f6499966c 100644 --- a/tests/storage/test_event_chain.py +++ b/tests/storage/test_event_chain.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict, List, Set, Tuple +from typing import Dict, List, Set, Tuple, cast from twisted.test.proto_helpers import MemoryReactor from twisted.trial import unittest @@ -421,41 +421,53 @@ class EventChainStoreTestCase(HomeserverTestCase): self, events: List[EventBase] ) -> Tuple[Dict[str, Tuple[int, int]], _LinkMap]: # Fetch the map from event ID -> (chain ID, sequence number) - rows = self.get_success( - self.store.db_pool.simple_select_many_batch( - table="event_auth_chains", - column="event_id", - iterable=[e.event_id for e in events], - retcols=("event_id", "chain_id", "sequence_number"), - keyvalues={}, - ) + rows = cast( + List[Tuple[str, int, int]], + self.get_success( + self.store.db_pool.simple_select_many_batch( + table="event_auth_chains", + column="event_id", + iterable=[e.event_id for e in events], + retcols=("event_id", "chain_id", "sequence_number"), + keyvalues={}, + ) + ), ) chain_map = { - row["event_id"]: (row["chain_id"], row["sequence_number"]) for row in rows + event_id: (chain_id, sequence_number) + for event_id, chain_id, sequence_number in rows } # Fetch all the links and pass them to the _LinkMap. - rows = self.get_success( - self.store.db_pool.simple_select_many_batch( - table="event_auth_chain_links", - column="origin_chain_id", - iterable=[chain_id for chain_id, _ in chain_map.values()], - retcols=( - "origin_chain_id", - "origin_sequence_number", - "target_chain_id", - "target_sequence_number", - ), - keyvalues={}, - ) + auth_chain_rows = cast( + List[Tuple[int, int, int, int]], + self.get_success( + self.store.db_pool.simple_select_many_batch( + table="event_auth_chain_links", + column="origin_chain_id", + iterable=[chain_id for chain_id, _ in chain_map.values()], + retcols=( + "origin_chain_id", + "origin_sequence_number", + "target_chain_id", + "target_sequence_number", + ), + keyvalues={}, + ) + ), ) link_map = _LinkMap() - for row in rows: + for ( + origin_chain_id, + origin_sequence_number, + target_chain_id, + target_sequence_number, + ) in auth_chain_rows: added = link_map.add_link( - (row["origin_chain_id"], row["origin_sequence_number"]), - (row["target_chain_id"], row["target_sequence_number"]), + (origin_chain_id, origin_sequence_number), + (target_chain_id, target_sequence_number), ) # We shouldn't have persisted any redundant links -- cgit 1.5.1 From cc865fffc0e556005a6ab596717a77230ba82ee7 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 11 Oct 2023 20:08:11 -0400 Subject: Convert user_get_threepids response to attrs. (#16468) This improves type annotations by not having a dictionary of Any values. --- changelog.d/16468.misc | 1 + synapse/handlers/account_validity.py | 4 ++-- synapse/handlers/admin.py | 4 +++- synapse/handlers/deactivate_account.py | 4 ++-- synapse/module_api/__init__.py | 2 +- synapse/rest/admin/users.py | 3 +-- synapse/rest/client/account.py | 4 +++- synapse/storage/databases/main/registration.py | 19 ++++++++++++++----- tests/module_api/test_api.py | 8 ++++---- 9 files changed, 31 insertions(+), 18 deletions(-) create mode 100644 changelog.d/16468.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/16468.misc b/changelog.d/16468.misc new file mode 100644 index 0000000000..93ceaeafc9 --- /dev/null +++ b/changelog.d/16468.misc @@ -0,0 +1 @@ +Improve type hints. diff --git a/synapse/handlers/account_validity.py b/synapse/handlers/account_validity.py index f1a7a05df6..6c2a49a3b9 100644 --- a/synapse/handlers/account_validity.py +++ b/synapse/handlers/account_validity.py @@ -212,8 +212,8 @@ class AccountValidityHandler: addresses = [] for threepid in threepids: - if threepid["medium"] == "email": - addresses.append(threepid["address"]) + if threepid.medium == "email": + addresses.append(threepid.address) return addresses diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index 97fd1fd427..2c2baeac67 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -16,6 +16,8 @@ import abc import logging from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Sequence, Set +import attr + from synapse.api.constants import Direction, Membership from synapse.events import EventBase from synapse.types import JsonMapping, RoomStreamToken, StateMap, UserID, UserInfo @@ -93,7 +95,7 @@ class AdminHandler: ] user_info_dict["displayname"] = profile.display_name user_info_dict["avatar_url"] = profile.avatar_url - user_info_dict["threepids"] = threepids + user_info_dict["threepids"] = [attr.asdict(t) for t in threepids] user_info_dict["external_ids"] = external_ids user_info_dict["erased"] = await self._store.is_user_erased(user.to_string()) diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index 67adeae6a7..6a8f8f2fd1 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -117,9 +117,9 @@ class DeactivateAccountHandler: # Remove any local threepid associations for this account. local_threepids = await self.store.user_get_threepids(user_id) - for threepid in local_threepids: + for local_threepid in local_threepids: await self._auth_handler.delete_local_threepid( - user_id, threepid["medium"], threepid["address"] + user_id, local_threepid.medium, local_threepid.address ) # delete any devices belonging to the user, which will also diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index 65e2aca456..0786d20635 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -678,7 +678,7 @@ class ModuleApi: "msisdn" for phone numbers, and an "address" key which value is the threepid's address. """ - return await self._store.user_get_threepids(user_id) + return [attr.asdict(t) for t in await self._store.user_get_threepids(user_id)] def check_user_exists(self, user_id: str) -> "defer.Deferred[Optional[str]]": """Check if user exists. diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index cd995e8dbb..7fe16130e7 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -329,9 +329,8 @@ class UserRestServletV2(RestServlet): if threepids is not None: # get changed threepids (added and removed) - # convert List[Dict[str, Any]] into Set[Tuple[str, str]] cur_threepids = { - (threepid["medium"], threepid["address"]) + (threepid.medium, threepid.address) for threepid in await self.store.user_get_threepids(user_id) } add_threepids = new_threepids - cur_threepids diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py index e74a87af4d..641390cb30 100644 --- a/synapse/rest/client/account.py +++ b/synapse/rest/client/account.py @@ -24,6 +24,8 @@ if TYPE_CHECKING or HAS_PYDANTIC_V2: from pydantic.v1 import StrictBool, StrictStr, constr else: from pydantic import StrictBool, StrictStr, constr + +import attr from typing_extensions import Literal from twisted.web.server import Request @@ -595,7 +597,7 @@ class ThreepidRestServlet(RestServlet): threepids = await self.datastore.user_get_threepids(requester.user.to_string()) - return 200, {"threepids": threepids} + return 200, {"threepids": [attr.asdict(t) for t in threepids]} # NOTE(dmr): I have chosen not to use Pydantic to parse this request's body, because # the endpoint is deprecated. (If you really want to, you could do this by reusing diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index 64a2c31a5d..9e8643ae4d 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -143,6 +143,14 @@ class LoginTokenLookupResult: """The session ID advertised by the SSO Identity Provider.""" +@attr.s(frozen=True, slots=True, auto_attribs=True) +class ThreepidResult: + medium: str + address: str + validated_at: int + added_at: int + + class RegistrationWorkerStore(CacheInvalidationWorkerStore): def __init__( self, @@ -988,13 +996,14 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): {"user_id": user_id, "validated_at": validated_at, "added_at": added_at}, ) - async def user_get_threepids(self, user_id: str) -> List[Dict[str, Any]]: - return await self.db_pool.simple_select_list( + async def user_get_threepids(self, user_id: str) -> List[ThreepidResult]: + results = await self.db_pool.simple_select_list( "user_threepids", - {"user_id": user_id}, - ["medium", "address", "validated_at", "added_at"], - "user_get_threepids", + keyvalues={"user_id": user_id}, + retcols=["medium", "address", "validated_at", "added_at"], + desc="user_get_threepids", ) + return [ThreepidResult(**r) for r in results] async def user_delete_threepid( self, user_id: str, medium: str, address: str diff --git a/tests/module_api/test_api.py b/tests/module_api/test_api.py index 172fc3a736..1dabf52156 100644 --- a/tests/module_api/test_api.py +++ b/tests/module_api/test_api.py @@ -94,12 +94,12 @@ class ModuleApiTestCase(BaseModuleApiTestCase): self.assertEqual(len(emails), 1) email = emails[0] - self.assertEqual(email["medium"], "email") - self.assertEqual(email["address"], "bob@bobinator.bob") + self.assertEqual(email.medium, "email") + self.assertEqual(email.address, "bob@bobinator.bob") # Should these be 0? - self.assertEqual(email["validated_at"], 0) - self.assertEqual(email["added_at"], 0) + self.assertEqual(email.validated_at, 0) + self.assertEqual(email.added_at, 0) # Check that the displayname was assigned displayname = self.get_success( -- cgit 1.5.1 From eee6474bce4e387a05428de6f8291933ea6b72f7 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Mon, 16 Oct 2023 12:06:27 +0200 Subject: Remove useless async job to delete device messages on sync (#16491) --- changelog.d/16491.misc | 1 + synapse/handlers/sync.py | 22 ---------------------- synapse/storage/databases/main/deviceinbox.py | 5 +++-- 3 files changed, 4 insertions(+), 24 deletions(-) create mode 100644 changelog.d/16491.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/16491.misc b/changelog.d/16491.misc new file mode 100644 index 0000000000..70b5771373 --- /dev/null +++ b/changelog.d/16491.misc @@ -0,0 +1 @@ +Remove useless async job to delete device messages on sync, since we only deliver (and hence delete) up to 100 device messages at a time. diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 744e080309..60b4d95cd7 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -40,7 +40,6 @@ from synapse.api.filtering import FilterCollection from synapse.api.presence import UserPresenceState from synapse.api.room_versions import KNOWN_ROOM_VERSIONS from synapse.events import EventBase -from synapse.handlers.device import DELETE_DEVICE_MSGS_TASK_NAME from synapse.handlers.relations import BundledAggregations from synapse.logging import issue9533_logger from synapse.logging.context import current_context @@ -363,36 +362,15 @@ class SyncHandler: # (since we now know that the device has received them) if since_token is not None: since_stream_id = since_token.to_device_key - # Fast path: delete a limited number of to-device messages up front. - # We do this to avoid the overhead of scheduling a task for every - # sync. - device_deletion_limit = 100 deleted = await self.store.delete_messages_for_device( sync_config.user.to_string(), sync_config.device_id, since_stream_id, - limit=device_deletion_limit, ) logger.debug( "Deleted %d to-device messages up to %d", deleted, since_stream_id ) - # If we hit the limit, schedule a background task to delete the rest. - if deleted >= device_deletion_limit: - await self._task_scheduler.schedule_task( - DELETE_DEVICE_MSGS_TASK_NAME, - resource_id=sync_config.device_id, - params={ - "user_id": sync_config.user.to_string(), - "device_id": sync_config.device_id, - "up_to_stream_id": since_stream_id, - }, - ) - logger.debug( - "Deletion of to-device messages up to %d scheduled", - since_stream_id, - ) - if timeout == 0 or since_token is None or full_state: # we are going to return immediately, so don't bother calling # notifier.wait_for_events. diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py index 1cf649d371..1faa6f04b2 100644 --- a/synapse/storage/databases/main/deviceinbox.py +++ b/synapse/storage/databases/main/deviceinbox.py @@ -450,7 +450,7 @@ class DeviceInboxWorkerStore(SQLBaseStore): user_id: str, device_id: Optional[str], up_to_stream_id: int, - limit: int, + limit: Optional[int] = None, ) -> int: """ Args: @@ -481,11 +481,12 @@ class DeviceInboxWorkerStore(SQLBaseStore): ROW_ID_NAME = self.database_engine.row_id_name def delete_messages_for_device_txn(txn: LoggingTransaction) -> int: + limit_statement = "" if limit is None else f"LIMIT {limit}" sql = f""" DELETE FROM device_inbox WHERE {ROW_ID_NAME} IN ( SELECT {ROW_ID_NAME} FROM device_inbox WHERE user_id = ? AND device_id = ? AND stream_id <= ? - LIMIT {limit} + {limit_statement} ) """ txn.execute(sql, (user_id, device_id, up_to_stream_id)) -- cgit 1.5.1 From e3e0ae4ab1f48974ca66a4c4e6be8019aaa38fd1 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 16 Oct 2023 07:35:22 -0400 Subject: Convert state delta processing from a dict to attrs. (#16469) For improved type checking & memory usage. --- changelog.d/16469.misc | 1 + synapse/handlers/presence.py | 32 ++++++------- synapse/handlers/room_member.py | 21 ++++----- synapse/handlers/stats.py | 64 +++++++++++++------------- synapse/handlers/user_directory.py | 34 +++++++------- synapse/storage/controllers/state.py | 14 +----- synapse/storage/databases/main/state_deltas.py | 52 +++++++++++++-------- tests/handlers/test_typing.py | 2 +- 8 files changed, 111 insertions(+), 109 deletions(-) create mode 100644 changelog.d/16469.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/16469.misc b/changelog.d/16469.misc new file mode 100644 index 0000000000..93ceaeafc9 --- /dev/null +++ b/changelog.d/16469.misc @@ -0,0 +1 @@ +Improve type hints. diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 7c7cda3e95..dfc0b9db07 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -110,6 +110,7 @@ from synapse.replication.http.streams import ReplicationGetStreamUpdates from synapse.replication.tcp.commands import ClearUserSyncsCommand from synapse.replication.tcp.streams import PresenceFederationStream, PresenceStream from synapse.storage.databases.main import DataStore +from synapse.storage.databases.main.state_deltas import StateDelta from synapse.streams import EventSource from synapse.types import ( JsonDict, @@ -1499,9 +1500,9 @@ class PresenceHandler(BasePresenceHandler): # We may get multiple deltas for different rooms, but we want to # handle them on a room by room basis, so we batch them up by # room. - deltas_by_room: Dict[str, List[JsonDict]] = {} + deltas_by_room: Dict[str, List[StateDelta]] = {} for delta in deltas: - deltas_by_room.setdefault(delta["room_id"], []).append(delta) + deltas_by_room.setdefault(delta.room_id, []).append(delta) for room_id, deltas_for_room in deltas_by_room.items(): await self._handle_state_delta(room_id, deltas_for_room) @@ -1513,7 +1514,7 @@ class PresenceHandler(BasePresenceHandler): max_pos ) - async def _handle_state_delta(self, room_id: str, deltas: List[JsonDict]) -> None: + async def _handle_state_delta(self, room_id: str, deltas: List[StateDelta]) -> None: """Process current state deltas for the room to find new joins that need to be handled. """ @@ -1524,31 +1525,30 @@ class PresenceHandler(BasePresenceHandler): newly_joined_users = set() for delta in deltas: - assert room_id == delta["room_id"] + assert room_id == delta.room_id - typ = delta["type"] - state_key = delta["state_key"] - event_id = delta["event_id"] - prev_event_id = delta["prev_event_id"] - - logger.debug("Handling: %r %r, %s", typ, state_key, event_id) + logger.debug( + "Handling: %r %r, %s", delta.event_type, delta.state_key, delta.event_id + ) # Drop any event that isn't a membership join - if typ != EventTypes.Member: + if delta.event_type != EventTypes.Member: continue - if event_id is None: + if delta.event_id is None: # state has been deleted, so this is not a join. We only care about # joins. continue - event = await self.store.get_event(event_id, allow_none=True) + event = await self.store.get_event(delta.event_id, allow_none=True) if not event or event.content.get("membership") != Membership.JOIN: # We only care about joins continue - if prev_event_id: - prev_event = await self.store.get_event(prev_event_id, allow_none=True) + if delta.prev_event_id: + prev_event = await self.store.get_event( + delta.prev_event_id, allow_none=True + ) if ( prev_event and prev_event.content.get("membership") == Membership.JOIN @@ -1556,7 +1556,7 @@ class PresenceHandler(BasePresenceHandler): # Ignore changes to join events. continue - newly_joined_users.add(state_key) + newly_joined_users.add(delta.state_key) if not newly_joined_users: # If nobody has joined then there's nothing to do. diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 130eee7e1d..918eb203e2 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -16,7 +16,7 @@ import abc import logging import random from http import HTTPStatus -from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Set, Tuple +from typing import TYPE_CHECKING, Iterable, List, Optional, Set, Tuple from synapse import types from synapse.api.constants import ( @@ -44,6 +44,7 @@ from synapse.handlers.worker_lock import NEW_EVENT_DURING_PURGE_LOCK_NAME from synapse.logging import opentracing from synapse.metrics import event_processing_positions from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.storage.databases.main.state_deltas import StateDelta from synapse.types import ( JsonDict, Requester, @@ -2146,24 +2147,18 @@ class RoomForgetterHandler(StateDeltasHandler): await self._store.update_room_forgetter_stream_pos(max_pos) - async def _handle_deltas(self, deltas: List[Dict[str, Any]]) -> None: + async def _handle_deltas(self, deltas: List[StateDelta]) -> None: """Called with the state deltas to process""" for delta in deltas: - typ = delta["type"] - state_key = delta["state_key"] - room_id = delta["room_id"] - event_id = delta["event_id"] - prev_event_id = delta["prev_event_id"] - - if typ != EventTypes.Member: + if delta.event_type != EventTypes.Member: continue - if not self._hs.is_mine_id(state_key): + if not self._hs.is_mine_id(delta.state_key): continue change = await self._get_key_change( - prev_event_id, - event_id, + delta.prev_event_id, + delta.event_id, key_name="membership", public_value=Membership.JOIN, ) @@ -2172,7 +2167,7 @@ class RoomForgetterHandler(StateDeltasHandler): if is_leave: try: await self._room_member_handler.forget( - UserID.from_string(state_key), room_id + UserID.from_string(delta.state_key), delta.room_id ) except SynapseError as e: if e.code == 400: diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py index 3dde19fc81..817b41aa37 100644 --- a/synapse/handlers/stats.py +++ b/synapse/handlers/stats.py @@ -27,6 +27,7 @@ from typing import ( from synapse.api.constants import EventContentFields, EventTypes, Membership from synapse.metrics import event_processing_positions from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.storage.databases.main.state_deltas import StateDelta from synapse.types import JsonDict if TYPE_CHECKING: @@ -142,7 +143,7 @@ class StatsHandler: self.pos = max_pos async def _handle_deltas( - self, deltas: Iterable[JsonDict] + self, deltas: Iterable[StateDelta] ) -> Tuple[Dict[str, CounterType[str]], Dict[str, CounterType[str]]]: """Called with the state deltas to process @@ -157,51 +158,50 @@ class StatsHandler: room_to_state_updates: Dict[str, Dict[str, Any]] = {} for delta in deltas: - typ = delta["type"] - state_key = delta["state_key"] - room_id = delta["room_id"] - event_id = delta["event_id"] - stream_id = delta["stream_id"] - prev_event_id = delta["prev_event_id"] - - logger.debug("Handling: %r, %r %r, %s", room_id, typ, state_key, event_id) + logger.debug( + "Handling: %r, %r %r, %s", + delta.room_id, + delta.event_type, + delta.state_key, + delta.event_id, + ) - token = await self.store.get_earliest_token_for_stats("room", room_id) + token = await self.store.get_earliest_token_for_stats("room", delta.room_id) # If the earliest token to begin from is larger than our current # stream ID, skip processing this delta. - if token is not None and token >= stream_id: + if token is not None and token >= delta.stream_id: logger.debug( "Ignoring: %s as earlier than this room's initial ingestion event", - event_id, + delta.event_id, ) continue - if event_id is None and prev_event_id is None: + if delta.event_id is None and delta.prev_event_id is None: logger.error( "event ID is None and so is the previous event ID. stream_id: %s", - stream_id, + delta.stream_id, ) continue event_content: JsonDict = {} - if event_id is not None: - event = await self.store.get_event(event_id, allow_none=True) + if delta.event_id is not None: + event = await self.store.get_event(delta.event_id, allow_none=True) if event: event_content = event.content or {} # All the values in this dict are deltas (RELATIVE changes) - room_stats_delta = room_to_stats_deltas.setdefault(room_id, Counter()) + room_stats_delta = room_to_stats_deltas.setdefault(delta.room_id, Counter()) - room_state = room_to_state_updates.setdefault(room_id, {}) + room_state = room_to_state_updates.setdefault(delta.room_id, {}) - if prev_event_id is None: + if delta.prev_event_id is None: # this state event doesn't overwrite another, # so it is a new effective/current state event room_stats_delta["current_state_events"] += 1 - if typ == EventTypes.Member: + if delta.event_type == EventTypes.Member: # we could use StateDeltasHandler._get_key_change here but it's # a bit inefficient given we're not testing for a specific # result; might as well just grab the prev_membership and @@ -210,9 +210,9 @@ class StatsHandler: # in the absence of a previous event because we do not want to # reduce the leave count when a new-to-the-room user joins. prev_membership = None - if prev_event_id is not None: + if delta.prev_event_id is not None: prev_event = await self.store.get_event( - prev_event_id, allow_none=True + delta.prev_event_id, allow_none=True ) if prev_event: prev_event_content = prev_event.content @@ -256,7 +256,7 @@ class StatsHandler: else: raise ValueError("%r is not a valid membership" % (membership,)) - user_id = state_key + user_id = delta.state_key if self.is_mine_id(user_id): # this accounts for transitions like leave → ban and so on. has_changed_joinedness = (prev_membership == Membership.JOIN) != ( @@ -272,30 +272,30 @@ class StatsHandler: room_stats_delta["local_users_in_room"] += membership_delta - elif typ == EventTypes.Create: + elif delta.event_type == EventTypes.Create: room_state["is_federatable"] = ( event_content.get(EventContentFields.FEDERATE, True) is True ) room_type = event_content.get(EventContentFields.ROOM_TYPE) if isinstance(room_type, str): room_state["room_type"] = room_type - elif typ == EventTypes.JoinRules: + elif delta.event_type == EventTypes.JoinRules: room_state["join_rules"] = event_content.get("join_rule") - elif typ == EventTypes.RoomHistoryVisibility: + elif delta.event_type == EventTypes.RoomHistoryVisibility: room_state["history_visibility"] = event_content.get( "history_visibility" ) - elif typ == EventTypes.RoomEncryption: + elif delta.event_type == EventTypes.RoomEncryption: room_state["encryption"] = event_content.get("algorithm") - elif typ == EventTypes.Name: + elif delta.event_type == EventTypes.Name: room_state["name"] = event_content.get("name") - elif typ == EventTypes.Topic: + elif delta.event_type == EventTypes.Topic: room_state["topic"] = event_content.get("topic") - elif typ == EventTypes.RoomAvatar: + elif delta.event_type == EventTypes.RoomAvatar: room_state["avatar"] = event_content.get("url") - elif typ == EventTypes.CanonicalAlias: + elif delta.event_type == EventTypes.CanonicalAlias: room_state["canonical_alias"] = event_content.get("alias") - elif typ == EventTypes.GuestAccess: + elif delta.event_type == EventTypes.GuestAccess: room_state["guest_access"] = event_content.get( EventContentFields.GUEST_ACCESS ) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index a0f5568000..75717ba4f9 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -14,7 +14,7 @@ import logging from http import HTTPStatus -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple +from typing import TYPE_CHECKING, List, Optional, Set, Tuple from twisted.internet.interfaces import IDelayedCall @@ -23,6 +23,7 @@ from synapse.api.constants import EventTypes, HistoryVisibility, JoinRules, Memb from synapse.api.errors import Codes, SynapseError from synapse.handlers.state_deltas import MatchChange, StateDeltasHandler from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.storage.databases.main.state_deltas import StateDelta from synapse.storage.databases.main.user_directory import SearchResult from synapse.storage.roommember import ProfileInfo from synapse.types import UserID @@ -247,32 +248,31 @@ class UserDirectoryHandler(StateDeltasHandler): await self.store.update_user_directory_stream_pos(max_pos) - async def _handle_deltas(self, deltas: List[Dict[str, Any]]) -> None: + async def _handle_deltas(self, deltas: List[StateDelta]) -> None: """Called with the state deltas to process""" for delta in deltas: - typ = delta["type"] - state_key = delta["state_key"] - room_id = delta["room_id"] - event_id: Optional[str] = delta["event_id"] - prev_event_id: Optional[str] = delta["prev_event_id"] - - logger.debug("Handling: %r %r, %s", typ, state_key, event_id) + logger.debug( + "Handling: %r %r, %s", delta.event_type, delta.state_key, delta.event_id + ) # For join rule and visibility changes we need to check if the room # may have become public or not and add/remove the users in said room - if typ in (EventTypes.RoomHistoryVisibility, EventTypes.JoinRules): + if delta.event_type in ( + EventTypes.RoomHistoryVisibility, + EventTypes.JoinRules, + ): await self._handle_room_publicity_change( - room_id, prev_event_id, event_id, typ + delta.room_id, delta.prev_event_id, delta.event_id, delta.event_type ) - elif typ == EventTypes.Member: + elif delta.event_type == EventTypes.Member: await self._handle_room_membership_event( - room_id, - prev_event_id, - event_id, - state_key, + delta.room_id, + delta.prev_event_id, + delta.event_id, + delta.state_key, ) else: - logger.debug("Ignoring irrelevant type: %r", typ) + logger.debug("Ignoring irrelevant type: %r", delta.event_type) async def _handle_room_publicity_change( self, diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py index 46957723a1..9f7959c45d 100644 --- a/synapse/storage/controllers/state.py +++ b/synapse/storage/controllers/state.py @@ -16,7 +16,6 @@ from itertools import chain from typing import ( TYPE_CHECKING, AbstractSet, - Any, Callable, Collection, Dict, @@ -32,6 +31,7 @@ from typing import ( from synapse.api.constants import EventTypes, Membership from synapse.events import EventBase from synapse.logging.opentracing import tag_args, trace +from synapse.storage.databases.main.state_deltas import StateDelta from synapse.storage.roommember import ProfileInfo from synapse.storage.util.partial_state_events_tracker import ( PartialCurrentStateTracker, @@ -531,19 +531,9 @@ class StateStorageController: @tag_args async def get_current_state_deltas( self, prev_stream_id: int, max_stream_id: int - ) -> Tuple[int, List[Dict[str, Any]]]: + ) -> Tuple[int, List[StateDelta]]: """Fetch a list of room state changes since the given stream id - Each entry in the result contains the following fields: - - stream_id (int) - - room_id (str) - - type (str): event type - - state_key (str): - - event_id (str|None): new event_id for this state key. None if the - state has been deleted. - - prev_event_id (str|None): previous event_id for this state key. None - if it's new state. - Args: prev_stream_id: point to get changes since (exclusive) max_stream_id: the point that we know has been correctly persisted diff --git a/synapse/storage/databases/main/state_deltas.py b/synapse/storage/databases/main/state_deltas.py index 445213e12a..3151186e0c 100644 --- a/synapse/storage/databases/main/state_deltas.py +++ b/synapse/storage/databases/main/state_deltas.py @@ -13,7 +13,9 @@ # limitations under the License. import logging -from typing import Any, Dict, List, Tuple +from typing import List, Optional, Tuple + +import attr from synapse.storage._base import SQLBaseStore from synapse.storage.database import LoggingTransaction @@ -22,6 +24,20 @@ from synapse.util.caches.stream_change_cache import StreamChangeCache logger = logging.getLogger(__name__) +@attr.s(slots=True, frozen=True, auto_attribs=True) +class StateDelta: + stream_id: int + room_id: str + event_type: str + state_key: str + + event_id: Optional[str] + """new event_id for this state key. None if the state has been deleted.""" + + prev_event_id: Optional[str] + """previous event_id for this state key. None if it's new state.""" + + class StateDeltasStore(SQLBaseStore): # This class must be mixed in with a child class which provides the following # attribute. TODO: can we get static analysis to enforce this? @@ -29,31 +45,21 @@ class StateDeltasStore(SQLBaseStore): async def get_partial_current_state_deltas( self, prev_stream_id: int, max_stream_id: int - ) -> Tuple[int, List[Dict[str, Any]]]: + ) -> Tuple[int, List[StateDelta]]: """Fetch a list of room state changes since the given stream id - Each entry in the result contains the following fields: - - stream_id (int) - - room_id (str) - - type (str): event type - - state_key (str): - - event_id (str|None): new event_id for this state key. None if the - state has been deleted. - - prev_event_id (str|None): previous event_id for this state key. None - if it's new state. - This may be the partial state if we're lazy joining the room. Args: prev_stream_id: point to get changes since (exclusive) max_stream_id: the point that we know has been correctly persisted - - ie, an upper limit to return changes from. + - ie, an upper limit to return changes from. Returns: A tuple consisting of: - - the stream id which these results go up to - - list of current_state_delta_stream rows. If it is empty, we are - up to date. + - the stream id which these results go up to + - list of current_state_delta_stream rows. If it is empty, we are + up to date. """ prev_stream_id = int(prev_stream_id) @@ -72,7 +78,7 @@ class StateDeltasStore(SQLBaseStore): def get_current_state_deltas_txn( txn: LoggingTransaction, - ) -> Tuple[int, List[Dict[str, Any]]]: + ) -> Tuple[int, List[StateDelta]]: # First we calculate the max stream id that will give us less than # N results. # We arbitrarily limit to 100 stream_id entries to ensure we don't @@ -112,7 +118,17 @@ class StateDeltasStore(SQLBaseStore): ORDER BY stream_id ASC """ txn.execute(sql, (prev_stream_id, clipped_stream_id)) - return clipped_stream_id, self.db_pool.cursor_to_dict(txn) + return clipped_stream_id, [ + StateDelta( + stream_id=row[0], + room_id=row[1], + event_type=row[2], + state_key=row[3], + event_id=row[4], + prev_event_id=row[5], + ) + for row in txn.fetchall() + ] return await self.db_pool.runInteraction( "get_current_state_deltas", get_current_state_deltas_txn diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index 3060bc9744..d7025c6f2c 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -174,7 +174,7 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): return_value=1 ) - self.datastore.get_partial_current_state_deltas = Mock(return_value=(0, None)) # type: ignore[method-assign] + self.datastore.get_partial_current_state_deltas = Mock(return_value=(0, [])) # type: ignore[method-assign] self.datastore.get_to_device_stream_token = Mock( # type: ignore[method-assign] return_value=0 -- cgit 1.5.1 From 6ad1f9eac2c5ffc496597acbc5728482441c64c7 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 17 Oct 2023 08:47:42 -0400 Subject: Convert DeviceLastConnectionInfo to attrs. (#16507) To improve type safety & memory usage. --- changelog.d/16507.misc | 1 + synapse/handlers/device.py | 23 ++--- synapse/storage/databases/main/client_ips.py | 46 +++++---- tests/storage/test_client_ips.py | 137 ++++++++++++++------------- 4 files changed, 104 insertions(+), 103 deletions(-) create mode 100644 changelog.d/16507.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/16507.misc b/changelog.d/16507.misc new file mode 100644 index 0000000000..93ceaeafc9 --- /dev/null +++ b/changelog.d/16507.misc @@ -0,0 +1 @@ +Improve type hints. diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 50df4f2b06..544bc7c13d 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -14,17 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import ( - TYPE_CHECKING, - Any, - Dict, - Iterable, - List, - Mapping, - Optional, - Set, - Tuple, -) +from typing import TYPE_CHECKING, Dict, Iterable, List, Mapping, Optional, Set, Tuple from synapse.api import errors from synapse.api.constants import EduTypes, EventTypes @@ -41,6 +31,7 @@ from synapse.metrics.background_process_metrics import ( run_as_background_process, wrap_as_background_process, ) +from synapse.storage.databases.main.client_ips import DeviceLastConnectionInfo from synapse.types import ( JsonDict, JsonMapping, @@ -1008,14 +999,14 @@ class DeviceHandler(DeviceWorkerHandler): def _update_device_from_client_ips( - device: JsonDict, client_ips: Mapping[Tuple[str, str], Mapping[str, Any]] + device: JsonDict, client_ips: Mapping[Tuple[str, str], DeviceLastConnectionInfo] ) -> None: - ip = client_ips.get((device["user_id"], device["device_id"]), {}) + ip = client_ips.get((device["user_id"], device["device_id"])) device.update( { - "last_seen_user_agent": ip.get("user_agent"), - "last_seen_ts": ip.get("last_seen"), - "last_seen_ip": ip.get("ip"), + "last_seen_user_agent": ip.user_agent if ip else None, + "last_seen_ts": ip.last_seen if ip else None, + "last_seen_ip": ip.ip if ip else None, } ) diff --git a/synapse/storage/databases/main/client_ips.py b/synapse/storage/databases/main/client_ips.py index 7da47c3dd7..8be1511859 100644 --- a/synapse/storage/databases/main/client_ips.py +++ b/synapse/storage/databases/main/client_ips.py @@ -15,6 +15,7 @@ import logging from typing import TYPE_CHECKING, Dict, List, Mapping, Optional, Tuple, Union, cast +import attr from typing_extensions import TypedDict from synapse.metrics.background_process_metrics import wrap_as_background_process @@ -42,7 +43,8 @@ logger = logging.getLogger(__name__) LAST_SEEN_GRANULARITY = 120 * 1000 -class DeviceLastConnectionInfo(TypedDict): +@attr.s(slots=True, frozen=True, auto_attribs=True) +class DeviceLastConnectionInfo: """Metadata for the last connection seen for a user and device combination""" # These types must match the columns in the `devices` table @@ -499,24 +501,29 @@ class ClientIpWorkerStore(ClientIpBackgroundUpdateStore, MonthlyActiveUsersWorke device_id: If None fetches all devices for the user Returns: - A dictionary mapping a tuple of (user_id, device_id) to dicts, with - keys giving the column names from the devices table. + A dictionary mapping a tuple of (user_id, device_id) to DeviceLastConnectionInfo. """ keyvalues = {"user_id": user_id} if device_id is not None: keyvalues["device_id"] = device_id - res = cast( - List[DeviceLastConnectionInfo], - await self.db_pool.simple_select_list( - table="devices", - keyvalues=keyvalues, - retcols=("user_id", "ip", "user_agent", "device_id", "last_seen"), - ), + res = await self.db_pool.simple_select_list( + table="devices", + keyvalues=keyvalues, + retcols=("user_id", "ip", "user_agent", "device_id", "last_seen"), ) - return {(d["user_id"], d["device_id"]): d for d in res} + return { + (d["user_id"], d["device_id"]): DeviceLastConnectionInfo( + user_id=d["user_id"], + device_id=d["device_id"], + ip=d["ip"], + user_agent=d["user_agent"], + last_seen=d["last_seen"], + ) + for d in res + } async def _get_user_ip_and_agents_from_database( self, user: UserID, since_ts: int = 0 @@ -683,8 +690,7 @@ class ClientIpWorkerStore(ClientIpBackgroundUpdateStore, MonthlyActiveUsersWorke device_id: If None fetches all devices for the user Returns: - A dictionary mapping a tuple of (user_id, device_id) to dicts, with - keys giving the column names from the devices table. + A dictionary mapping a tuple of (user_id, device_id) to DeviceLastConnectionInfo. """ ret = await self._get_last_client_ip_by_device_from_database(user_id, device_id) @@ -705,13 +711,13 @@ class ClientIpWorkerStore(ClientIpBackgroundUpdateStore, MonthlyActiveUsersWorke continue if not device_id or did == device_id: - ret[(user_id, did)] = { - "user_id": user_id, - "ip": ip, - "user_agent": user_agent, - "device_id": did, - "last_seen": last_seen, - } + ret[(user_id, did)] = DeviceLastConnectionInfo( + user_id=user_id, + ip=ip, + user_agent=user_agent, + device_id=did, + last_seen=last_seen, + ) return ret async def get_user_ip_and_agents( diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py index 6b9692c486..0c054a598f 100644 --- a/tests/storage/test_client_ips.py +++ b/tests/storage/test_client_ips.py @@ -24,7 +24,10 @@ import synapse.rest.admin from synapse.http.site import XForwardedForRequest from synapse.rest.client import login from synapse.server import HomeServer -from synapse.storage.databases.main.client_ips import LAST_SEEN_GRANULARITY +from synapse.storage.databases.main.client_ips import ( + LAST_SEEN_GRANULARITY, + DeviceLastConnectionInfo, +) from synapse.types import UserID from synapse.util import Clock @@ -65,15 +68,15 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): ) r = result[(user_id, device_id)] - self.assertLessEqual( - { - "user_id": user_id, - "device_id": device_id, - "ip": "ip", - "user_agent": "user_agent", - "last_seen": 12345678000, - }.items(), - r.items(), + self.assertEqual( + DeviceLastConnectionInfo( + user_id=user_id, + device_id=device_id, + ip="ip", + user_agent="user_agent", + last_seen=12345678000, + ), + r, ) def test_insert_new_client_ip_none_device_id(self) -> None: @@ -201,13 +204,13 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): self.assertEqual( result, { - (user_id, device_id): { - "user_id": user_id, - "device_id": device_id, - "ip": "ip", - "user_agent": "user_agent", - "last_seen": 12345678000, - }, + (user_id, device_id): DeviceLastConnectionInfo( + user_id=user_id, + device_id=device_id, + ip="ip", + user_agent="user_agent", + last_seen=12345678000, + ), }, ) @@ -292,20 +295,20 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): self.assertEqual( result, { - (user_id, device_id_1): { - "user_id": user_id, - "device_id": device_id_1, - "ip": "ip_1", - "user_agent": "user_agent_1", - "last_seen": 12345678000, - }, - (user_id, device_id_2): { - "user_id": user_id, - "device_id": device_id_2, - "ip": "ip_2", - "user_agent": "user_agent_3", - "last_seen": 12345688000 + LAST_SEEN_GRANULARITY, - }, + (user_id, device_id_1): DeviceLastConnectionInfo( + user_id=user_id, + device_id=device_id_1, + ip="ip_1", + user_agent="user_agent_1", + last_seen=12345678000, + ), + (user_id, device_id_2): DeviceLastConnectionInfo( + user_id=user_id, + device_id=device_id_2, + ip="ip_2", + user_agent="user_agent_3", + last_seen=12345688000 + LAST_SEEN_GRANULARITY, + ), }, ) @@ -526,15 +529,15 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): ) r = result[(user_id, device_id)] - self.assertLessEqual( - { - "user_id": user_id, - "device_id": device_id, - "ip": None, - "user_agent": None, - "last_seen": None, - }.items(), - r.items(), + self.assertEqual( + DeviceLastConnectionInfo( + user_id=user_id, + device_id=device_id, + ip=None, + user_agent=None, + last_seen=None, + ), + r, ) # Register the background update to run again. @@ -561,15 +564,15 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): ) r = result[(user_id, device_id)] - self.assertLessEqual( - { - "user_id": user_id, - "device_id": device_id, - "ip": "ip", - "user_agent": "user_agent", - "last_seen": 0, - }.items(), - r.items(), + self.assertEqual( + DeviceLastConnectionInfo( + user_id=user_id, + device_id=device_id, + ip="ip", + user_agent="user_agent", + last_seen=0, + ), + r, ) def test_old_user_ips_pruned(self) -> None: @@ -640,15 +643,15 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): ) r = result2[(user_id, device_id)] - self.assertLessEqual( - { - "user_id": user_id, - "device_id": device_id, - "ip": "ip", - "user_agent": "user_agent", - "last_seen": 0, - }.items(), - r.items(), + self.assertEqual( + DeviceLastConnectionInfo( + user_id=user_id, + device_id=device_id, + ip="ip", + user_agent="user_agent", + last_seen=0, + ), + r, ) def test_invalid_user_agents_are_ignored(self) -> None: @@ -777,13 +780,13 @@ class ClientIpAuthTestCase(unittest.HomeserverTestCase): self.store.get_last_client_ip_by_device(self.user_id, device_id) ) r = result[(self.user_id, device_id)] - self.assertLessEqual( - { - "user_id": self.user_id, - "device_id": device_id, - "ip": expected_ip, - "user_agent": "Mozzila pizza", - "last_seen": 123456100, - }.items(), - r.items(), + self.assertEqual( + DeviceLastConnectionInfo( + user_id=self.user_id, + device_id=device_id, + ip=expected_ip, + user_agent="Mozzila pizza", + last_seen=123456100, + ), + r, ) -- cgit 1.5.1 From bcff01b40673238dca29c0f22dc4fda05f635030 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Wed, 18 Oct 2023 17:42:01 +0200 Subject: Improve performance of delete device messages query (#16492) --- changelog.d/16492.misc | 1 + synapse/handlers/device.py | 2 ++ synapse/storage/databases/main/deviceinbox.py | 15 ++++++++------- 3 files changed, 11 insertions(+), 7 deletions(-) create mode 100644 changelog.d/16492.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/16492.misc b/changelog.d/16492.misc new file mode 100644 index 0000000000..ecb3356bdd --- /dev/null +++ b/changelog.d/16492.misc @@ -0,0 +1 @@ +Improve performance of delete device messages query, cf issue [16479](https://github.com/matrix-org/synapse/issues/16479). diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 544bc7c13d..3ce96ef3cb 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -592,6 +592,8 @@ class DeviceHandler(DeviceWorkerHandler): ) # Delete device messages asynchronously and in batches using the task scheduler + # We specify an upper stream id to avoid deleting non delivered messages + # if an user re-uses a device ID. await self._task_scheduler.schedule_task( DELETE_DEVICE_MSGS_TASK_NAME, resource_id=device_id, diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py index 1faa6f04b2..3e7425d4a6 100644 --- a/synapse/storage/databases/main/deviceinbox.py +++ b/synapse/storage/databases/main/deviceinbox.py @@ -478,18 +478,19 @@ class DeviceInboxWorkerStore(SQLBaseStore): log_kv({"message": "No changes in cache since last check"}) return 0 - ROW_ID_NAME = self.database_engine.row_id_name - def delete_messages_for_device_txn(txn: LoggingTransaction) -> int: limit_statement = "" if limit is None else f"LIMIT {limit}" sql = f""" - DELETE FROM device_inbox WHERE {ROW_ID_NAME} IN ( - SELECT {ROW_ID_NAME} FROM device_inbox - WHERE user_id = ? AND device_id = ? AND stream_id <= ? - {limit_statement} + DELETE FROM device_inbox WHERE user_id = ? AND device_id = ? AND stream_id <= ( + SELECT MAX(stream_id) FROM ( + SELECT stream_id FROM device_inbox + WHERE user_id = ? AND device_id = ? AND stream_id <= ? + ORDER BY stream_id + {limit_statement} + ) AS q1 ) """ - txn.execute(sql, (user_id, device_id, up_to_stream_id)) + txn.execute(sql, (user_id, device_id, user_id, device_id, up_to_stream_id)) return txn.rowcount count = await self.db_pool.runInteraction( -- cgit 1.5.1 From 49c9745b4516dec8728c260f1a6784f2c510110c Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 18 Oct 2023 12:26:01 -0400 Subject: Avoid sending massive replication updates when purging a room. (#16510) --- changelog.d/16510.misc | 1 + synapse/replication/tcp/streams/events.py | 45 +++++++++++++- synapse/storage/databases/main/cache.py | 8 +++ tests/replication/tcp/streams/test_events.py | 91 +++++++++++++++++++--------- 4 files changed, 115 insertions(+), 30 deletions(-) create mode 100644 changelog.d/16510.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/16510.misc b/changelog.d/16510.misc new file mode 100644 index 0000000000..5556b5d74c --- /dev/null +++ b/changelog.d/16510.misc @@ -0,0 +1 @@ +Improve replication performance when purging rooms. diff --git a/synapse/replication/tcp/streams/events.py b/synapse/replication/tcp/streams/events.py index ad9b760713..da6d948e1b 100644 --- a/synapse/replication/tcp/streams/events.py +++ b/synapse/replication/tcp/streams/events.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import heapq +from collections import defaultdict from typing import TYPE_CHECKING, Iterable, Optional, Tuple, Type, TypeVar, cast import attr @@ -51,8 +52,19 @@ data part are: * The state_key of the state which has changed * The event id of the new state +A "state-all" row is sent whenever the "current state" in a room changes, but there are +too many state updates for a particular room in the same update. This replaces any +"state" rows on a per-room basis. The fields in the data part are: + +* The room id for the state changes + """ +# Any room with more than _MAX_STATE_UPDATES_PER_ROOM will send a EventsStreamAllStateRow +# instead of individual EventsStreamEventRow. This is predominantly useful when +# purging large rooms. +_MAX_STATE_UPDATES_PER_ROOM = 150 + @attr.s(slots=True, frozen=True, auto_attribs=True) class EventsStreamRow: @@ -111,9 +123,17 @@ class EventsStreamCurrentStateRow(BaseEventsStreamRow): event_id: Optional[str] +@attr.s(slots=True, frozen=True, auto_attribs=True) +class EventsStreamAllStateRow(BaseEventsStreamRow): + TypeId = "state-all" + + room_id: str + + _EventRows: Tuple[Type[BaseEventsStreamRow], ...] = ( EventsStreamEventRow, EventsStreamCurrentStateRow, + EventsStreamAllStateRow, ) TypeToRow = {Row.TypeId: Row for Row in _EventRows} @@ -213,9 +233,28 @@ class EventsStream(Stream): if stream_id <= upper_limit ) + # Separate out rooms that have many state updates, listeners should clear + # all state for those rooms. + state_updates_by_room = defaultdict(list) + for stream_id, room_id, _type, _state_key, _event_id in state_rows: + state_updates_by_room[room_id].append(stream_id) + + state_all_rows = [ + (stream_ids[-1], room_id) + for room_id, stream_ids in state_updates_by_room.items() + if len(stream_ids) >= _MAX_STATE_UPDATES_PER_ROOM + ] + state_all_updates: Iterable[Tuple[int, Tuple]] = ( + (max_stream_id, (EventsStreamAllStateRow.TypeId, (room_id,))) + for (max_stream_id, room_id) in state_all_rows + ) + + # Any remaining state updates are sent individually. + state_all_rooms = {room_id for _, room_id in state_all_rows} state_updates: Iterable[Tuple[int, Tuple]] = ( (stream_id, (EventsStreamCurrentStateRow.TypeId, rest)) for (stream_id, *rest) in state_rows + if rest[0] not in state_all_rooms ) ex_outliers_updates: Iterable[Tuple[int, Tuple]] = ( @@ -224,7 +263,11 @@ class EventsStream(Stream): ) # we need to return a sorted list, so merge them together. - updates = list(heapq.merge(event_updates, state_updates, ex_outliers_updates)) + updates = list( + heapq.merge( + event_updates, state_all_updates, state_updates, ex_outliers_updates + ) + ) return updates, upper_limit, limited @classmethod diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py index 2fbd389c71..4d0470ffd9 100644 --- a/synapse/storage/databases/main/cache.py +++ b/synapse/storage/databases/main/cache.py @@ -23,6 +23,7 @@ from synapse.metrics.background_process_metrics import wrap_as_background_proces from synapse.replication.tcp.streams import BackfillStream, CachesStream from synapse.replication.tcp.streams.events import ( EventsStream, + EventsStreamAllStateRow, EventsStreamCurrentStateRow, EventsStreamEventRow, EventsStreamRow, @@ -264,6 +265,13 @@ class CacheInvalidationWorkerStore(SQLBaseStore): (data.state_key,) ) self.get_rooms_for_user.invalidate((data.state_key,)) # type: ignore[attr-defined] + elif row.type == EventsStreamAllStateRow.TypeId: + assert isinstance(data, EventsStreamAllStateRow) + # Similar to the above, but the entire caches are invalidated. This is + # unfortunate for the membership caches, but should recover quickly. + self._curr_state_delta_stream_cache.entity_has_changed(data.room_id, token) # type: ignore[attr-defined] + self.get_rooms_for_user_with_stream_ordering.invalidate_all() # type: ignore[attr-defined] + self.get_rooms_for_user.invalidate_all() # type: ignore[attr-defined] else: raise Exception("Unknown events stream row type %s" % (row.type,)) diff --git a/tests/replication/tcp/streams/test_events.py b/tests/replication/tcp/streams/test_events.py index 128fc3e046..b8ab4ee54b 100644 --- a/tests/replication/tcp/streams/test_events.py +++ b/tests/replication/tcp/streams/test_events.py @@ -14,6 +14,8 @@ from typing import Any, List, Optional +from parameterized import parameterized + from twisted.test.proto_helpers import MemoryReactor from synapse.api.constants import EventTypes, Membership @@ -21,6 +23,8 @@ from synapse.events import EventBase from synapse.replication.tcp.commands import RdataCommand from synapse.replication.tcp.streams._base import _STREAM_UPDATE_TARGET_ROW_COUNT from synapse.replication.tcp.streams.events import ( + _MAX_STATE_UPDATES_PER_ROOM, + EventsStreamAllStateRow, EventsStreamCurrentStateRow, EventsStreamEventRow, EventsStreamRow, @@ -106,11 +110,21 @@ class EventsStreamTestCase(BaseStreamTestCase): self.assertEqual([], received_rows) - def test_update_function_huge_state_change(self) -> None: + @parameterized.expand( + [(_STREAM_UPDATE_TARGET_ROW_COUNT, False), (_MAX_STATE_UPDATES_PER_ROOM, True)] + ) + def test_update_function_huge_state_change( + self, num_state_changes: int, collapse_state_changes: bool + ) -> None: """Test replication with many state events Ensures that all events are correctly replicated when there are lots of state change rows to be replicated. + + Args: + num_state_changes: The number of state changes to create. + collapse_state_changes: Whether the state changes are expected to be + collapsed or not. """ # we want to generate lots of state changes at a single stream ID. @@ -145,7 +159,7 @@ class EventsStreamTestCase(BaseStreamTestCase): events = [ self._inject_state_event(sender=OTHER_USER) - for _ in range(_STREAM_UPDATE_TARGET_ROW_COUNT) + for _ in range(num_state_changes) ] self.replicate() @@ -202,8 +216,7 @@ class EventsStreamTestCase(BaseStreamTestCase): row for row in self.test_handler.received_rdata_rows if row[0] == "events" ] - # first check the first two rows, which should be state1 - + # first check the first two rows, which should be the state1 event. stream_name, token, row = received_rows.pop(0) self.assertEqual("events", stream_name) self.assertIsInstance(row, EventsStreamRow) @@ -217,7 +230,7 @@ class EventsStreamTestCase(BaseStreamTestCase): self.assertIsInstance(row.data, EventsStreamCurrentStateRow) self.assertEqual(row.data.event_id, state1.event_id) - # now the last two rows, which should be state2 + # now the last two rows, which should be the state2 event. stream_name, token, row = received_rows.pop(-2) self.assertEqual("events", stream_name) self.assertIsInstance(row, EventsStreamRow) @@ -231,34 +244,54 @@ class EventsStreamTestCase(BaseStreamTestCase): self.assertIsInstance(row.data, EventsStreamCurrentStateRow) self.assertEqual(row.data.event_id, state2.event_id) - # that should leave us with the rows for the PL event - self.assertEqual(len(received_rows), len(events) + 2) + # Based on the number of + if collapse_state_changes: + # that should leave us with the rows for the PL event, the state changes + # get collapsed into a single row. + self.assertEqual(len(received_rows), 2) - stream_name, token, row = received_rows.pop(0) - self.assertEqual("events", stream_name) - self.assertIsInstance(row, EventsStreamRow) - self.assertEqual(row.type, "ev") - self.assertIsInstance(row.data, EventsStreamEventRow) - self.assertEqual(row.data.event_id, pl_event.event_id) + stream_name, token, row = received_rows.pop(0) + self.assertEqual("events", stream_name) + self.assertIsInstance(row, EventsStreamRow) + self.assertEqual(row.type, "ev") + self.assertIsInstance(row.data, EventsStreamEventRow) + self.assertEqual(row.data.event_id, pl_event.event_id) - # the state rows are unsorted - state_rows: List[EventsStreamCurrentStateRow] = [] - for stream_name, _, row in received_rows: + stream_name, token, row = received_rows.pop(0) + self.assertIsInstance(row, EventsStreamRow) + self.assertEqual(row.type, "state-all") + self.assertIsInstance(row.data, EventsStreamAllStateRow) + self.assertEqual(row.data.room_id, state2.room_id) + + else: + # that should leave us with the rows for the PL event + self.assertEqual(len(received_rows), len(events) + 2) + + stream_name, token, row = received_rows.pop(0) self.assertEqual("events", stream_name) self.assertIsInstance(row, EventsStreamRow) - self.assertEqual(row.type, "state") - self.assertIsInstance(row.data, EventsStreamCurrentStateRow) - state_rows.append(row.data) - - state_rows.sort(key=lambda r: r.state_key) - - sr = state_rows.pop(0) - self.assertEqual(sr.type, EventTypes.PowerLevels) - self.assertEqual(sr.event_id, pl_event.event_id) - for sr in state_rows: - self.assertEqual(sr.type, "test_state_event") - # "None" indicates the state has been deleted - self.assertIsNone(sr.event_id) + self.assertEqual(row.type, "ev") + self.assertIsInstance(row.data, EventsStreamEventRow) + self.assertEqual(row.data.event_id, pl_event.event_id) + + # the state rows are unsorted + state_rows: List[EventsStreamCurrentStateRow] = [] + for stream_name, _, row in received_rows: + self.assertEqual("events", stream_name) + self.assertIsInstance(row, EventsStreamRow) + self.assertEqual(row.type, "state") + self.assertIsInstance(row.data, EventsStreamCurrentStateRow) + state_rows.append(row.data) + + state_rows.sort(key=lambda r: r.state_key) + + sr = state_rows.pop(0) + self.assertEqual(sr.type, EventTypes.PowerLevels) + self.assertEqual(sr.event_id, pl_event.event_id) + for sr in state_rows: + self.assertEqual(sr.type, "test_state_event") + # "None" indicates the state has been deleted + self.assertIsNone(sr.event_id) def test_update_function_state_row_limit(self) -> None: """Test replication with many state events over several stream ids.""" -- cgit 1.5.1 From e9069c9f919685606506f04527332e83fbfa44d9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 19 Oct 2023 15:04:18 +0100 Subject: Mark sync as limited if there is a gap in the timeline (#16485) This splits thinsg into two queries, but most of the time we won't have new event backwards extremities so this shouldn't actually add an extra RTT for the majority of cases. Note this removes the check for events with no prev events, but that was part of MSC2716 work that has since been removed. --- changelog.d/16485.bugfix | 1 + synapse/handlers/sync.py | 52 ++++++++++++++--- synapse/storage/databases/main/events.py | 74 ++++++++++++++++--------- synapse/storage/databases/main/stream.py | 47 ++++++++++++++++ synapse/storage/schema/main/delta/82/05gaps.sql | 25 +++++++++ 5 files changed, 166 insertions(+), 33 deletions(-) create mode 100644 changelog.d/16485.bugfix create mode 100644 synapse/storage/schema/main/delta/82/05gaps.sql (limited to 'synapse/storage/databases') diff --git a/changelog.d/16485.bugfix b/changelog.d/16485.bugfix new file mode 100644 index 0000000000..3cd7e1877f --- /dev/null +++ b/changelog.d/16485.bugfix @@ -0,0 +1 @@ +Fix long-standing bug where `/sync` incorrectly did not mark a room as `limited` in a sync requests when there were missing remote events. diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 60b4d95cd7..f131c0e8e0 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -500,12 +500,27 @@ class SyncHandler: async def _load_filtered_recents( self, room_id: str, + sync_result_builder: "SyncResultBuilder", sync_config: SyncConfig, - now_token: StreamToken, + upto_token: StreamToken, since_token: Optional[StreamToken] = None, potential_recents: Optional[List[EventBase]] = None, newly_joined_room: bool = False, ) -> TimelineBatch: + """Create a timeline batch for the room + + Args: + room_id + sync_result_builder + sync_config + upto_token: The token up to which we should fetch (more) events. + If `potential_results` is non-empty then this is *start* of + the the list. + since_token + potential_recents: If non-empty, the events between the since token + and current token to send down to clients. + newly_joined_room + """ with Measure(self.clock, "load_filtered_recents"): timeline_limit = sync_config.filter_collection.timeline_limit() block_all_timeline = ( @@ -521,6 +536,20 @@ class SyncHandler: else: limited = False + # Check if there is a gap, if so we need to mark this as limited and + # recalculate which events to send down. + gap_token = await self.store.get_timeline_gaps( + room_id, + since_token.room_key if since_token else None, + sync_result_builder.now_token.room_key, + ) + if gap_token: + # There's a gap, so we need to ignore the passed in + # `potential_recents`, and reset `upto_token` to match. + potential_recents = None + upto_token = sync_result_builder.now_token + limited = True + log_kv({"limited": limited}) if potential_recents: @@ -559,10 +588,10 @@ class SyncHandler: recents = [] if not limited or block_all_timeline: - prev_batch_token = now_token + prev_batch_token = upto_token if recents: room_key = recents[0].internal_metadata.before - prev_batch_token = now_token.copy_and_replace( + prev_batch_token = upto_token.copy_and_replace( StreamKeyType.ROOM, room_key ) @@ -573,11 +602,15 @@ class SyncHandler: filtering_factor = 2 load_limit = max(timeline_limit * filtering_factor, 10) max_repeat = 5 # Only try a few times per room, otherwise - room_key = now_token.room_key + room_key = upto_token.room_key end_key = room_key since_key = None - if since_token and not newly_joined_room: + if since_token and gap_token: + # If there is a gap then we need to only include events after + # it. + since_key = gap_token + elif since_token and not newly_joined_room: since_key = since_token.room_key while limited and len(recents) < timeline_limit and max_repeat: @@ -647,7 +680,7 @@ class SyncHandler: recents = recents[-timeline_limit:] room_key = recents[0].internal_metadata.before - prev_batch_token = now_token.copy_and_replace(StreamKeyType.ROOM, room_key) + prev_batch_token = upto_token.copy_and_replace(StreamKeyType.ROOM, room_key) # Don't bother to bundle aggregations if the timeline is unlimited, # as clients will have all the necessary information. @@ -662,7 +695,9 @@ class SyncHandler: return TimelineBatch( events=recents, prev_batch=prev_batch_token, - limited=limited or newly_joined_room, + # Also mark as limited if this is a new room or there has been a gap + # (to force client to paginate the gap). + limited=limited or newly_joined_room or gap_token is not None, bundled_aggregations=bundled_aggregations, ) @@ -2397,8 +2432,9 @@ class SyncHandler: batch = await self._load_filtered_recents( room_id, + sync_result_builder, sync_config, - now_token=upto_token, + upto_token=upto_token, since_token=since_token, potential_recents=events, newly_joined_room=newly_joined, diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index ef6766b5e0..3c1492e3ad 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -2267,35 +2267,59 @@ class PersistEventsStore: Forward extremities are handled when we first start persisting the events. """ - # From the events passed in, add all of the prev events as backwards extremities. - # Ignore any events that are already backwards extrems or outliers. - query = ( - "INSERT INTO event_backward_extremities (event_id, room_id)" - " SELECT ?, ? WHERE NOT EXISTS (" - " SELECT 1 FROM event_backward_extremities" - " WHERE event_id = ? AND room_id = ?" - " )" - # 1. Don't add an event as a extremity again if we already persisted it - # as a non-outlier. - # 2. Don't add an outlier as an extremity if it has no prev_events - " AND NOT EXISTS (" - " SELECT 1 FROM events" - " LEFT JOIN event_edges edge" - " ON edge.event_id = events.event_id" - " WHERE events.event_id = ? AND events.room_id = ? AND (events.outlier = FALSE OR edge.event_id IS NULL)" - " )" + + room_id = events[0].room_id + + potential_backwards_extremities = { + e_id + for ev in events + for e_id in ev.prev_event_ids() + if not ev.internal_metadata.is_outlier() + } + + if not potential_backwards_extremities: + return + + existing_events_outliers = self.db_pool.simple_select_many_txn( + txn, + table="events", + column="event_id", + iterable=potential_backwards_extremities, + keyvalues={"outlier": False}, + retcols=("event_id",), ) - txn.execute_batch( - query, - [ - (e_id, ev.room_id, e_id, ev.room_id, e_id, ev.room_id) - for ev in events - for e_id in ev.prev_event_ids() - if not ev.internal_metadata.is_outlier() - ], + potential_backwards_extremities.difference_update( + e for e, in existing_events_outliers ) + if potential_backwards_extremities: + self.db_pool.simple_upsert_many_txn( + txn, + table="event_backward_extremities", + key_names=("room_id", "event_id"), + key_values=[(room_id, ev) for ev in potential_backwards_extremities], + value_names=(), + value_values=(), + ) + + # Record the stream orderings where we have new gaps. + gap_events = [ + (room_id, self._instance_name, ev.internal_metadata.stream_ordering) + for ev in events + if any( + e_id in potential_backwards_extremities + for e_id in ev.prev_event_ids() + ) + ] + + self.db_pool.simple_insert_many_txn( + txn, + table="timeline_gaps", + keys=("room_id", "instance_name", "stream_ordering"), + values=gap_events, + ) + # Delete all these events that we've already fetched and now know that their # prev events are the new backwards extremeties. query = ( diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index ea06e4eee0..872df6bda1 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -1616,3 +1616,50 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): retcol="instance_name", desc="get_name_from_instance_id", ) + + async def get_timeline_gaps( + self, + room_id: str, + from_token: Optional[RoomStreamToken], + to_token: RoomStreamToken, + ) -> Optional[RoomStreamToken]: + """Check if there is a gap, and return a token that marks the position + of the gap in the stream. + """ + + sql = """ + SELECT instance_name, stream_ordering + FROM timeline_gaps + WHERE room_id = ? AND ? < stream_ordering AND stream_ordering <= ? + ORDER BY stream_ordering + """ + + rows = await self.db_pool.execute( + "get_timeline_gaps", + None, + sql, + room_id, + from_token.stream if from_token else 0, + to_token.get_max_stream_pos(), + ) + + if not rows: + return None + + positions = [ + PersistedEventPosition(instance_name, stream_ordering) + for instance_name, stream_ordering in rows + ] + if from_token: + positions = [p for p in positions if p.persisted_after(from_token)] + + positions = [p for p in positions if not p.persisted_after(to_token)] + + if positions: + # We return a stream token that ensures the event *at* the position + # of the gap is included (as the gap is *before* the persisted + # event). + last_position = positions[-1] + return RoomStreamToken(stream=last_position.stream - 1) + + return None diff --git a/synapse/storage/schema/main/delta/82/05gaps.sql b/synapse/storage/schema/main/delta/82/05gaps.sql new file mode 100644 index 0000000000..6813b488ca --- /dev/null +++ b/synapse/storage/schema/main/delta/82/05gaps.sql @@ -0,0 +1,25 @@ +/* Copyright 2023 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Records when we see a "gap in the timeline", due to missing events over +-- federation. We record this so that we can tell clients there is a gap (by +-- marking the timeline section of a sync request as limited). +CREATE TABLE IF NOT EXISTS timeline_gaps ( + room_id TEXT NOT NULL, + instance_name TEXT NOT NULL, + stream_ordering BIGINT NOT NULL +); + +CREATE INDEX timeline_gaps_room_id ON timeline_gaps(room_id, stream_ordering); -- cgit 1.5.1 From 12ca87f5eac06450abaf024e5f4906147d5322e3 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 23 Oct 2023 07:37:45 -0400 Subject: Remove the last reference to event_txn_id. (#16521) This table was no longer used, except for a background process which purged old entries in it. --- changelog.d/16521.misc | 1 + synapse/storage/databases/main/events_worker.py | 6 ------ synapse/storage/schema/__init__.py | 5 ++++- 3 files changed, 5 insertions(+), 7 deletions(-) create mode 100644 changelog.d/16521.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/16521.misc b/changelog.d/16521.misc new file mode 100644 index 0000000000..c6a8ddcf9c --- /dev/null +++ b/changelog.d/16521.misc @@ -0,0 +1 @@ +Stop deleting from an unused table. diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 8af638d60f..5bf864c1fb 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -2095,12 +2095,6 @@ class EventsWorkerStore(SQLBaseStore): def _cleanup_old_transaction_ids_txn(txn: LoggingTransaction) -> None: one_day_ago = self._clock.time_msec() - 24 * 60 * 60 * 1000 - sql = """ - DELETE FROM event_txn_id - WHERE inserted_ts < ? - """ - txn.execute(sql, (one_day_ago,)) - sql = """ DELETE FROM event_txn_id_device_id WHERE inserted_ts < ? diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index 5b50bd66bc..158b528dce 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -SCHEMA_VERSION = 82 # remember to update the list below when updating +SCHEMA_VERSION = 83 # remember to update the list below when updating """Represents the expectations made by the codebase about the database schema This should be incremented whenever the codebase changes its requirements on the @@ -121,6 +121,9 @@ Changes in SCHEMA_VERSION = 81 Changes in SCHEMA_VERSION = 82 - The insertion_events, insertion_event_extremities, insertion_event_edges, and batch_events tables are no longer purged in preparation for their removal. + +Changes in SCHEMA_VERSION = 83 + - The event_txn_id is no longer used. """ -- cgit 1.5.1 From 3bc23cc45cb6a70d53ba4032a9116029bc4f538c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Oct 2023 14:39:25 +0100 Subject: Fix bug that could cause a `/sync` to tightloop with sqlite after restart (#16540) This could happen if the last rows in the account data stream were inserted into `account_data`. After a restart the max account ID would be calculated without looking at the `account_data` table, and so have an old ID. --- changelog.d/16540.bugfix | 1 + synapse/storage/databases/main/account_data.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 changelog.d/16540.bugfix (limited to 'synapse/storage/databases') diff --git a/changelog.d/16540.bugfix b/changelog.d/16540.bugfix new file mode 100644 index 0000000000..34ee9facf9 --- /dev/null +++ b/changelog.d/16540.bugfix @@ -0,0 +1 @@ +Fix long-standing bug where `/sync` could tightloop after restart when using SQLite. diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py index 39498d52c6..84ef8136c2 100644 --- a/synapse/storage/databases/main/account_data.py +++ b/synapse/storage/databases/main/account_data.py @@ -94,7 +94,10 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) hs.get_replication_notifier(), "room_account_data", "stream_id", - extra_tables=[("room_tags_revisions", "stream_id")], + extra_tables=[ + ("account_data", "stream_id"), + ("room_tags_revisions", "stream_id"), + ], is_writer=self._instance_name in hs.config.worker.writers.account_data, ) -- cgit 1.5.1 From ba47fea5286e084ec70d568aa62eb4820b857c47 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 Oct 2023 16:16:19 +0100 Subject: Allow multiple workers to write to receipts stream. (#16432) Fixes #16417 --- changelog.d/16432.feature | 1 + synapse/config/workers.py | 4 +- synapse/handlers/appservice.py | 42 ++-- synapse/handlers/initial_sync.py | 2 +- synapse/handlers/receipts.py | 19 +- synapse/handlers/sync.py | 7 +- synapse/notifier.py | 45 +++- synapse/replication/tcp/client.py | 3 +- synapse/storage/databases/main/receipts.py | 148 +++++++++---- synapse/storage/databases/main/relations.py | 4 +- .../delta/83/03_instance_name_receipts.sql.sqlite | 17 ++ synapse/streams/events.py | 4 +- synapse/types/__init__.py | 137 +++++++++++- tests/handlers/test_appservice.py | 17 +- tests/replication/test_sharded_receipts.py | 243 +++++++++++++++++++++ 15 files changed, 604 insertions(+), 89 deletions(-) create mode 100644 changelog.d/16432.feature create mode 100644 synapse/storage/schema/main/delta/83/03_instance_name_receipts.sql.sqlite create mode 100644 tests/replication/test_sharded_receipts.py (limited to 'synapse/storage/databases') diff --git a/changelog.d/16432.feature b/changelog.d/16432.feature new file mode 100644 index 0000000000..9a76e85592 --- /dev/null +++ b/changelog.d/16432.feature @@ -0,0 +1 @@ +Allow multiple workers to write to receipts stream. diff --git a/synapse/config/workers.py b/synapse/config/workers.py index f1766088fc..6d67a8cd5c 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -358,9 +358,9 @@ class WorkerConfig(Config): "Must only specify one instance to handle `account_data` messages." ) - if len(self.writers.receipts) != 1: + if len(self.writers.receipts) == 0: raise ConfigError( - "Must only specify one instance to handle `receipts` messages." + "Must specify at least one instance to handle `receipts` messages." ) if len(self.writers.events) == 0: diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index c200a45f3a..873dadc3bd 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -47,6 +47,7 @@ from synapse.types import ( DeviceListUpdates, JsonDict, JsonMapping, + MultiWriterStreamToken, RoomAlias, RoomStreamToken, StreamKeyType, @@ -217,7 +218,7 @@ class ApplicationServicesHandler: def notify_interested_services_ephemeral( self, stream_key: StreamKeyType, - new_token: Union[int, RoomStreamToken], + new_token: Union[int, RoomStreamToken, MultiWriterStreamToken], users: Collection[Union[str, UserID]], ) -> None: """ @@ -259,19 +260,6 @@ class ApplicationServicesHandler: ): return - # Assert that new_token is an integer (and not a RoomStreamToken). - # All of the supported streams that this function handles use an - # integer to track progress (rather than a RoomStreamToken - a - # vector clock implementation) as they don't support multiple - # stream writers. - # - # As a result, we simply assert that new_token is an integer. - # If we do end up needing to pass a RoomStreamToken down here - # in the future, using RoomStreamToken.stream (the minimum stream - # position) to convert to an ascending integer value should work. - # Additional context: https://github.com/matrix-org/synapse/pull/11137 - assert isinstance(new_token, int) - # Ignore to-device messages if the feature flag is not enabled if ( stream_key == StreamKeyType.TO_DEVICE @@ -286,6 +274,9 @@ class ApplicationServicesHandler: ): return + # We know we're not a `RoomStreamToken` at this point. + assert not isinstance(new_token, RoomStreamToken) + # Check whether there are any appservices which have registered to receive # ephemeral events. # @@ -327,7 +318,7 @@ class ApplicationServicesHandler: self, services: List[ApplicationService], stream_key: StreamKeyType, - new_token: int, + new_token: Union[int, MultiWriterStreamToken], users: Collection[Union[str, UserID]], ) -> None: logger.debug("Checking interested services for %s", stream_key) @@ -340,6 +331,7 @@ class ApplicationServicesHandler: # # Instead we simply grab the latest typing updates in _handle_typing # and, if they apply to this application service, send it off. + assert isinstance(new_token, int) events = await self._handle_typing(service, new_token) if events: self.scheduler.enqueue_for_appservice(service, ephemeral=events) @@ -350,15 +342,23 @@ class ApplicationServicesHandler: (service.id, stream_key) ): if stream_key == StreamKeyType.RECEIPT: + assert isinstance(new_token, MultiWriterStreamToken) + + # We store appservice tokens as integers, so we ignore + # the `instance_map` components and instead simply + # follow the base stream position. + new_token = MultiWriterStreamToken(stream=new_token.stream) + events = await self._handle_receipts(service, new_token) self.scheduler.enqueue_for_appservice(service, ephemeral=events) # Persist the latest handled stream token for this appservice await self.store.set_appservice_stream_type_pos( - service, "read_receipt", new_token + service, "read_receipt", new_token.stream ) elif stream_key == StreamKeyType.PRESENCE: + assert isinstance(new_token, int) events = await self._handle_presence(service, users, new_token) self.scheduler.enqueue_for_appservice(service, ephemeral=events) @@ -368,6 +368,7 @@ class ApplicationServicesHandler: ) elif stream_key == StreamKeyType.TO_DEVICE: + assert isinstance(new_token, int) # Retrieve a list of to-device message events, as well as the # maximum stream token of the messages we were able to retrieve. to_device_messages = await self._get_to_device_messages( @@ -383,6 +384,7 @@ class ApplicationServicesHandler: ) elif stream_key == StreamKeyType.DEVICE_LIST: + assert isinstance(new_token, int) device_list_summary = await self._get_device_list_summary( service, new_token ) @@ -432,7 +434,7 @@ class ApplicationServicesHandler: return typing async def _handle_receipts( - self, service: ApplicationService, new_token: int + self, service: ApplicationService, new_token: MultiWriterStreamToken ) -> List[JsonMapping]: """ Return the latest read receipts that the given application service should receive. @@ -455,15 +457,17 @@ class ApplicationServicesHandler: from_key = await self.store.get_type_stream_id_for_appservice( service, "read_receipt" ) - if new_token is not None and new_token <= from_key: + if new_token is not None and new_token.stream <= from_key: logger.debug( "Rejecting token lower than or equal to stored: %s" % (new_token,) ) return [] + from_token = MultiWriterStreamToken(stream=from_key) + receipts_source = self.event_sources.sources.receipt receipts, _ = await receipts_source.get_new_events_as( - service=service, from_key=from_key, to_key=new_token + service=service, from_key=from_token, to_key=new_token ) return receipts diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py index c34bd7db95..b1d8be866f 100644 --- a/synapse/handlers/initial_sync.py +++ b/synapse/handlers/initial_sync.py @@ -145,7 +145,7 @@ class InitialSyncHandler: joined_rooms = [r.room_id for r in room_list if r.membership == Membership.JOIN] receipt = await self.store.get_linearized_receipts_for_rooms( joined_rooms, - to_key=int(now_token.receipt_key), + to_key=now_token.receipt_key, ) receipt = ReceiptEventSource.filter_out_private_receipts(receipt, user_id) diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 69ac468f75..b5f7a8b47e 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -20,6 +20,7 @@ from synapse.streams import EventSource from synapse.types import ( JsonDict, JsonMapping, + MultiWriterStreamToken, ReadReceipt, StreamKeyType, UserID, @@ -200,7 +201,7 @@ class ReceiptsHandler: await self.federation_sender.send_read_receipt(receipt) -class ReceiptEventSource(EventSource[int, JsonMapping]): +class ReceiptEventSource(EventSource[MultiWriterStreamToken, JsonMapping]): def __init__(self, hs: "HomeServer"): self.store = hs.get_datastores().main self.config = hs.config @@ -273,13 +274,12 @@ class ReceiptEventSource(EventSource[int, JsonMapping]): async def get_new_events( self, user: UserID, - from_key: int, + from_key: MultiWriterStreamToken, limit: int, room_ids: Iterable[str], is_guest: bool, explicit_room_id: Optional[str] = None, - ) -> Tuple[List[JsonMapping], int]: - from_key = int(from_key) + ) -> Tuple[List[JsonMapping], MultiWriterStreamToken]: to_key = self.get_current_key() if from_key == to_key: @@ -296,8 +296,11 @@ class ReceiptEventSource(EventSource[int, JsonMapping]): return events, to_key async def get_new_events_as( - self, from_key: int, to_key: int, service: ApplicationService - ) -> Tuple[List[JsonMapping], int]: + self, + from_key: MultiWriterStreamToken, + to_key: MultiWriterStreamToken, + service: ApplicationService, + ) -> Tuple[List[JsonMapping], MultiWriterStreamToken]: """Returns a set of new read receipt events that an appservice may be interested in. @@ -312,8 +315,6 @@ class ReceiptEventSource(EventSource[int, JsonMapping]): appservice may be interested in. * The current read receipt stream token. """ - from_key = int(from_key) - if from_key == to_key: return [], to_key @@ -333,5 +334,5 @@ class ReceiptEventSource(EventSource[int, JsonMapping]): return events, to_key - def get_current_key(self) -> int: + def get_current_key(self) -> MultiWriterStreamToken: return self.store.get_max_receipt_stream_id() diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index f131c0e8e0..f75c1548ca 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -57,6 +57,7 @@ from synapse.types import ( DeviceListUpdates, JsonDict, JsonMapping, + MultiWriterStreamToken, MutableStateMap, Requester, RoomStreamToken, @@ -477,7 +478,11 @@ class SyncHandler: event_copy = {k: v for (k, v) in event.items() if k != "room_id"} ephemeral_by_room.setdefault(room_id, []).append(event_copy) - receipt_key = since_token.receipt_key if since_token else 0 + receipt_key = ( + since_token.receipt_key + if since_token + else MultiWriterStreamToken(stream=0) + ) receipt_source = self.event_sources.sources.receipt receipts, receipt_key = await receipt_source.get_new_events( diff --git a/synapse/notifier.py b/synapse/notifier.py index 99e7715896..ee0bd84f1e 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -21,11 +21,13 @@ from typing import ( Dict, Iterable, List, + Literal, Optional, Set, Tuple, TypeVar, Union, + overload, ) import attr @@ -44,6 +46,7 @@ from synapse.metrics import LaterGauge from synapse.streams.config import PaginationConfig from synapse.types import ( JsonDict, + MultiWriterStreamToken, PersistedEventPosition, RoomStreamToken, StrCollection, @@ -127,7 +130,7 @@ class _NotifierUserStream: def notify( self, stream_key: StreamKeyType, - stream_id: Union[int, RoomStreamToken], + stream_id: Union[int, RoomStreamToken, MultiWriterStreamToken], time_now_ms: int, ) -> None: """Notify any listeners for this user of a new event from an @@ -452,10 +455,48 @@ class Notifier: except Exception: logger.exception("Error pusher pool of event") + @overload + def on_new_event( + self, + stream_key: Literal[StreamKeyType.ROOM], + new_token: RoomStreamToken, + users: Optional[Collection[Union[str, UserID]]] = None, + rooms: Optional[StrCollection] = None, + ) -> None: + ... + + @overload + def on_new_event( + self, + stream_key: Literal[StreamKeyType.RECEIPT], + new_token: MultiWriterStreamToken, + users: Optional[Collection[Union[str, UserID]]] = None, + rooms: Optional[StrCollection] = None, + ) -> None: + ... + + @overload + def on_new_event( + self, + stream_key: Literal[ + StreamKeyType.ACCOUNT_DATA, + StreamKeyType.DEVICE_LIST, + StreamKeyType.PRESENCE, + StreamKeyType.PUSH_RULES, + StreamKeyType.TO_DEVICE, + StreamKeyType.TYPING, + StreamKeyType.UN_PARTIAL_STATED_ROOMS, + ], + new_token: int, + users: Optional[Collection[Union[str, UserID]]] = None, + rooms: Optional[StrCollection] = None, + ) -> None: + ... + def on_new_event( self, stream_key: StreamKeyType, - new_token: Union[int, RoomStreamToken], + new_token: Union[int, RoomStreamToken, MultiWriterStreamToken], users: Optional[Collection[Union[str, UserID]]] = None, rooms: Optional[StrCollection] = None, ) -> None: diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 384355698d..1312b6f21e 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -126,8 +126,9 @@ class ReplicationDataHandler: StreamKeyType.ACCOUNT_DATA, token, users=[row.user_id for row in rows] ) elif stream_name == ReceiptsStream.NAME: + new_token = self.store.get_max_receipt_stream_id() self.notifier.on_new_event( - StreamKeyType.RECEIPT, token, rooms=[row.room_id for row in rows] + StreamKeyType.RECEIPT, new_token, rooms=[row.room_id for row in rows] ) await self._pusher_pool.on_new_receipts({row.user_id for row in rows}) elif stream_name == ToDeviceStream.NAME: diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py index b2645ab43c..56e8eb16a8 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py @@ -28,6 +28,8 @@ from typing import ( cast, ) +from immutabledict import immutabledict + from synapse.api.constants import EduTypes from synapse.replication.tcp.streams import ReceiptsStream from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause @@ -43,7 +45,12 @@ from synapse.storage.util.id_generators import ( MultiWriterIdGenerator, StreamIdGenerator, ) -from synapse.types import JsonDict, JsonMapping +from synapse.types import ( + JsonDict, + JsonMapping, + MultiWriterStreamToken, + PersistedPosition, +) from synapse.util import json_encoder from synapse.util.caches.descriptors import cached, cachedList from synapse.util.caches.stream_change_cache import StreamChangeCache @@ -105,7 +112,7 @@ class ReceiptsWorkerStore(SQLBaseStore): "receipts_linearized", entity_column="room_id", stream_column="stream_id", - max_value=max_receipts_stream_id, + max_value=max_receipts_stream_id.stream, limit=10000, ) self._receipts_stream_cache = StreamChangeCache( @@ -114,9 +121,31 @@ class ReceiptsWorkerStore(SQLBaseStore): prefilled_cache=receipts_stream_prefill, ) - def get_max_receipt_stream_id(self) -> int: + def get_max_receipt_stream_id(self) -> MultiWriterStreamToken: """Get the current max stream ID for receipts stream""" - return self._receipts_id_gen.get_current_token() + + min_pos = self._receipts_id_gen.get_current_token() + + positions = {} + if isinstance(self._receipts_id_gen, MultiWriterIdGenerator): + # The `min_pos` is the minimum position that we know all instances + # have finished persisting to, so we only care about instances whose + # positions are ahead of that. (Instance positions can be behind the + # min position as there are times we can work out that the minimum + # position is ahead of the naive minimum across all current + # positions. See MultiWriterIdGenerator for details) + positions = { + i: p + for i, p in self._receipts_id_gen.get_positions().items() + if p > min_pos + } + + return MultiWriterStreamToken( + stream=min_pos, instance_map=immutabledict(positions) + ) + + def get_receipt_stream_id_for_instance(self, instance_name: str) -> int: + return self._receipts_id_gen.get_current_token_for_writer(instance_name) def get_last_unthreaded_receipt_for_user_txn( self, @@ -257,7 +286,10 @@ class ReceiptsWorkerStore(SQLBaseStore): } async def get_linearized_receipts_for_rooms( - self, room_ids: Iterable[str], to_key: int, from_key: Optional[int] = None + self, + room_ids: Iterable[str], + to_key: MultiWriterStreamToken, + from_key: Optional[MultiWriterStreamToken] = None, ) -> List[JsonMapping]: """Get receipts for multiple rooms for sending to clients. @@ -276,7 +308,7 @@ class ReceiptsWorkerStore(SQLBaseStore): # Only ask the database about rooms where there have been new # receipts added since `from_key` room_ids = self._receipts_stream_cache.get_entities_changed( - room_ids, from_key + room_ids, from_key.stream ) results = await self._get_linearized_receipts_for_rooms( @@ -286,7 +318,10 @@ class ReceiptsWorkerStore(SQLBaseStore): return [ev for res in results.values() for ev in res] async def get_linearized_receipts_for_room( - self, room_id: str, to_key: int, from_key: Optional[int] = None + self, + room_id: str, + to_key: MultiWriterStreamToken, + from_key: Optional[MultiWriterStreamToken] = None, ) -> Sequence[JsonMapping]: """Get receipts for a single room for sending to clients. @@ -302,36 +337,49 @@ class ReceiptsWorkerStore(SQLBaseStore): if from_key is not None: # Check the cache first to see if any new receipts have been added # since`from_key`. If not we can no-op. - if not self._receipts_stream_cache.has_entity_changed(room_id, from_key): + if not self._receipts_stream_cache.has_entity_changed( + room_id, from_key.stream + ): return [] return await self._get_linearized_receipts_for_room(room_id, to_key, from_key) @cached(tree=True) async def _get_linearized_receipts_for_room( - self, room_id: str, to_key: int, from_key: Optional[int] = None + self, + room_id: str, + to_key: MultiWriterStreamToken, + from_key: Optional[MultiWriterStreamToken] = None, ) -> Sequence[JsonMapping]: """See get_linearized_receipts_for_room""" def f(txn: LoggingTransaction) -> List[Tuple[str, str, str, str]]: if from_key: - sql = ( - "SELECT receipt_type, user_id, event_id, data" - " FROM receipts_linearized WHERE" - " room_id = ? AND stream_id > ? AND stream_id <= ?" - ) + sql = """ + SELECT stream_id, instance_name, receipt_type, user_id, event_id, data + FROM receipts_linearized + WHERE room_id = ? AND stream_id > ? AND stream_id <= ? + """ - txn.execute(sql, (room_id, from_key, to_key)) - else: - sql = ( - "SELECT receipt_type, user_id, event_id, data" - " FROM receipts_linearized WHERE" - " room_id = ? AND stream_id <= ?" + txn.execute( + sql, (room_id, from_key.stream, to_key.get_max_stream_pos()) ) + else: + sql = """ + SELECT stream_id, instance_name, receipt_type, user_id, event_id, data + FROM receipts_linearized WHERE + room_id = ? AND stream_id <= ? + """ - txn.execute(sql, (room_id, to_key)) + txn.execute(sql, (room_id, to_key.get_max_stream_pos())) - return cast(List[Tuple[str, str, str, str]], txn.fetchall()) + return [ + (receipt_type, user_id, event_id, data) + for stream_id, instance_name, receipt_type, user_id, event_id, data in txn + if MultiWriterStreamToken.is_stream_position_in_range( + from_key, to_key, instance_name, stream_id + ) + ] rows = await self.db_pool.runInteraction("get_linearized_receipts_for_room", f) @@ -352,7 +400,10 @@ class ReceiptsWorkerStore(SQLBaseStore): num_args=3, ) async def _get_linearized_receipts_for_rooms( - self, room_ids: Collection[str], to_key: int, from_key: Optional[int] = None + self, + room_ids: Collection[str], + to_key: MultiWriterStreamToken, + from_key: Optional[MultiWriterStreamToken] = None, ) -> Mapping[str, Sequence[JsonMapping]]: if not room_ids: return {} @@ -362,7 +413,8 @@ class ReceiptsWorkerStore(SQLBaseStore): ) -> List[Tuple[str, str, str, str, Optional[str], str]]: if from_key: sql = """ - SELECT room_id, receipt_type, user_id, event_id, thread_id, data + SELECT stream_id, instance_name, room_id, receipt_type, + user_id, event_id, thread_id, data FROM receipts_linearized WHERE stream_id > ? AND stream_id <= ? AND """ @@ -370,10 +422,14 @@ class ReceiptsWorkerStore(SQLBaseStore): self.database_engine, "room_id", room_ids ) - txn.execute(sql + clause, [from_key, to_key] + list(args)) + txn.execute( + sql + clause, + [from_key.stream, to_key.get_max_stream_pos()] + list(args), + ) else: sql = """ - SELECT room_id, receipt_type, user_id, event_id, thread_id, data + SELECT stream_id, instance_name, room_id, receipt_type, + user_id, event_id, thread_id, data FROM receipts_linearized WHERE stream_id <= ? AND """ @@ -382,11 +438,15 @@ class ReceiptsWorkerStore(SQLBaseStore): self.database_engine, "room_id", room_ids ) - txn.execute(sql + clause, [to_key] + list(args)) + txn.execute(sql + clause, [to_key.get_max_stream_pos()] + list(args)) - return cast( - List[Tuple[str, str, str, str, Optional[str], str]], txn.fetchall() - ) + return [ + (room_id, receipt_type, user_id, event_id, thread_id, data) + for stream_id, instance_name, room_id, receipt_type, user_id, event_id, thread_id, data in txn + if MultiWriterStreamToken.is_stream_position_in_range( + from_key, to_key, instance_name, stream_id + ) + ] txn_results = await self.db_pool.runInteraction( "_get_linearized_receipts_for_rooms", f @@ -420,7 +480,9 @@ class ReceiptsWorkerStore(SQLBaseStore): num_args=2, ) async def get_linearized_receipts_for_all_rooms( - self, to_key: int, from_key: Optional[int] = None + self, + to_key: MultiWriterStreamToken, + from_key: Optional[MultiWriterStreamToken] = None, ) -> Mapping[str, JsonMapping]: """Get receipts for all rooms between two stream_ids, up to a limit of the latest 100 read receipts. @@ -437,25 +499,31 @@ class ReceiptsWorkerStore(SQLBaseStore): def f(txn: LoggingTransaction) -> List[Tuple[str, str, str, str, str]]: if from_key: sql = """ - SELECT room_id, receipt_type, user_id, event_id, data + SELECT stream_id, instance_name, room_id, receipt_type, user_id, event_id, data FROM receipts_linearized WHERE stream_id > ? AND stream_id <= ? ORDER BY stream_id DESC LIMIT 100 """ - txn.execute(sql, [from_key, to_key]) + txn.execute(sql, [from_key.stream, to_key.get_max_stream_pos()]) else: sql = """ - SELECT room_id, receipt_type, user_id, event_id, data + SELECT stream_id, instance_name, room_id, receipt_type, user_id, event_id, data FROM receipts_linearized WHERE stream_id <= ? ORDER BY stream_id DESC LIMIT 100 """ - txn.execute(sql, [to_key]) + txn.execute(sql, [to_key.get_max_stream_pos()]) - return cast(List[Tuple[str, str, str, str, str]], txn.fetchall()) + return [ + (room_id, receipt_type, user_id, event_id, data) + for stream_id, instance_name, room_id, receipt_type, user_id, event_id, data in txn + if MultiWriterStreamToken.is_stream_position_in_range( + from_key, to_key, instance_name, stream_id + ) + ] txn_results = await self.db_pool.runInteraction( "get_linearized_receipts_for_all_rooms", f @@ -545,10 +613,11 @@ class ReceiptsWorkerStore(SQLBaseStore): SELECT stream_id, room_id, receipt_type, user_id, event_id, thread_id, data FROM receipts_linearized WHERE ? < stream_id AND stream_id <= ? + AND instance_name = ? ORDER BY stream_id ASC LIMIT ? """ - txn.execute(sql, (last_id, current_id, limit)) + txn.execute(sql, (last_id, current_id, instance_name, limit)) updates = cast( List[Tuple[int, Tuple[str, str, str, str, Optional[str], JsonDict]]], @@ -695,6 +764,7 @@ class ReceiptsWorkerStore(SQLBaseStore): keyvalues=keyvalues, values={ "stream_id": stream_id, + "instance_name": self._instance_name, "event_id": event_id, "event_stream_ordering": stream_ordering, "data": json_encoder.encode(data), @@ -750,7 +820,7 @@ class ReceiptsWorkerStore(SQLBaseStore): event_ids: List[str], thread_id: Optional[str], data: dict, - ) -> Optional[int]: + ) -> Optional[PersistedPosition]: """Insert a receipt, either from local client or remote server. Automatically does conversion between linearized and graph @@ -812,7 +882,7 @@ class ReceiptsWorkerStore(SQLBaseStore): data, ) - return stream_id + return PersistedPosition(self._instance_name, stream_id) async def _insert_graph_receipt( self, diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py index 7f40e2c446..ce7bfd5146 100644 --- a/synapse/storage/databases/main/relations.py +++ b/synapse/storage/databases/main/relations.py @@ -47,7 +47,7 @@ from synapse.storage.databases.main.stream import ( generate_pagination_where_clause, ) from synapse.storage.engines import PostgresEngine -from synapse.types import JsonDict, StreamKeyType, StreamToken +from synapse.types import JsonDict, MultiWriterStreamToken, StreamKeyType, StreamToken from synapse.util.caches.descriptors import cached, cachedList if TYPE_CHECKING: @@ -314,7 +314,7 @@ class RelationsWorkerStore(SQLBaseStore): room_key=next_key, presence_key=0, typing_key=0, - receipt_key=0, + receipt_key=MultiWriterStreamToken(stream=0), account_data_key=0, push_rules_key=0, to_device_key=0, diff --git a/synapse/storage/schema/main/delta/83/03_instance_name_receipts.sql.sqlite b/synapse/storage/schema/main/delta/83/03_instance_name_receipts.sql.sqlite new file mode 100644 index 0000000000..6c7ad0fd37 --- /dev/null +++ b/synapse/storage/schema/main/delta/83/03_instance_name_receipts.sql.sqlite @@ -0,0 +1,17 @@ +/* Copyright 2023 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- This already exists on Postgres. +ALTER TABLE receipts_linearized ADD COLUMN instance_name TEXT; diff --git a/synapse/streams/events.py b/synapse/streams/events.py index 609a0978a9..d0bb83b184 100644 --- a/synapse/streams/events.py +++ b/synapse/streams/events.py @@ -23,7 +23,7 @@ from synapse.handlers.room import RoomEventSource from synapse.handlers.typing import TypingNotificationEventSource from synapse.logging.opentracing import trace from synapse.streams import EventSource -from synapse.types import StreamKeyType, StreamToken +from synapse.types import MultiWriterStreamToken, StreamKeyType, StreamToken if TYPE_CHECKING: from synapse.server import HomeServer @@ -111,7 +111,7 @@ class EventSources: room_key=await self.sources.room.get_current_key_for_room(room_id), presence_key=0, typing_key=0, - receipt_key=0, + receipt_key=MultiWriterStreamToken(stream=0), account_data_key=0, push_rules_key=0, to_device_key=0, diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py index 09a88c86a7..4c5b26ad93 100644 --- a/synapse/types/__init__.py +++ b/synapse/types/__init__.py @@ -695,6 +695,90 @@ class RoomStreamToken(AbstractMultiWriterStreamToken): return "s%d" % (self.stream,) +@attr.s(frozen=True, slots=True, order=False) +class MultiWriterStreamToken(AbstractMultiWriterStreamToken): + """A basic stream token class for streams that supports multiple writers.""" + + @classmethod + async def parse(cls, store: "DataStore", string: str) -> "MultiWriterStreamToken": + try: + if string[0].isdigit(): + return cls(stream=int(string)) + if string[0] == "m": + parts = string[1:].split("~") + stream = int(parts[0]) + + instance_map = {} + for part in parts[1:]: + key, value = part.split(".") + instance_id = int(key) + pos = int(value) + + instance_name = await store.get_name_from_instance_id(instance_id) + instance_map[instance_name] = pos + + return cls( + stream=stream, + instance_map=immutabledict(instance_map), + ) + except CancelledError: + raise + except Exception: + pass + raise SynapseError(400, "Invalid stream token %r" % (string,)) + + async def to_string(self, store: "DataStore") -> str: + if self.instance_map: + entries = [] + for name, pos in self.instance_map.items(): + if pos <= self.stream: + # Ignore instances who are below the minimum stream position + # (we might know they've advanced without seeing a recent + # write from them). + continue + + instance_id = await store.get_id_for_instance(name) + entries.append(f"{instance_id}.{pos}") + + encoded_map = "~".join(entries) + return f"m{self.stream}~{encoded_map}" + else: + return str(self.stream) + + @staticmethod + def is_stream_position_in_range( + low: Optional["AbstractMultiWriterStreamToken"], + high: Optional["AbstractMultiWriterStreamToken"], + instance_name: Optional[str], + pos: int, + ) -> bool: + """Checks if a given persisted position is between the two given tokens. + + If `instance_name` is None then the row was persisted before multi + writer support. + """ + + if low: + if instance_name: + low_stream = low.instance_map.get(instance_name, low.stream) + else: + low_stream = low.stream + + if pos <= low_stream: + return False + + if high: + if instance_name: + high_stream = high.instance_map.get(instance_name, high.stream) + else: + high_stream = high.stream + + if high_stream < pos: + return False + + return True + + class StreamKeyType(Enum): """Known stream types. @@ -776,7 +860,9 @@ class StreamToken: ) presence_key: int typing_key: int - receipt_key: int + receipt_key: MultiWriterStreamToken = attr.ib( + validator=attr.validators.instance_of(MultiWriterStreamToken) + ) account_data_key: int push_rules_key: int to_device_key: int @@ -799,8 +885,31 @@ class StreamToken: while len(keys) < len(attr.fields(cls)): # i.e. old token from before receipt_key keys.append("0") + + ( + room_key, + presence_key, + typing_key, + receipt_key, + account_data_key, + push_rules_key, + to_device_key, + device_list_key, + groups_key, + un_partial_stated_rooms_key, + ) = keys + return cls( - await RoomStreamToken.parse(store, keys[0]), *(int(k) for k in keys[1:]) + room_key=await RoomStreamToken.parse(store, room_key), + presence_key=int(presence_key), + typing_key=int(typing_key), + receipt_key=await MultiWriterStreamToken.parse(store, receipt_key), + account_data_key=int(account_data_key), + push_rules_key=int(push_rules_key), + to_device_key=int(to_device_key), + device_list_key=int(device_list_key), + groups_key=int(groups_key), + un_partial_stated_rooms_key=int(un_partial_stated_rooms_key), ) except CancelledError: raise @@ -813,7 +922,7 @@ class StreamToken: await self.room_key.to_string(store), str(self.presence_key), str(self.typing_key), - str(self.receipt_key), + await self.receipt_key.to_string(store), str(self.account_data_key), str(self.push_rules_key), str(self.to_device_key), @@ -841,6 +950,11 @@ class StreamToken: StreamKeyType.ROOM, self.room_key.copy_and_advance(new_value) ) return new_token + elif key == StreamKeyType.RECEIPT: + new_token = self.copy_and_replace( + StreamKeyType.RECEIPT, self.receipt_key.copy_and_advance(new_value) + ) + return new_token new_token = self.copy_and_replace(key, new_value) new_id = new_token.get_field(key) @@ -858,6 +972,10 @@ class StreamToken: def get_field(self, key: Literal[StreamKeyType.ROOM]) -> RoomStreamToken: ... + @overload + def get_field(self, key: Literal[StreamKeyType.RECEIPT]) -> MultiWriterStreamToken: + ... + @overload def get_field( self, @@ -866,7 +984,6 @@ class StreamToken: StreamKeyType.DEVICE_LIST, StreamKeyType.PRESENCE, StreamKeyType.PUSH_RULES, - StreamKeyType.RECEIPT, StreamKeyType.TO_DEVICE, StreamKeyType.TYPING, StreamKeyType.UN_PARTIAL_STATED_ROOMS, @@ -875,15 +992,21 @@ class StreamToken: ... @overload - def get_field(self, key: StreamKeyType) -> Union[int, RoomStreamToken]: + def get_field( + self, key: StreamKeyType + ) -> Union[int, RoomStreamToken, MultiWriterStreamToken]: ... - def get_field(self, key: StreamKeyType) -> Union[int, RoomStreamToken]: + def get_field( + self, key: StreamKeyType + ) -> Union[int, RoomStreamToken, MultiWriterStreamToken]: """Returns the stream ID for the given key.""" return getattr(self, key.value) -StreamToken.START = StreamToken(RoomStreamToken(stream=0), 0, 0, 0, 0, 0, 0, 0, 0, 0) +StreamToken.START = StreamToken( + RoomStreamToken(stream=0), 0, 0, MultiWriterStreamToken(stream=0), 0, 0, 0, 0, 0, 0 +) @attr.s(slots=True, frozen=True, auto_attribs=True) diff --git a/tests/handlers/test_appservice.py b/tests/handlers/test_appservice.py index c888d1ff01..78646cb5dc 100644 --- a/tests/handlers/test_appservice.py +++ b/tests/handlers/test_appservice.py @@ -31,7 +31,12 @@ from synapse.appservice import ( from synapse.handlers.appservice import ApplicationServicesHandler from synapse.rest.client import login, receipts, register, room, sendtodevice from synapse.server import HomeServer -from synapse.types import JsonDict, RoomStreamToken, StreamKeyType +from synapse.types import ( + JsonDict, + MultiWriterStreamToken, + RoomStreamToken, + StreamKeyType, +) from synapse.util import Clock from synapse.util.stringutils import random_string @@ -305,7 +310,9 @@ class AppServiceHandlerTestCase(unittest.TestCase): ) self.handler.notify_interested_services_ephemeral( - StreamKeyType.RECEIPT, 580, ["@fakerecipient:example.com"] + StreamKeyType.RECEIPT, + MultiWriterStreamToken(stream=580), + ["@fakerecipient:example.com"], ) self.mock_scheduler.enqueue_for_appservice.assert_called_once_with( interested_service, ephemeral=[event] @@ -333,7 +340,9 @@ class AppServiceHandlerTestCase(unittest.TestCase): ) self.handler.notify_interested_services_ephemeral( - StreamKeyType.RECEIPT, 580, ["@fakerecipient:example.com"] + StreamKeyType.RECEIPT, + MultiWriterStreamToken(stream=580), + ["@fakerecipient:example.com"], ) # This method will be called, but with an empty list of events self.mock_scheduler.enqueue_for_appservice.assert_called_once_with( @@ -636,7 +645,7 @@ class ApplicationServicesHandlerSendEventsTestCase(unittest.HomeserverTestCase): self.hs.get_application_service_handler()._notify_interested_services_ephemeral( services=[interested_appservice], stream_key=StreamKeyType.RECEIPT, - new_token=stream_token, + new_token=MultiWriterStreamToken(stream=stream_token), users=[self.exclusive_as_user], ) ) diff --git a/tests/replication/test_sharded_receipts.py b/tests/replication/test_sharded_receipts.py new file mode 100644 index 0000000000..41876b36de --- /dev/null +++ b/tests/replication/test_sharded_receipts.py @@ -0,0 +1,243 @@ +# Copyright 2020 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging + +from twisted.test.proto_helpers import MemoryReactor + +from synapse.api.constants import ReceiptTypes +from synapse.rest import admin +from synapse.rest.client import login, receipts, room, sync +from synapse.server import HomeServer +from synapse.storage.util.id_generators import MultiWriterIdGenerator +from synapse.types import StreamToken +from synapse.util import Clock + +from tests.replication._base import BaseMultiWorkerStreamTestCase +from tests.server import make_request + +logger = logging.getLogger(__name__) + + +class ReceiptsShardTestCase(BaseMultiWorkerStreamTestCase): + """Checks receipts sharding works""" + + servlets = [ + admin.register_servlets_for_client_rest_resource, + room.register_servlets, + login.register_servlets, + sync.register_servlets, + receipts.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + # Register a user who sends a message that we'll get notified about + self.other_user_id = self.register_user("otheruser", "pass") + self.other_access_token = self.login("otheruser", "pass") + + self.room_creator = self.hs.get_room_creation_handler() + self.store = hs.get_datastores().main + + def default_config(self) -> dict: + conf = super().default_config() + conf["stream_writers"] = {"receipts": ["worker1", "worker2"]} + conf["instance_map"] = { + "main": {"host": "testserv", "port": 8765}, + "worker1": {"host": "testserv", "port": 1001}, + "worker2": {"host": "testserv", "port": 1002}, + } + return conf + + def test_basic(self) -> None: + """Simple test to ensure that receipts can be sent on multiple + workers. + """ + + worker1 = self.make_worker_hs( + "synapse.app.generic_worker", + {"worker_name": "worker1"}, + ) + worker1_site = self._hs_to_site[worker1] + + worker2 = self.make_worker_hs( + "synapse.app.generic_worker", + {"worker_name": "worker2"}, + ) + worker2_site = self._hs_to_site[worker2] + + user_id = self.register_user("user", "pass") + access_token = self.login("user", "pass") + + # Create a room + room_id = self.helper.create_room_as(user_id, tok=access_token) + + # The other user joins + self.helper.join( + room=room_id, user=self.other_user_id, tok=self.other_access_token + ) + + # First user sends a message, the other users sends a receipt. + response = self.helper.send(room_id, body="Hi!", tok=self.other_access_token) + event_id = response["event_id"] + + channel = make_request( + reactor=self.reactor, + site=worker1_site, + method="POST", + path=f"/rooms/{room_id}/receipt/{ReceiptTypes.READ}/{event_id}", + access_token=access_token, + content={}, + ) + self.assertEqual(200, channel.code) + + # Now we do it again using the second worker + response = self.helper.send(room_id, body="Hi!", tok=self.other_access_token) + event_id = response["event_id"] + + channel = make_request( + reactor=self.reactor, + site=worker2_site, + method="POST", + path=f"/rooms/{room_id}/receipt/{ReceiptTypes.READ}/{event_id}", + access_token=access_token, + content={}, + ) + self.assertEqual(200, channel.code) + + def test_vector_clock_token(self) -> None: + """Tests that using a stream token with a vector clock component works + correctly with basic /sync usage. + """ + + worker_hs1 = self.make_worker_hs( + "synapse.app.generic_worker", + {"worker_name": "worker1"}, + ) + worker1_site = self._hs_to_site[worker_hs1] + + worker_hs2 = self.make_worker_hs( + "synapse.app.generic_worker", + {"worker_name": "worker2"}, + ) + worker2_site = self._hs_to_site[worker_hs2] + + sync_hs = self.make_worker_hs( + "synapse.app.generic_worker", + {"worker_name": "sync"}, + ) + sync_hs_site = self._hs_to_site[sync_hs] + + user_id = self.register_user("user", "pass") + access_token = self.login("user", "pass") + + store = self.hs.get_datastores().main + + room_id = self.helper.create_room_as(user_id, tok=access_token) + + # The other user joins + self.helper.join( + room=room_id, user=self.other_user_id, tok=self.other_access_token + ) + + response = self.helper.send(room_id, body="Hi!", tok=self.other_access_token) + first_event = response["event_id"] + + # Do an initial sync so that we're up to date. + channel = make_request( + self.reactor, sync_hs_site, "GET", "/sync", access_token=access_token + ) + next_batch = channel.json_body["next_batch"] + + # We now gut wrench into the events stream MultiWriterIdGenerator on + # worker2 to mimic it getting stuck persisting a receipt. This ensures + # that when we send an event on worker1 we end up in a state where + # worker2 events stream position lags that on worker1, resulting in a + # receipts token with a non-empty instance map component. + # + # Worker2's receipts stream position will not advance until we call + # __aexit__ again. + worker_store2 = worker_hs2.get_datastores().main + assert isinstance(worker_store2._receipts_id_gen, MultiWriterIdGenerator) + + actx = worker_store2._receipts_id_gen.get_next() + self.get_success(actx.__aenter__()) + + channel = make_request( + reactor=self.reactor, + site=worker1_site, + method="POST", + path=f"/rooms/{room_id}/receipt/{ReceiptTypes.READ}/{first_event}", + access_token=access_token, + content={}, + ) + self.assertEqual(200, channel.code) + + # Assert that the current stream token has an instance map component, as + # we are trying to test vector clock tokens. + receipts_token = store.get_max_receipt_stream_id() + self.assertGreater(len(receipts_token.instance_map), 0) + + # Check that syncing still gets the new receipt, despite the gap in the + # stream IDs. + channel = make_request( + self.reactor, + sync_hs_site, + "GET", + f"/sync?since={next_batch}", + access_token=access_token, + ) + + # We should only see the new event and nothing else + self.assertIn(room_id, channel.json_body["rooms"]["join"]) + + events = channel.json_body["rooms"]["join"][room_id]["ephemeral"]["events"] + self.assertEqual(len(events), 1) + self.assertIn(first_event, events[0]["content"]) + + # Get the next batch and makes sure its a vector clock style token. + vector_clock_token = channel.json_body["next_batch"] + parsed_token = self.get_success( + StreamToken.from_string(store, vector_clock_token) + ) + self.assertGreaterEqual(len(parsed_token.receipt_key.instance_map), 1) + + # Now that we've got a vector clock token we finish the fake persisting + # a receipt we started above. + self.get_success(actx.__aexit__(None, None, None)) + + # Now try and send another receipts to the other worker. + response = self.helper.send(room_id, body="Hi!", tok=self.other_access_token) + second_event = response["event_id"] + + channel = make_request( + reactor=self.reactor, + site=worker2_site, + method="POST", + path=f"/rooms/{room_id}/receipt/{ReceiptTypes.READ}/{second_event}", + access_token=access_token, + content={}, + ) + + channel = make_request( + self.reactor, + sync_hs_site, + "GET", + f"/sync?since={vector_clock_token}", + access_token=access_token, + ) + + self.assertIn(room_id, channel.json_body["rooms"]["join"]) + + events = channel.json_body["rooms"]["join"][room_id]["ephemeral"]["events"] + self.assertEqual(len(events), 1) + self.assertIn(second_event, events[0]["content"]) -- cgit 1.5.1 From 9407d5ba78d1e5275b5817ae9e6aedf7d1ca14f7 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 26 Oct 2023 13:01:36 -0400 Subject: Convert simple_select_list and simple_select_list_txn to return lists of tuples (#16505) This should use fewer allocations and improves type hints. --- changelog.d/16505.misc | 1 + synapse/handlers/deactivate_account.py | 4 +- synapse/handlers/sso.py | 5 +- synapse/storage/database.py | 31 +-- synapse/storage/databases/main/account_data.py | 18 +- synapse/storage/databases/main/appservice.py | 13 +- synapse/storage/databases/main/client_ips.py | 25 ++- synapse/storage/databases/main/devices.py | 70 +++--- synapse/storage/databases/main/e2e_room_keys.py | 49 ++-- synapse/storage/databases/main/event_federation.py | 18 +- .../databases/main/experimental_features.py | 15 +- synapse/storage/databases/main/keys.py | 35 +-- synapse/storage/databases/main/media_repository.py | 58 +++-- synapse/storage/databases/main/push_rule.py | 52 +++-- synapse/storage/databases/main/pusher.py | 20 +- synapse/storage/databases/main/registration.py | 60 +++-- synapse/storage/databases/main/relations.py | 15 +- synapse/storage/databases/main/room.py | 34 +-- synapse/storage/databases/main/roommember.py | 15 +- synapse/storage/databases/main/tags.py | 28 ++- synapse/storage/databases/main/ui_auth.py | 32 +-- synapse/storage/databases/state/store.py | 18 +- tests/handlers/test_stats.py | 14 +- tests/storage/databases/main/test_receipts.py | 20 +- tests/storage/test__base.py | 16 +- tests/storage/test_background_update.py | 35 +-- tests/storage/test_base.py | 4 +- tests/storage/test_client_ips.py | 250 ++++++++++----------- tests/storage/test_roommember.py | 40 ++-- tests/storage/test_state.py | 62 ++--- tests/storage/test_user_directory.py | 61 ++--- 31 files changed, 609 insertions(+), 509 deletions(-) create mode 100644 changelog.d/16505.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/16505.misc b/changelog.d/16505.misc new file mode 100644 index 0000000000..bd7cdd42af --- /dev/null +++ b/changelog.d/16505.misc @@ -0,0 +1 @@ +Reduce memory allocations. diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index 6a8f8f2fd1..370f4041fb 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -103,10 +103,10 @@ class DeactivateAccountHandler: # Attempt to unbind any known bound threepids to this account from identity # server(s). bound_threepids = await self.store.user_get_bound_threepids(user_id) - for threepid in bound_threepids: + for medium, address in bound_threepids: try: result = await self._identity_handler.try_unbind_threepid( - user_id, threepid["medium"], threepid["address"], id_server + user_id, medium, address, id_server ) except Exception: # Do we want this to be a fatal error or should we carry on? diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py index e9a544e754..62f2454f5d 100644 --- a/synapse/handlers/sso.py +++ b/synapse/handlers/sso.py @@ -1206,10 +1206,7 @@ class SsoHandler: # We have no guarantee that all the devices of that session are for the same # `user_id`. Hence, we have to iterate over the list of devices and log them out # one by one. - for device in devices: - user_id = device["user_id"] - device_id = device["device_id"] - + for user_id, device_id in devices: # If the user_id associated with that device/session is not the one we got # out of the `sub` claim, skip that device and show log an error. if expected_user_id is not None and user_id != expected_user_id: diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 81f661160c..774d5c12f0 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -606,13 +606,16 @@ class DatabasePool: If the background updates have not completed, wait 15 sec and check again. """ - updates = await self.simple_select_list( - "background_updates", - keyvalues=None, - retcols=["update_name"], - desc="check_background_updates", + updates = cast( + List[Tuple[str]], + await self.simple_select_list( + "background_updates", + keyvalues=None, + retcols=["update_name"], + desc="check_background_updates", + ), ) - background_update_names = [x["update_name"] for x in updates] + background_update_names = [x[0] for x in updates] for table, update_name in UNIQUE_INDEX_BACKGROUND_UPDATES.items(): if update_name not in background_update_names: @@ -1804,9 +1807,9 @@ class DatabasePool: keyvalues: Optional[Dict[str, Any]], retcols: Collection[str], desc: str = "simple_select_list", - ) -> List[Dict[str, Any]]: + ) -> List[Tuple[Any, ...]]: """Executes a SELECT query on the named table, which may return zero or - more rows, returning the result as a list of dicts. + more rows, returning the result as a list of tuples. Args: table: the table name @@ -1817,8 +1820,7 @@ class DatabasePool: desc: description of the transaction, for logging and metrics Returns: - A list of dictionaries, one per result row, each a mapping between the - column names from `retcols` and that column's value for the row. + A list of tuples, one per result row, each the retcolumn's value for the row. """ return await self.runInteraction( desc, @@ -1836,9 +1838,9 @@ class DatabasePool: table: str, keyvalues: Optional[Dict[str, Any]], retcols: Iterable[str], - ) -> List[Dict[str, Any]]: + ) -> List[Tuple[Any, ...]]: """Executes a SELECT query on the named table, which may return zero or - more rows, returning the result as a list of dicts. + more rows, returning the result as a list of tuples. Args: txn: Transaction object @@ -1849,8 +1851,7 @@ class DatabasePool: retcols: the names of the columns to return Returns: - A list of dictionaries, one per result row, each a mapping between the - column names from `retcols` and that column's value for the row. + A list of tuples, one per result row, each the retcolumn's value for the row. """ if keyvalues: sql = "SELECT %s FROM %s WHERE %s" % ( @@ -1863,7 +1864,7 @@ class DatabasePool: sql = "SELECT %s FROM %s" % (", ".join(retcols), table) txn.execute(sql) - return cls.cursor_to_dict(txn) + return txn.fetchall() async def simple_select_many_batch( self, diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py index 84ef8136c2..d7482a1f4e 100644 --- a/synapse/storage/databases/main/account_data.py +++ b/synapse/storage/databases/main/account_data.py @@ -286,16 +286,20 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) def get_account_data_for_room_txn( txn: LoggingTransaction, - ) -> Dict[str, JsonDict]: - rows = self.db_pool.simple_select_list_txn( - txn, - "room_account_data", - {"user_id": user_id, "room_id": room_id}, - ["account_data_type", "content"], + ) -> Dict[str, JsonMapping]: + rows = cast( + List[Tuple[str, str]], + self.db_pool.simple_select_list_txn( + txn, + table="room_account_data", + keyvalues={"user_id": user_id, "room_id": room_id}, + retcols=["account_data_type", "content"], + ), ) return { - row["account_data_type"]: db_to_json(row["content"]) for row in rows + account_data_type: db_to_json(content) + for account_data_type, content in rows } return await self.db_pool.runInteraction( diff --git a/synapse/storage/databases/main/appservice.py b/synapse/storage/databases/main/appservice.py index 073a99cd84..fa7d1c469a 100644 --- a/synapse/storage/databases/main/appservice.py +++ b/synapse/storage/databases/main/appservice.py @@ -197,16 +197,21 @@ class ApplicationServiceTransactionWorkerStore( Returns: A list of ApplicationServices, which may be empty. """ - results = await self.db_pool.simple_select_list( - "application_services_state", {"state": state.value}, ["as_id"] + results = cast( + List[Tuple[str]], + await self.db_pool.simple_select_list( + table="application_services_state", + keyvalues={"state": state.value}, + retcols=("as_id",), + ), ) # NB: This assumes this class is linked with ApplicationServiceStore as_list = self.get_app_services() services = [] - for res in results: + for (as_id,) in results: for service in as_list: - if service.id == res["as_id"]: + if service.id == as_id: services.append(service) return services diff --git a/synapse/storage/databases/main/client_ips.py b/synapse/storage/databases/main/client_ips.py index 8be1511859..c006129625 100644 --- a/synapse/storage/databases/main/client_ips.py +++ b/synapse/storage/databases/main/client_ips.py @@ -508,21 +508,24 @@ class ClientIpWorkerStore(ClientIpBackgroundUpdateStore, MonthlyActiveUsersWorke if device_id is not None: keyvalues["device_id"] = device_id - res = await self.db_pool.simple_select_list( - table="devices", - keyvalues=keyvalues, - retcols=("user_id", "ip", "user_agent", "device_id", "last_seen"), + res = cast( + List[Tuple[str, Optional[str], Optional[str], str, Optional[int]]], + await self.db_pool.simple_select_list( + table="devices", + keyvalues=keyvalues, + retcols=("user_id", "ip", "user_agent", "device_id", "last_seen"), + ), ) return { - (d["user_id"], d["device_id"]): DeviceLastConnectionInfo( - user_id=d["user_id"], - device_id=d["device_id"], - ip=d["ip"], - user_agent=d["user_agent"], - last_seen=d["last_seen"], + (user_id, device_id): DeviceLastConnectionInfo( + user_id=user_id, + device_id=device_id, + ip=ip, + user_agent=user_agent, + last_seen=last_seen, ) - for d in res + for user_id, ip, user_agent, device_id, last_seen in res } async def _get_user_ip_and_agents_from_database( diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index fc23d18eba..0b75f6763a 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -283,7 +283,9 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): allow_none=True, ) - async def get_devices_by_user(self, user_id: str) -> Dict[str, Dict[str, str]]: + async def get_devices_by_user( + self, user_id: str + ) -> Dict[str, Dict[str, Optional[str]]]: """Retrieve all of a user's registered devices. Only returns devices that are not marked as hidden. @@ -291,20 +293,26 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): user_id: Returns: A mapping from device_id to a dict containing "device_id", "user_id" - and "display_name" for each device. + and "display_name" for each device. Display name may be null. """ - devices = await self.db_pool.simple_select_list( - table="devices", - keyvalues={"user_id": user_id, "hidden": False}, - retcols=("user_id", "device_id", "display_name"), - desc="get_devices_by_user", + devices = cast( + List[Tuple[str, str, Optional[str]]], + await self.db_pool.simple_select_list( + table="devices", + keyvalues={"user_id": user_id, "hidden": False}, + retcols=("user_id", "device_id", "display_name"), + desc="get_devices_by_user", + ), ) - return {d["device_id"]: d for d in devices} + return { + d[1]: {"user_id": d[0], "device_id": d[1], "display_name": d[2]} + for d in devices + } async def get_devices_by_auth_provider_session_id( self, auth_provider_id: str, auth_provider_session_id: str - ) -> List[Dict[str, Any]]: + ) -> List[Tuple[str, str]]: """Retrieve the list of devices associated with a SSO IdP session ID. Args: @@ -313,14 +321,17 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): Returns: A list of dicts containing the device_id and the user_id of each device """ - return await self.db_pool.simple_select_list( - table="device_auth_providers", - keyvalues={ - "auth_provider_id": auth_provider_id, - "auth_provider_session_id": auth_provider_session_id, - }, - retcols=("user_id", "device_id"), - desc="get_devices_by_auth_provider_session_id", + return cast( + List[Tuple[str, str]], + await self.db_pool.simple_select_list( + table="device_auth_providers", + keyvalues={ + "auth_provider_id": auth_provider_id, + "auth_provider_session_id": auth_provider_session_id, + }, + retcols=("user_id", "device_id"), + desc="get_devices_by_auth_provider_session_id", + ), ) @trace @@ -821,15 +832,16 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): async def get_cached_devices_for_user( self, user_id: str ) -> Mapping[str, JsonMapping]: - devices = await self.db_pool.simple_select_list( - table="device_lists_remote_cache", - keyvalues={"user_id": user_id}, - retcols=("device_id", "content"), - desc="get_cached_devices_for_user", + devices = cast( + List[Tuple[str, str]], + await self.db_pool.simple_select_list( + table="device_lists_remote_cache", + keyvalues={"user_id": user_id}, + retcols=("device_id", "content"), + desc="get_cached_devices_for_user", + ), ) - return { - device["device_id"]: db_to_json(device["content"]) for device in devices - } + return {device[0]: db_to_json(device[1]) for device in devices} def get_cached_device_list_changes( self, @@ -1080,7 +1092,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): The IDs of users whose device lists need resync. """ if user_ids: - row_tuples = cast( + rows = cast( List[Tuple[str]], await self.db_pool.simple_select_many_batch( table="device_lists_remote_resync", @@ -1090,11 +1102,9 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): desc="get_user_ids_requiring_device_list_resync_with_iterable", ), ) - - return {row[0] for row in row_tuples} else: rows = cast( - List[Dict[str, str]], + List[Tuple[str]], await self.db_pool.simple_select_list( table="device_lists_remote_resync", keyvalues=None, @@ -1103,7 +1113,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): ), ) - return {row["user_id"] for row in rows} + return {row[0] for row in rows} async def mark_remote_users_device_caches_as_stale( self, user_ids: StrCollection diff --git a/synapse/storage/databases/main/e2e_room_keys.py b/synapse/storage/databases/main/e2e_room_keys.py index aac4cfb054..ad904a26a6 100644 --- a/synapse/storage/databases/main/e2e_room_keys.py +++ b/synapse/storage/databases/main/e2e_room_keys.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import TYPE_CHECKING, Dict, Iterable, Mapping, Optional, Tuple, cast +from typing import TYPE_CHECKING, Dict, Iterable, List, Mapping, Optional, Tuple, cast from typing_extensions import Literal, TypedDict @@ -274,32 +274,41 @@ class EndToEndRoomKeyStore(EndToEndRoomKeyBackgroundStore): if session_id: keyvalues["session_id"] = session_id - rows = await self.db_pool.simple_select_list( - table="e2e_room_keys", - keyvalues=keyvalues, - retcols=( - "user_id", - "room_id", - "session_id", - "first_message_index", - "forwarded_count", - "is_verified", - "session_data", + rows = cast( + List[Tuple[str, str, int, int, int, str]], + await self.db_pool.simple_select_list( + table="e2e_room_keys", + keyvalues=keyvalues, + retcols=( + "room_id", + "session_id", + "first_message_index", + "forwarded_count", + "is_verified", + "session_data", + ), + desc="get_e2e_room_keys", ), - desc="get_e2e_room_keys", ) sessions: Dict[ Literal["rooms"], Dict[str, Dict[Literal["sessions"], Dict[str, RoomKey]]] ] = {"rooms": {}} - for row in rows: - room_entry = sessions["rooms"].setdefault(row["room_id"], {"sessions": {}}) - room_entry["sessions"][row["session_id"]] = { - "first_message_index": row["first_message_index"], - "forwarded_count": row["forwarded_count"], + for ( + room_id, + session_id, + first_message_index, + forwarded_count, + is_verified, + session_data, + ) in rows: + room_entry = sessions["rooms"].setdefault(room_id, {"sessions": {}}) + room_entry["sessions"][session_id] = { + "first_message_index": first_message_index, + "forwarded_count": forwarded_count, # is_verified must be returned to the client as a boolean - "is_verified": bool(row["is_verified"]), - "session_data": db_to_json(row["session_data"]), + "is_verified": bool(is_verified), + "session_data": db_to_json(session_data), } return sessions diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 4f80ce75cc..f1b0991503 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1898,21 +1898,23 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas # keeping only the forward extremities (i.e. the events not referenced # by other events in the queue). We do this so that we can always # backpaginate in all the events we have dropped. - rows = await self.db_pool.simple_select_list( - table="federation_inbound_events_staging", - keyvalues={"room_id": room_id}, - retcols=("event_id", "event_json"), - desc="prune_staged_events_in_room_fetch", + rows = cast( + List[Tuple[str, str]], + await self.db_pool.simple_select_list( + table="federation_inbound_events_staging", + keyvalues={"room_id": room_id}, + retcols=("event_id", "event_json"), + desc="prune_staged_events_in_room_fetch", + ), ) # Find the set of events referenced by those in the queue, as well as # collecting all the event IDs in the queue. referenced_events: Set[str] = set() seen_events: Set[str] = set() - for row in rows: - event_id = row["event_id"] + for event_id, event_json in rows: seen_events.add(event_id) - event_d = db_to_json(row["event_json"]) + event_d = db_to_json(event_json) # We don't bother parsing the dicts into full blown event objects, # as that is needlessly expensive. diff --git a/synapse/storage/databases/main/experimental_features.py b/synapse/storage/databases/main/experimental_features.py index 654f924019..60621edeef 100644 --- a/synapse/storage/databases/main/experimental_features.py +++ b/synapse/storage/databases/main/experimental_features.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import TYPE_CHECKING, Dict, FrozenSet +from typing import TYPE_CHECKING, Dict, FrozenSet, List, Tuple, cast from synapse.storage.database import DatabasePool, LoggingDatabaseConnection from synapse.storage.databases.main import CacheInvalidationWorkerStore @@ -42,13 +42,16 @@ class ExperimentalFeaturesStore(CacheInvalidationWorkerStore): Returns: the features currently enabled for the user """ - enabled = await self.db_pool.simple_select_list( - "per_user_experimental_features", - {"user_id": user_id, "enabled": True}, - ["feature"], + enabled = cast( + List[Tuple[str]], + await self.db_pool.simple_select_list( + table="per_user_experimental_features", + keyvalues={"user_id": user_id, "enabled": True}, + retcols=("feature",), + ), ) - return frozenset(feature["feature"] for feature in enabled) + return frozenset(feature[0] for feature in enabled) async def set_features_for_user( self, diff --git a/synapse/storage/databases/main/keys.py b/synapse/storage/databases/main/keys.py index ea797864b9..ce88772f9e 100644 --- a/synapse/storage/databases/main/keys.py +++ b/synapse/storage/databases/main/keys.py @@ -248,17 +248,20 @@ class KeyStore(CacheInvalidationWorkerStore): If we have multiple entries for a given key ID, returns the most recent. """ - rows = await self.db_pool.simple_select_list( - table="server_keys_json", - keyvalues={"server_name": server_name}, - retcols=( - "key_id", - "from_server", - "ts_added_ms", - "ts_valid_until_ms", - "key_json", + rows = cast( + List[Tuple[str, str, int, int, Union[bytes, memoryview]]], + await self.db_pool.simple_select_list( + table="server_keys_json", + keyvalues={"server_name": server_name}, + retcols=( + "key_id", + "from_server", + "ts_added_ms", + "ts_valid_until_ms", + "key_json", + ), + desc="get_server_keys_json_for_remote", ), - desc="get_server_keys_json_for_remote", ) if not rows: @@ -266,14 +269,14 @@ class KeyStore(CacheInvalidationWorkerStore): # We sort the rows by ts_added_ms so that the most recently added entry # will stomp over older entries in the dictionary. - rows.sort(key=lambda r: r["ts_added_ms"]) + rows.sort(key=lambda r: r[2]) return { - row["key_id"]: FetchKeyResultForRemote( + key_id: FetchKeyResultForRemote( # Cast to bytes since postgresql returns a memoryview. - key_json=bytes(row["key_json"]), - valid_until_ts=row["ts_valid_until_ms"], - added_ts=row["ts_added_ms"], + key_json=bytes(key_json), + valid_until_ts=ts_valid_until_ms, + added_ts=ts_added_ms, ) - for row in rows + for key_id, from_server, ts_added_ms, ts_valid_until_ms, key_json in rows } diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py index 2e6b176bd2..f82140b2e8 100644 --- a/synapse/storage/databases/main/media_repository.py +++ b/synapse/storage/databases/main/media_repository.py @@ -437,25 +437,24 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): ) async def get_local_media_thumbnails(self, media_id: str) -> List[ThumbnailInfo]: - rows = await self.db_pool.simple_select_list( - "local_media_repository_thumbnails", - {"media_id": media_id}, - ( - "thumbnail_width", - "thumbnail_height", - "thumbnail_method", - "thumbnail_type", - "thumbnail_length", + rows = cast( + List[Tuple[int, int, str, str, int]], + await self.db_pool.simple_select_list( + "local_media_repository_thumbnails", + {"media_id": media_id}, + ( + "thumbnail_width", + "thumbnail_height", + "thumbnail_method", + "thumbnail_type", + "thumbnail_length", + ), + desc="get_local_media_thumbnails", ), - desc="get_local_media_thumbnails", ) return [ ThumbnailInfo( - width=row["thumbnail_width"], - height=row["thumbnail_height"], - method=row["thumbnail_method"], - type=row["thumbnail_type"], - length=row["thumbnail_length"], + width=row[0], height=row[1], method=row[2], type=row[3], length=row[4] ) for row in rows ] @@ -568,25 +567,24 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): async def get_remote_media_thumbnails( self, origin: str, media_id: str ) -> List[ThumbnailInfo]: - rows = await self.db_pool.simple_select_list( - "remote_media_cache_thumbnails", - {"media_origin": origin, "media_id": media_id}, - ( - "thumbnail_width", - "thumbnail_height", - "thumbnail_method", - "thumbnail_type", - "thumbnail_length", + rows = cast( + List[Tuple[int, int, str, str, int]], + await self.db_pool.simple_select_list( + "remote_media_cache_thumbnails", + {"media_origin": origin, "media_id": media_id}, + ( + "thumbnail_width", + "thumbnail_height", + "thumbnail_method", + "thumbnail_type", + "thumbnail_length", + ), + desc="get_remote_media_thumbnails", ), - desc="get_remote_media_thumbnails", ) return [ ThumbnailInfo( - width=row["thumbnail_width"], - height=row["thumbnail_height"], - method=row["thumbnail_method"], - type=row["thumbnail_type"], - length=row["thumbnail_length"], + width=row[0], height=row[1], method=row[2], type=row[3], length=row[4] ) for row in rows ] diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py index f5356e7f80..22025eca56 100644 --- a/synapse/storage/databases/main/push_rule.py +++ b/synapse/storage/databases/main/push_rule.py @@ -179,46 +179,44 @@ class PushRulesWorkerStore( @cached(max_entries=5000) async def get_push_rules_for_user(self, user_id: str) -> FilteredPushRules: - rows = await self.db_pool.simple_select_list( - table="push_rules", - keyvalues={"user_name": user_id}, - retcols=( - "user_name", - "rule_id", - "priority_class", - "priority", - "conditions", - "actions", + rows = cast( + List[Tuple[str, int, int, str, str]], + await self.db_pool.simple_select_list( + table="push_rules", + keyvalues={"user_name": user_id}, + retcols=( + "rule_id", + "priority_class", + "priority", + "conditions", + "actions", + ), + desc="get_push_rules_for_user", ), - desc="get_push_rules_for_user", ) - rows.sort(key=lambda row: (-int(row["priority_class"]), -int(row["priority"]))) + # Sort by highest priority_class, then highest priority. + rows.sort(key=lambda row: (-int(row[1]), -int(row[2]))) enabled_map = await self.get_push_rules_enabled_for_user(user_id) return _load_rules( - [ - ( - row["rule_id"], - row["priority_class"], - row["conditions"], - row["actions"], - ) - for row in rows - ], + [(row[0], row[1], row[3], row[4]) for row in rows], enabled_map, self.hs.config.experimental, ) async def get_push_rules_enabled_for_user(self, user_id: str) -> Dict[str, bool]: - results = await self.db_pool.simple_select_list( - table="push_rules_enable", - keyvalues={"user_name": user_id}, - retcols=("rule_id", "enabled"), - desc="get_push_rules_enabled_for_user", + results = cast( + List[Tuple[str, Optional[Union[int, bool]]]], + await self.db_pool.simple_select_list( + table="push_rules_enable", + keyvalues={"user_name": user_id}, + retcols=("rule_id", "enabled"), + desc="get_push_rules_enabled_for_user", + ), ) - return {r["rule_id"]: bool(r["enabled"]) for r in results} + return {r[0]: bool(r[1]) for r in results} async def have_push_rules_changed_for_user( self, user_id: str, last_id: int diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py index c7eb7fc478..a6a1671bd6 100644 --- a/synapse/storage/databases/main/pusher.py +++ b/synapse/storage/databases/main/pusher.py @@ -371,18 +371,20 @@ class PusherWorkerStore(SQLBaseStore): async def get_throttle_params_by_room( self, pusher_id: int ) -> Dict[str, ThrottleParams]: - res = await self.db_pool.simple_select_list( - "pusher_throttle", - {"pusher": pusher_id}, - ["room_id", "last_sent_ts", "throttle_ms"], - desc="get_throttle_params_by_room", + res = cast( + List[Tuple[str, Optional[int], Optional[int]]], + await self.db_pool.simple_select_list( + "pusher_throttle", + {"pusher": pusher_id}, + ["room_id", "last_sent_ts", "throttle_ms"], + desc="get_throttle_params_by_room", + ), ) params_by_room = {} - for row in res: - params_by_room[row["room_id"]] = ThrottleParams( - row["last_sent_ts"], - row["throttle_ms"], + for room_id, last_sent_ts, throttle_ms in res: + params_by_room[room_id] = ThrottleParams( + last_sent_ts or 0, throttle_ms or 0 ) return params_by_room diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index 9e8643ae4d..b0ef7be155 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -855,13 +855,15 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): Returns: Tuples of (auth_provider, external_id) """ - res = await self.db_pool.simple_select_list( - table="user_external_ids", - keyvalues={"user_id": mxid}, - retcols=("auth_provider", "external_id"), - desc="get_external_ids_by_user", + return cast( + List[Tuple[str, str]], + await self.db_pool.simple_select_list( + table="user_external_ids", + keyvalues={"user_id": mxid}, + retcols=("auth_provider", "external_id"), + desc="get_external_ids_by_user", + ), ) - return [(r["auth_provider"], r["external_id"]) for r in res] async def count_all_users(self) -> int: """Counts all users registered on the homeserver.""" @@ -997,13 +999,24 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): ) async def user_get_threepids(self, user_id: str) -> List[ThreepidResult]: - results = await self.db_pool.simple_select_list( - "user_threepids", - keyvalues={"user_id": user_id}, - retcols=["medium", "address", "validated_at", "added_at"], - desc="user_get_threepids", + results = cast( + List[Tuple[str, str, int, int]], + await self.db_pool.simple_select_list( + "user_threepids", + keyvalues={"user_id": user_id}, + retcols=["medium", "address", "validated_at", "added_at"], + desc="user_get_threepids", + ), ) - return [ThreepidResult(**r) for r in results] + return [ + ThreepidResult( + medium=r[0], + address=r[1], + validated_at=r[2], + added_at=r[3], + ) + for r in results + ] async def user_delete_threepid( self, user_id: str, medium: str, address: str @@ -1042,7 +1055,7 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): desc="add_user_bound_threepid", ) - async def user_get_bound_threepids(self, user_id: str) -> List[Dict[str, Any]]: + async def user_get_bound_threepids(self, user_id: str) -> List[Tuple[str, str]]: """Get the threepids that a user has bound to an identity server through the homeserver The homeserver remembers where binds to an identity server occurred. Using this method can retrieve those threepids. @@ -1051,15 +1064,18 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): user_id: The ID of the user to retrieve threepids for Returns: - List of dictionaries containing the following keys: - medium (str): The medium of the threepid (e.g "email") - address (str): The address of the threepid (e.g "bob@example.com") - """ - return await self.db_pool.simple_select_list( - table="user_threepid_id_server", - keyvalues={"user_id": user_id}, - retcols=["medium", "address"], - desc="user_get_bound_threepids", + List of tuples of two strings: + medium: The medium of the threepid (e.g "email") + address: The address of the threepid (e.g "bob@example.com") + """ + return cast( + List[Tuple[str, str]], + await self.db_pool.simple_select_list( + table="user_threepid_id_server", + keyvalues={"user_id": user_id}, + retcols=["medium", "address"], + desc="user_get_bound_threepids", + ), ) async def remove_user_bound_threepid( diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py index ce7bfd5146..419b2c7a22 100644 --- a/synapse/storage/databases/main/relations.py +++ b/synapse/storage/databases/main/relations.py @@ -384,14 +384,17 @@ class RelationsWorkerStore(SQLBaseStore): def get_all_relation_ids_for_event_txn( txn: LoggingTransaction, ) -> List[str]: - rows = self.db_pool.simple_select_list_txn( - txn=txn, - table="event_relations", - keyvalues={"relates_to_id": event_id}, - retcols=["event_id"], + rows = cast( + List[Tuple[str]], + self.db_pool.simple_select_list_txn( + txn=txn, + table="event_relations", + keyvalues={"relates_to_id": event_id}, + retcols=["event_id"], + ), ) - return [row["event_id"] for row in rows] + return [row[0] for row in rows] return await self.db_pool.runInteraction( desc="get_all_relation_ids_for_event", diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 9d24d2c347..3e8fcf1975 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -1232,28 +1232,30 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): """ room_servers: Dict[str, PartialStateResyncInfo] = {} - rows = await self.db_pool.simple_select_list( - table="partial_state_rooms", - keyvalues={}, - retcols=("room_id", "joined_via"), - desc="get_server_which_served_partial_join", + rows = cast( + List[Tuple[str, str]], + await self.db_pool.simple_select_list( + table="partial_state_rooms", + keyvalues={}, + retcols=("room_id", "joined_via"), + desc="get_server_which_served_partial_join", + ), ) - for row in rows: - room_id = row["room_id"] - joined_via = row["joined_via"] + for room_id, joined_via in rows: room_servers[room_id] = PartialStateResyncInfo(joined_via=joined_via) - rows = await self.db_pool.simple_select_list( - "partial_state_rooms_servers", - keyvalues=None, - retcols=("room_id", "server_name"), - desc="get_partial_state_rooms", + rows = cast( + List[Tuple[str, str]], + await self.db_pool.simple_select_list( + "partial_state_rooms_servers", + keyvalues=None, + retcols=("room_id", "server_name"), + desc="get_partial_state_rooms", + ), ) - for row in rows: - room_id = row["room_id"] - server_name = row["server_name"] + for room_id, server_name in rows: entry = room_servers.get(room_id) if entry is None: # There is a foreign key constraint which enforces that every room_id in diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 3a87eba430..a1627dffb7 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -1070,13 +1070,16 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore): for fully-joined rooms. """ - rows = await self.db_pool.simple_select_list( - "current_state_events", - keyvalues={"room_id": room_id}, - retcols=("event_id", "membership"), - desc="has_completed_background_updates", + rows = cast( + List[Tuple[str, Optional[str]]], + await self.db_pool.simple_select_list( + "current_state_events", + keyvalues={"room_id": room_id}, + retcols=("event_id", "membership"), + desc="has_completed_background_updates", + ), ) - return {row["event_id"]: row["membership"] for row in rows} + return dict(rows) # TODO This returns a mutable object, which is generally confusing when using a cache. @cached(max_entries=10000) # type: ignore[synapse-@cached-mutable] diff --git a/synapse/storage/databases/main/tags.py b/synapse/storage/databases/main/tags.py index 61403a98cf..7deda7790e 100644 --- a/synapse/storage/databases/main/tags.py +++ b/synapse/storage/databases/main/tags.py @@ -45,14 +45,17 @@ class TagsWorkerStore(AccountDataWorkerStore): tag content. """ - rows = await self.db_pool.simple_select_list( - "room_tags", {"user_id": user_id}, ["room_id", "tag", "content"] + rows = cast( + List[Tuple[str, str, str]], + await self.db_pool.simple_select_list( + "room_tags", {"user_id": user_id}, ["room_id", "tag", "content"] + ), ) tags_by_room: Dict[str, Dict[str, JsonDict]] = {} - for row in rows: - room_tags = tags_by_room.setdefault(row["room_id"], {}) - room_tags[row["tag"]] = db_to_json(row["content"]) + for room_id, tag, content in rows: + room_tags = tags_by_room.setdefault(room_id, {}) + room_tags[tag] = db_to_json(content) return tags_by_room async def get_all_updated_tags( @@ -161,13 +164,16 @@ class TagsWorkerStore(AccountDataWorkerStore): Returns: A mapping of tags to tag content. """ - rows = await self.db_pool.simple_select_list( - table="room_tags", - keyvalues={"user_id": user_id, "room_id": room_id}, - retcols=("tag", "content"), - desc="get_tags_for_room", + rows = cast( + List[Tuple[str, str]], + await self.db_pool.simple_select_list( + table="room_tags", + keyvalues={"user_id": user_id, "room_id": room_id}, + retcols=("tag", "content"), + desc="get_tags_for_room", + ), ) - return {row["tag"]: db_to_json(row["content"]) for row in rows} + return {tag: db_to_json(content) for tag, content in rows} async def add_tag_to_room( self, user_id: str, room_id: str, tag: str, content: JsonDict diff --git a/synapse/storage/databases/main/ui_auth.py b/synapse/storage/databases/main/ui_auth.py index 919c66f553..8ab7c42c4a 100644 --- a/synapse/storage/databases/main/ui_auth.py +++ b/synapse/storage/databases/main/ui_auth.py @@ -169,13 +169,17 @@ class UIAuthWorkerStore(SQLBaseStore): that auth-type. """ results = {} - for row in await self.db_pool.simple_select_list( - table="ui_auth_sessions_credentials", - keyvalues={"session_id": session_id}, - retcols=("stage_type", "result"), - desc="get_completed_ui_auth_stages", - ): - results[row["stage_type"]] = db_to_json(row["result"]) + rows = cast( + List[Tuple[str, str]], + await self.db_pool.simple_select_list( + table="ui_auth_sessions_credentials", + keyvalues={"session_id": session_id}, + retcols=("stage_type", "result"), + desc="get_completed_ui_auth_stages", + ), + ) + for stage_type, result in rows: + results[stage_type] = db_to_json(result) return results @@ -295,13 +299,15 @@ class UIAuthWorkerStore(SQLBaseStore): Returns: List of user_agent/ip pairs """ - rows = await self.db_pool.simple_select_list( - table="ui_auth_sessions_ips", - keyvalues={"session_id": session_id}, - retcols=("user_agent", "ip"), - desc="get_user_agents_ips_to_ui_auth_session", + return cast( + List[Tuple[str, str]], + await self.db_pool.simple_select_list( + table="ui_auth_sessions_ips", + keyvalues={"session_id": session_id}, + retcols=("user_agent", "ip"), + desc="get_user_agents_ips_to_ui_auth_session", + ), ) - return [(row["user_agent"], row["ip"]) for row in rows] async def delete_old_ui_auth_sessions(self, expiration_time: int) -> None: """ diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index 09d2a8c5b3..182e429174 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -154,16 +154,22 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): if not prev_group: return _GetStateGroupDelta(None, None) - delta_ids = self.db_pool.simple_select_list_txn( - txn, - table="state_groups_state", - keyvalues={"state_group": state_group}, - retcols=("type", "state_key", "event_id"), + delta_ids = cast( + List[Tuple[str, str, str]], + self.db_pool.simple_select_list_txn( + txn, + table="state_groups_state", + keyvalues={"state_group": state_group}, + retcols=("type", "state_key", "event_id"), + ), ) return _GetStateGroupDelta( prev_group, - {(row["type"], row["state_key"]): row["event_id"] for row in delta_ids}, + { + (event_type, state_key): event_id + for event_type, state_key, event_id in delta_ids + }, ) return await self.db_pool.runInteraction( diff --git a/tests/handlers/test_stats.py b/tests/handlers/test_stats.py index d11ded6c5b..76c56d5434 100644 --- a/tests/handlers/test_stats.py +++ b/tests/handlers/test_stats.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Tuple, cast from twisted.test.proto_helpers import MemoryReactor @@ -68,10 +68,14 @@ class StatsRoomTests(unittest.HomeserverTestCase): ) ) - async def get_all_room_state(self) -> List[Dict[str, Any]]: - return await self.store.db_pool.simple_select_list( - "room_stats_state", None, retcols=("name", "topic", "canonical_alias") + async def get_all_room_state(self) -> List[Optional[str]]: + rows = cast( + List[Tuple[Optional[str]]], + await self.store.db_pool.simple_select_list( + "room_stats_state", None, retcols=("topic",) + ), ) + return [r[0] for r in rows] def _get_current_stats( self, stats_type: str, stat_id: str @@ -130,7 +134,7 @@ class StatsRoomTests(unittest.HomeserverTestCase): r = self.get_success(self.get_all_room_state()) self.assertEqual(len(r), 1) - self.assertEqual(r[0]["topic"], "foo") + self.assertEqual(r[0], "foo") def test_create_user(self) -> None: """ diff --git a/tests/storage/databases/main/test_receipts.py b/tests/storage/databases/main/test_receipts.py index 71db47405e..98b01086bc 100644 --- a/tests/storage/databases/main/test_receipts.py +++ b/tests/storage/databases/main/test_receipts.py @@ -117,7 +117,7 @@ class ReceiptsBackgroundUpdateStoreTestCase(HomeserverTestCase): if expected_row is not None: columns += expected_row.keys() - rows = self.get_success( + row_tuples = self.get_success( self.store.db_pool.simple_select_list( table=table, keyvalues={ @@ -134,22 +134,22 @@ class ReceiptsBackgroundUpdateStoreTestCase(HomeserverTestCase): if expected_row is not None: self.assertEqual( - len(rows), + len(row_tuples), 1, f"Background update did not leave behind latest receipt in {table}", ) self.assertEqual( - rows[0], - { - "room_id": room_id, - "receipt_type": receipt_type, - "user_id": user_id, - **expected_row, - }, + row_tuples[0], + ( + room_id, + receipt_type, + user_id, + *expected_row.values(), + ), ) else: self.assertEqual( - len(rows), + len(row_tuples), 0, f"Background update did not remove all duplicate receipts from {table}", ) diff --git a/tests/storage/test__base.py b/tests/storage/test__base.py index 8bbf936ae9..8cbc974ac4 100644 --- a/tests/storage/test__base.py +++ b/tests/storage/test__base.py @@ -14,7 +14,7 @@ # limitations under the License. import secrets -from typing import Generator, Tuple +from typing import Generator, List, Tuple, cast from twisted.test.proto_helpers import MemoryReactor @@ -47,15 +47,15 @@ class UpdateUpsertManyTests(unittest.HomeserverTestCase): ) def _dump_table_to_tuple(self) -> Generator[Tuple[int, str, str], None, None]: - res = self.get_success( - self.storage.db_pool.simple_select_list( - self.table_name, None, ["id, username, value"] - ) + yield from cast( + List[Tuple[int, str, str]], + self.get_success( + self.storage.db_pool.simple_select_list( + self.table_name, None, ["id, username, value"] + ) + ), ) - for i in res: - yield (i["id"], i["username"], i["value"]) - def test_upsert_many(self) -> None: """ Upsert_many will perform the upsert operation across a batch of data. diff --git a/tests/storage/test_background_update.py b/tests/storage/test_background_update.py index abf7d0564d..3f5bfa09d4 100644 --- a/tests/storage/test_background_update.py +++ b/tests/storage/test_background_update.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging +from typing import List, Tuple, cast from unittest.mock import AsyncMock, Mock import yaml @@ -526,15 +527,18 @@ class BackgroundUpdateValidateConstraintTestCase(unittest.HomeserverTestCase): self.wait_for_background_updates() # Check the correct values are in the new table. - rows = self.get_success( - self.store.db_pool.simple_select_list( - table="test_constraint", - keyvalues={}, - retcols=("a", "b"), - ) + rows = cast( + List[Tuple[int, int]], + self.get_success( + self.store.db_pool.simple_select_list( + table="test_constraint", + keyvalues={}, + retcols=("a", "b"), + ) + ), ) - self.assertCountEqual(rows, [{"a": 1, "b": 1}, {"a": 3, "b": 3}]) + self.assertCountEqual(rows, [(1, 1), (3, 3)]) # And check that invalid rows get correctly rejected. self.get_failure( @@ -640,14 +644,17 @@ class BackgroundUpdateValidateConstraintTestCase(unittest.HomeserverTestCase): self.wait_for_background_updates() # Check the correct values are in the new table. - rows = self.get_success( - self.store.db_pool.simple_select_list( - table="test_constraint", - keyvalues={}, - retcols=("a", "b"), - ) + rows = cast( + List[Tuple[int, int]], + self.get_success( + self.store.db_pool.simple_select_list( + table="test_constraint", + keyvalues={}, + retcols=("a", "b"), + ) + ), ) - self.assertCountEqual(rows, [{"a": 1, "b": 1}, {"a": 3, "b": 3}]) + self.assertCountEqual(rows, [(1, 1), (3, 3)]) # And check that invalid rows get correctly rejected. self.get_failure( diff --git a/tests/storage/test_base.py b/tests/storage/test_base.py index 256d28e4c9..e4a52c301e 100644 --- a/tests/storage/test_base.py +++ b/tests/storage/test_base.py @@ -146,7 +146,7 @@ class SQLBaseStoreTestCase(unittest.TestCase): @defer.inlineCallbacks def test_select_list(self) -> Generator["defer.Deferred[object]", object, None]: self.mock_txn.rowcount = 3 - self.mock_txn.__iter__ = Mock(return_value=iter([(1,), (2,), (3,)])) + self.mock_txn.fetchall.return_value = [(1,), (2,), (3,)] self.mock_txn.description = (("colA", None, None, None, None, None, None),) ret = yield defer.ensureDeferred( @@ -155,7 +155,7 @@ class SQLBaseStoreTestCase(unittest.TestCase): ) ) - self.assertEqual([{"colA": 1}, {"colA": 2}, {"colA": 3}], ret) + self.assertEqual([(1,), (2,), (3,)], ret) self.mock_txn.execute.assert_called_with( "SELECT colA FROM tablename WHERE keycol = ?", ["A set"] ) diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py index 0c054a598f..8e4393d843 100644 --- a/tests/storage/test_client_ips.py +++ b/tests/storage/test_client_ips.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Dict +from typing import Any, Dict, List, Optional, Tuple, cast from unittest.mock import AsyncMock from parameterized import parameterized @@ -97,26 +97,26 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): self.reactor.advance(200) self.pump(0) - result = self.get_success( - self.store.db_pool.simple_select_list( - table="user_ips", - keyvalues={"user_id": user_id}, - retcols=["access_token", "ip", "user_agent", "device_id", "last_seen"], - desc="get_user_ip_and_agents", - ) + result = cast( + List[Tuple[str, str, str, Optional[str], int]], + self.get_success( + self.store.db_pool.simple_select_list( + table="user_ips", + keyvalues={"user_id": user_id}, + retcols=[ + "access_token", + "ip", + "user_agent", + "device_id", + "last_seen", + ], + desc="get_user_ip_and_agents", + ) + ), ) self.assertEqual( - result, - [ - { - "access_token": "access_token", - "ip": "ip", - "user_agent": "user_agent", - "device_id": None, - "last_seen": 12345678000, - } - ], + result, [("access_token", "ip", "user_agent", None, 12345678000)] ) # Add another & trigger the storage loop @@ -128,26 +128,26 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): self.reactor.advance(10) self.pump(0) - result = self.get_success( - self.store.db_pool.simple_select_list( - table="user_ips", - keyvalues={"user_id": user_id}, - retcols=["access_token", "ip", "user_agent", "device_id", "last_seen"], - desc="get_user_ip_and_agents", - ) + result = cast( + List[Tuple[str, str, str, Optional[str], int]], + self.get_success( + self.store.db_pool.simple_select_list( + table="user_ips", + keyvalues={"user_id": user_id}, + retcols=[ + "access_token", + "ip", + "user_agent", + "device_id", + "last_seen", + ], + desc="get_user_ip_and_agents", + ) + ), ) # Only one result, has been upserted. self.assertEqual( - result, - [ - { - "access_token": "access_token", - "ip": "ip", - "user_agent": "user_agent", - "device_id": None, - "last_seen": 12345878000, - } - ], + result, [("access_token", "ip", "user_agent", None, 12345878000)] ) @parameterized.expand([(False,), (True,)]) @@ -177,25 +177,23 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): self.reactor.advance(10) else: # Check that the new IP and user agent has not been stored yet - db_result = self.get_success( - self.store.db_pool.simple_select_list( - table="devices", - keyvalues={}, - retcols=("user_id", "ip", "user_agent", "device_id", "last_seen"), + db_result = cast( + List[Tuple[str, Optional[str], Optional[str], str, Optional[int]]], + self.get_success( + self.store.db_pool.simple_select_list( + table="devices", + keyvalues={}, + retcols=( + "user_id", + "ip", + "user_agent", + "device_id", + "last_seen", + ), + ), ), ) - self.assertEqual( - db_result, - [ - { - "user_id": user_id, - "device_id": device_id, - "ip": None, - "user_agent": None, - "last_seen": None, - }, - ], - ) + self.assertEqual(db_result, [(user_id, None, None, device_id, None)]) result = self.get_success( self.store.get_last_client_ip_by_device(user_id, device_id) @@ -261,30 +259,21 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): ) # Check that the new IP and user agent has not been stored yet - db_result = self.get_success( - self.store.db_pool.simple_select_list( - table="devices", - keyvalues={}, - retcols=("user_id", "ip", "user_agent", "device_id", "last_seen"), + db_result = cast( + List[Tuple[str, Optional[str], Optional[str], str, Optional[int]]], + self.get_success( + self.store.db_pool.simple_select_list( + table="devices", + keyvalues={}, + retcols=("user_id", "ip", "user_agent", "device_id", "last_seen"), + ), ), ) self.assertCountEqual( db_result, [ - { - "user_id": user_id, - "device_id": device_id_1, - "ip": "ip_1", - "user_agent": "user_agent_1", - "last_seen": 12345678000, - }, - { - "user_id": user_id, - "device_id": device_id_2, - "ip": "ip_2", - "user_agent": "user_agent_2", - "last_seen": 12345678000, - }, + (user_id, "ip_1", "user_agent_1", device_id_1, 12345678000), + (user_id, "ip_2", "user_agent_2", device_id_2, 12345678000), ], ) @@ -385,28 +374,21 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): ) # Check that the new IP and user agent has not been stored yet - db_result = self.get_success( - self.store.db_pool.simple_select_list( - table="user_ips", - keyvalues={}, - retcols=("access_token", "ip", "user_agent", "last_seen"), + db_result = cast( + List[Tuple[str, str, str, int]], + self.get_success( + self.store.db_pool.simple_select_list( + table="user_ips", + keyvalues={}, + retcols=("access_token", "ip", "user_agent", "last_seen"), + ), ), ) self.assertEqual( db_result, [ - { - "access_token": "access_token", - "ip": "ip_1", - "user_agent": "user_agent_1", - "last_seen": 12345678000, - }, - { - "access_token": "access_token", - "ip": "ip_2", - "user_agent": "user_agent_2", - "last_seen": 12345678000, - }, + ("access_token", "ip_1", "user_agent_1", 12345678000), + ("access_token", "ip_2", "user_agent_2", 12345678000), ], ) @@ -600,39 +582,49 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): self.reactor.advance(200) # We should see that in the DB - result = self.get_success( - self.store.db_pool.simple_select_list( - table="user_ips", - keyvalues={"user_id": user_id}, - retcols=["access_token", "ip", "user_agent", "device_id", "last_seen"], - desc="get_user_ip_and_agents", - ) + result = cast( + List[Tuple[str, str, str, Optional[str], int]], + self.get_success( + self.store.db_pool.simple_select_list( + table="user_ips", + keyvalues={"user_id": user_id}, + retcols=[ + "access_token", + "ip", + "user_agent", + "device_id", + "last_seen", + ], + desc="get_user_ip_and_agents", + ) + ), ) self.assertEqual( result, - [ - { - "access_token": "access_token", - "ip": "ip", - "user_agent": "user_agent", - "device_id": device_id, - "last_seen": 0, - } - ], + [("access_token", "ip", "user_agent", device_id, 0)], ) # Now advance by a couple of months self.reactor.advance(60 * 24 * 60 * 60) # We should get no results. - result = self.get_success( - self.store.db_pool.simple_select_list( - table="user_ips", - keyvalues={"user_id": user_id}, - retcols=["access_token", "ip", "user_agent", "device_id", "last_seen"], - desc="get_user_ip_and_agents", - ) + result = cast( + List[Tuple[str, str, str, Optional[str], int]], + self.get_success( + self.store.db_pool.simple_select_list( + table="user_ips", + keyvalues={"user_id": user_id}, + retcols=[ + "access_token", + "ip", + "user_agent", + "device_id", + "last_seen", + ], + desc="get_user_ip_and_agents", + ) + ), ) self.assertEqual(result, []) @@ -696,28 +688,26 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): self.reactor.advance(200) # We should see that in the DB - result = self.get_success( - self.store.db_pool.simple_select_list( - table="user_ips", - keyvalues={}, - retcols=["access_token", "ip", "user_agent", "device_id", "last_seen"], - desc="get_user_ip_and_agents", - ) + result = cast( + List[Tuple[str, str, str, Optional[str], int]], + self.get_success( + self.store.db_pool.simple_select_list( + table="user_ips", + keyvalues={}, + retcols=[ + "access_token", + "ip", + "user_agent", + "device_id", + "last_seen", + ], + desc="get_user_ip_and_agents", + ) + ), ) # ensure user1 is filtered out - self.assertEqual( - result, - [ - { - "access_token": access_token2, - "ip": "ip", - "user_agent": "user_agent", - "device_id": device_id2, - "last_seen": 0, - } - ], - ) + self.assertEqual(result, [(access_token2, "ip", "user_agent", device_id2, 0)]) class ClientIpAuthTestCase(unittest.HomeserverTestCase): diff --git a/tests/storage/test_roommember.py b/tests/storage/test_roommember.py index f4c4661aaf..36fcab06b5 100644 --- a/tests/storage/test_roommember.py +++ b/tests/storage/test_roommember.py @@ -12,6 +12,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from typing import List, Optional, Tuple, cast + from twisted.test.proto_helpers import MemoryReactor from synapse.api.constants import Membership @@ -110,21 +112,24 @@ class RoomMemberStoreTestCase(unittest.HomeserverTestCase): def test__null_byte_in_display_name_properly_handled(self) -> None: room = self.helper.create_room_as(self.u_alice, tok=self.t_alice) - res = self.get_success( - self.store.db_pool.simple_select_list( - "room_memberships", - {"user_id": "@alice:test"}, - ["display_name", "event_id"], - ) + res = cast( + List[Tuple[Optional[str], str]], + self.get_success( + self.store.db_pool.simple_select_list( + "room_memberships", + {"user_id": "@alice:test"}, + ["display_name", "event_id"], + ) + ), ) # Check that we only got one result back self.assertEqual(len(res), 1) # Check that alice's display name is "alice" - self.assertEqual(res[0]["display_name"], "alice") + self.assertEqual(res[0][0], "alice") # Grab the event_id to use later - event_id = res[0]["event_id"] + event_id = res[0][1] # Create a profile with the offending null byte in the display name new_profile = {"displayname": "ali\u0000ce"} @@ -139,21 +144,24 @@ class RoomMemberStoreTestCase(unittest.HomeserverTestCase): tok=self.t_alice, ) - res2 = self.get_success( - self.store.db_pool.simple_select_list( - "room_memberships", - {"user_id": "@alice:test"}, - ["display_name", "event_id"], - ) + res2 = cast( + List[Tuple[Optional[str], str]], + self.get_success( + self.store.db_pool.simple_select_list( + "room_memberships", + {"user_id": "@alice:test"}, + ["display_name", "event_id"], + ) + ), ) # Check that we only have two results self.assertEqual(len(res2), 2) # Filter out the previous event using the event_id we grabbed above - row = [row for row in res2 if row["event_id"] != event_id] + row = [row for row in res2 if row[1] != event_id] # Check that alice's display name is now None - self.assertEqual(row[0]["display_name"], None) + self.assertIsNone(row[0][0]) def test_room_is_locally_forgotten(self) -> None: """Test that when the last local user has forgotten a room it is known as forgotten.""" diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py index 0b9446c36c..2715c73f16 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -13,6 +13,7 @@ # limitations under the License. import logging +from typing import List, Tuple, cast from immutabledict import immutabledict @@ -584,18 +585,21 @@ class StateStoreTestCase(HomeserverTestCase): ) # check that only state events are in state_groups, and all state events are in state_groups - res = self.get_success( - self.store.db_pool.simple_select_list( - table="state_groups", - keyvalues=None, - retcols=("event_id",), - ) + res = cast( + List[Tuple[str]], + self.get_success( + self.store.db_pool.simple_select_list( + table="state_groups", + keyvalues=None, + retcols=("event_id",), + ) + ), ) events = [] for result in res: - self.assertNotIn(event3.event_id, result) - events.append(result.get("event_id")) + self.assertNotIn(event3.event_id, result) # XXX + events.append(result[0]) for event, _ in processed_events_and_context: if event.is_state(): @@ -606,23 +610,29 @@ class StateStoreTestCase(HomeserverTestCase): # has an entry and prev event in state_group_edges for event, context in processed_events_and_context: if event.is_state(): - state = self.get_success( - self.store.db_pool.simple_select_list( - table="state_groups_state", - keyvalues={"state_group": context.state_group_after_event}, - retcols=("type", "state_key"), - ) - ) - self.assertEqual(event.type, state[0].get("type")) - self.assertEqual(event.state_key, state[0].get("state_key")) - - groups = self.get_success( - self.store.db_pool.simple_select_list( - table="state_group_edges", - keyvalues={"state_group": str(context.state_group_after_event)}, - retcols=("*",), - ) + state = cast( + List[Tuple[str, str]], + self.get_success( + self.store.db_pool.simple_select_list( + table="state_groups_state", + keyvalues={"state_group": context.state_group_after_event}, + retcols=("type", "state_key"), + ) + ), ) - self.assertEqual( - context.state_group_before_event, groups[0].get("prev_state_group") + self.assertEqual(event.type, state[0][0]) + self.assertEqual(event.state_key, state[0][1]) + + groups = cast( + List[Tuple[str]], + self.get_success( + self.store.db_pool.simple_select_list( + table="state_group_edges", + keyvalues={ + "state_group": str(context.state_group_after_event) + }, + retcols=("prev_state_group",), + ) + ), ) + self.assertEqual(context.state_group_before_event, groups[0][0]) diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py index 8c72aa1722..822c41dd9f 100644 --- a/tests/storage/test_user_directory.py +++ b/tests/storage/test_user_directory.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import re -from typing import Any, Dict, Set, Tuple +from typing import Any, Dict, List, Optional, Set, Tuple, cast from unittest import mock from unittest.mock import Mock, patch @@ -62,14 +62,13 @@ class GetUserDirectoryTables: Returns a list of tuples (user_id, room_id) where room_id is public and contains the user with the given id. """ - r = await self.store.db_pool.simple_select_list( - "users_in_public_rooms", None, ("user_id", "room_id") + r = cast( + List[Tuple[str, str]], + await self.store.db_pool.simple_select_list( + "users_in_public_rooms", None, ("user_id", "room_id") + ), ) - - retval = set() - for i in r: - retval.add((i["user_id"], i["room_id"])) - return retval + return set(r) async def get_users_who_share_private_rooms(self) -> Set[Tuple[str, str, str]]: """Fetch the entire `users_who_share_private_rooms` table. @@ -78,27 +77,30 @@ class GetUserDirectoryTables: to the rows of `users_who_share_private_rooms`. """ - rows = await self.store.db_pool.simple_select_list( - "users_who_share_private_rooms", - None, - ["user_id", "other_user_id", "room_id"], + rows = cast( + List[Tuple[str, str, str]], + await self.store.db_pool.simple_select_list( + "users_who_share_private_rooms", + None, + ["user_id", "other_user_id", "room_id"], + ), ) - rv = set() - for row in rows: - rv.add((row["user_id"], row["other_user_id"], row["room_id"])) - return rv + return set(rows) async def get_users_in_user_directory(self) -> Set[str]: """Fetch the set of users in the `user_directory` table. This is useful when checking we've correctly excluded users from the directory. """ - result = await self.store.db_pool.simple_select_list( - "user_directory", - None, - ["user_id"], + result = cast( + List[Tuple[str]], + await self.store.db_pool.simple_select_list( + "user_directory", + None, + ["user_id"], + ), ) - return {row["user_id"] for row in result} + return {row[0] for row in result} async def get_profiles_in_user_directory(self) -> Dict[str, ProfileInfo]: """Fetch users and their profiles from the `user_directory` table. @@ -107,16 +109,17 @@ class GetUserDirectoryTables: It's almost the entire contents of the `user_directory` table: the only thing missing is an unused room_id column. """ - rows = await self.store.db_pool.simple_select_list( - "user_directory", - None, - ("user_id", "display_name", "avatar_url"), + rows = cast( + List[Tuple[str, Optional[str], Optional[str]]], + await self.store.db_pool.simple_select_list( + "user_directory", + None, + ("user_id", "display_name", "avatar_url"), + ), ) return { - row["user_id"]: ProfileInfo( - display_name=row["display_name"], avatar_url=row["avatar_url"] - ) - for row in rows + user_id: ProfileInfo(display_name=display_name, avatar_url=avatar_url) + for user_id, display_name, avatar_url in rows } async def get_tables( -- cgit 1.5.1 From 679c691f6f7c4f7901e6d075a645a8ade20f44d5 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 26 Oct 2023 15:12:28 -0400 Subject: Remove more usages of cursor_to_dict. (#16551) Mostly to improve type safety. --- changelog.d/16551.misc | 1 + synapse/handlers/identity.py | 18 ++++---- synapse/handlers/ui_auth/checkers.py | 6 +-- synapse/media/media_repository.py | 5 +-- synapse/rest/admin/federation.py | 14 +++++- synapse/rest/admin/rooms.py | 12 ++++- synapse/rest/admin/statistics.py | 13 +++++- synapse/storage/database.py | 30 ++----------- synapse/storage/databases/main/censor_events.py | 2 +- synapse/storage/databases/main/devices.py | 3 +- synapse/storage/databases/main/end_to_end_keys.py | 1 - .../storage/databases/main/events_bg_updates.py | 7 +-- .../databases/main/events_forward_extremities.py | 15 ++++--- synapse/storage/databases/main/media_repository.py | 19 ++++---- synapse/storage/databases/main/registration.py | 43 ++++++++++++------ synapse/storage/databases/main/roommember.py | 4 +- synapse/storage/databases/main/search.py | 52 +++++++++++++--------- synapse/storage/databases/main/stats.py | 15 ++++--- synapse/storage/databases/main/stream.py | 3 +- synapse/storage/databases/main/transactions.py | 28 ++++++++++-- synapse/storage/databases/main/user_directory.py | 14 +++--- synapse/storage/databases/state/bg_updates.py | 1 - tests/federation/test_federation_catch_up.py | 1 - tests/storage/test_background_update.py | 16 +++---- tests/storage/test_profile.py | 2 +- tests/storage/test_user_filters.py | 2 +- 26 files changed, 193 insertions(+), 134 deletions(-) create mode 100644 changelog.d/16551.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/16551.misc b/changelog.d/16551.misc new file mode 100644 index 0000000000..93ceaeafc9 --- /dev/null +++ b/changelog.d/16551.misc @@ -0,0 +1 @@ +Improve type hints. diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 472879c964..c041b67993 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -19,6 +19,8 @@ import logging import urllib.parse from typing import TYPE_CHECKING, Awaitable, Callable, Dict, List, Optional, Tuple +import attr + from synapse.api.errors import ( CodeMessageException, Codes, @@ -357,9 +359,9 @@ class IdentityHandler: # Check to see if a session already exists and that it is not yet # marked as validated - if session and session.get("validated_at") is None: - session_id = session["session_id"] - last_send_attempt = session["last_send_attempt"] + if session and session.validated_at is None: + session_id = session.session_id + last_send_attempt = session.last_send_attempt # Check that the send_attempt is higher than previous attempts if send_attempt <= last_send_attempt: @@ -480,7 +482,6 @@ class IdentityHandler: # We don't actually know which medium this 3PID is. Thus we first assume it's email, # and if validation fails we try msisdn - validation_session = None # Try to validate as email if self.hs.config.email.can_verify_email: @@ -488,19 +489,18 @@ class IdentityHandler: validation_session = await self.store.get_threepid_validation_session( "email", client_secret, sid=sid, validated=True ) - - if validation_session: - return validation_session + if validation_session: + return attr.asdict(validation_session) # Try to validate as msisdn if self.hs.config.registration.account_threepid_delegate_msisdn: # Ask our delegated msisdn identity server - validation_session = await self.threepid_from_creds( + return await self.threepid_from_creds( self.hs.config.registration.account_threepid_delegate_msisdn, threepid_creds, ) - return validation_session + return None async def proxy_msisdn_submit_token( self, id_server: str, client_secret: str, sid: str, token: str diff --git a/synapse/handlers/ui_auth/checkers.py b/synapse/handlers/ui_auth/checkers.py index 78a75bfed6..ab8f7610e9 100644 --- a/synapse/handlers/ui_auth/checkers.py +++ b/synapse/handlers/ui_auth/checkers.py @@ -187,9 +187,9 @@ class _BaseThreepidAuthChecker: if row: threepid = { - "medium": row["medium"], - "address": row["address"], - "validated_at": row["validated_at"], + "medium": row.medium, + "address": row.address, + "validated_at": row.validated_at, } # Valid threepid returned, delete from the db diff --git a/synapse/media/media_repository.py b/synapse/media/media_repository.py index 7fd46901f7..72b0f1c5de 100644 --- a/synapse/media/media_repository.py +++ b/synapse/media/media_repository.py @@ -949,10 +949,7 @@ class MediaRepository: deleted = 0 - for media in old_media: - origin = media["media_origin"] - media_id = media["media_id"] - file_id = media["filesystem_id"] + for origin, media_id, file_id in old_media: key = (origin, media_id) logger.info("Deleting: %r", key) diff --git a/synapse/rest/admin/federation.py b/synapse/rest/admin/federation.py index 8a617af599..a6ce787da1 100644 --- a/synapse/rest/admin/federation.py +++ b/synapse/rest/admin/federation.py @@ -85,7 +85,19 @@ class ListDestinationsRestServlet(RestServlet): destinations, total = await self._store.get_destinations_paginate( start, limit, destination, order_by, direction ) - response = {"destinations": destinations, "total": total} + response = { + "destinations": [ + { + "destination": r[0], + "retry_last_ts": r[1], + "retry_interval": r[2], + "failure_ts": r[3], + "last_successful_stream_ordering": r[4], + } + for r in destinations + ], + "total": total, + } if (start + limit) < total: response["next_token"] = str(start + len(destinations)) diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py index 436718c8b2..2d4da38db9 100644 --- a/synapse/rest/admin/rooms.py +++ b/synapse/rest/admin/rooms.py @@ -724,7 +724,17 @@ class ForwardExtremitiesRestServlet(ResolveRoomIdMixin, RestServlet): room_id, _ = await self.resolve_room_id(room_identifier) extremities = await self.store.get_forward_extremities_for_room(room_id) - return HTTPStatus.OK, {"count": len(extremities), "results": extremities} + result = [ + { + "event_id": ex[0], + "state_group": ex[1], + "depth": ex[2], + "received_ts": ex[3], + } + for ex in extremities + ] + + return HTTPStatus.OK, {"count": len(extremities), "results": result} class RoomEventContextServlet(RestServlet): diff --git a/synapse/rest/admin/statistics.py b/synapse/rest/admin/statistics.py index 19780e4b4c..75d8a37ccf 100644 --- a/synapse/rest/admin/statistics.py +++ b/synapse/rest/admin/statistics.py @@ -108,7 +108,18 @@ class UserMediaStatisticsRestServlet(RestServlet): users_media, total = await self.store.get_users_media_usage_paginate( start, limit, from_ts, until_ts, order_by, direction, search_term ) - ret = {"users": users_media, "total": total} + ret = { + "users": [ + { + "user_id": r[0], + "displayname": r[1], + "media_count": r[2], + "media_length": r[3], + } + for r in users_media + ], + "total": total, + } if (start + limit) < total: ret["next_token"] = start + len(users_media) diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 774d5c12f0..b1ece63845 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -35,7 +35,6 @@ from typing import ( Tuple, Type, TypeVar, - Union, cast, overload, ) @@ -1047,43 +1046,20 @@ class DatabasePool: results = [dict(zip(col_headers, row)) for row in cursor] return results - @overload - async def execute( - self, desc: str, decoder: Literal[None], query: str, *args: Any - ) -> List[Tuple[Any, ...]]: - ... - - @overload - async def execute( - self, desc: str, decoder: Callable[[Cursor], R], query: str, *args: Any - ) -> R: - ... - - async def execute( - self, - desc: str, - decoder: Optional[Callable[[Cursor], R]], - query: str, - *args: Any, - ) -> Union[List[Tuple[Any, ...]], R]: + async def execute(self, desc: str, query: str, *args: Any) -> List[Tuple[Any, ...]]: """Runs a single query for a result set. Args: desc: description of the transaction, for logging and metrics - decoder - The function which can resolve the cursor results to - something meaningful. query - The query string to execute *args - Query args. Returns: The result of decoder(results) """ - def interaction(txn: LoggingTransaction) -> Union[List[Tuple[Any, ...]], R]: + def interaction(txn: LoggingTransaction) -> List[Tuple[Any, ...]]: txn.execute(query, args) - if decoder: - return decoder(txn) - else: - return txn.fetchall() + return txn.fetchall() return await self.runInteraction(desc, interaction) diff --git a/synapse/storage/databases/main/censor_events.py b/synapse/storage/databases/main/censor_events.py index 58177ecec1..711fdddd4e 100644 --- a/synapse/storage/databases/main/censor_events.py +++ b/synapse/storage/databases/main/censor_events.py @@ -93,7 +93,7 @@ class CensorEventsStore(EventsWorkerStore, CacheInvalidationWorkerStore, SQLBase """ rows = await self.db_pool.execute( - "_censor_redactions_fetch", None, sql, before_ts, 100 + "_censor_redactions_fetch", sql, before_ts, 100 ) updates = [] diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 0b75f6763a..49edbb9e06 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -894,7 +894,6 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): rows = await self.db_pool.execute( "get_all_devices_changed", - None, sql, from_key, to_key, @@ -978,7 +977,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): WHERE from_user_id = ? AND stream_id > ? """ rows = await self.db_pool.execute( - "get_users_whose_signatures_changed", None, sql, user_id, from_key + "get_users_whose_signatures_changed", sql, user_id, from_key ) return {user for row in rows for user in db_to_json(row[0])} else: diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index f13d776b0d..f70f95eeba 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -155,7 +155,6 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker """ rows = await self.db_pool.execute( "get_e2e_device_keys_for_federation_query_check", - None, sql, now_stream_id, user_id, diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index c5fce1c82b..0061805150 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1310,12 +1310,9 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): # ANALYZE the new column to build stats on it, to encourage PostgreSQL to use the # indexes on it. - # We need to pass execute a dummy function to handle the txn's result otherwise - # it tries to call fetchall() on it and fails because there's no result to fetch. - await self.db_pool.execute( + await self.db_pool.runInteraction( "background_analyze_new_stream_ordering_column", - lambda txn: None, - "ANALYZE events(stream_ordering2)", + lambda txn: txn.execute("ANALYZE events(stream_ordering2)"), ) await self.db_pool.runInteraction( diff --git a/synapse/storage/databases/main/events_forward_extremities.py b/synapse/storage/databases/main/events_forward_extremities.py index f851bff604..0ba84b1469 100644 --- a/synapse/storage/databases/main/events_forward_extremities.py +++ b/synapse/storage/databases/main/events_forward_extremities.py @@ -13,7 +13,7 @@ # limitations under the License. import logging -from typing import Any, Dict, List +from typing import List, Optional, Tuple, cast from synapse.api.errors import SynapseError from synapse.storage.database import LoggingTransaction @@ -91,12 +91,17 @@ class EventForwardExtremitiesStore( async def get_forward_extremities_for_room( self, room_id: str - ) -> List[Dict[str, Any]]: - """Get list of forward extremities for a room.""" + ) -> List[Tuple[str, int, int, Optional[int]]]: + """ + Get list of forward extremities for a room. + + Returns: + A list of tuples of event_id, state_group, depth, and received_ts. + """ def get_forward_extremities_for_room_txn( txn: LoggingTransaction, - ) -> List[Dict[str, Any]]: + ) -> List[Tuple[str, int, int, Optional[int]]]: sql = """ SELECT event_id, state_group, depth, received_ts FROM event_forward_extremities @@ -106,7 +111,7 @@ class EventForwardExtremitiesStore( """ txn.execute(sql, (room_id,)) - return self.db_pool.cursor_to_dict(txn) + return cast(List[Tuple[str, int, int, Optional[int]]], txn.fetchall()) return await self.db_pool.runInteraction( "get_forward_extremities_for_room", diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py index f82140b2e8..aeb3db596c 100644 --- a/synapse/storage/databases/main/media_repository.py +++ b/synapse/storage/databases/main/media_repository.py @@ -650,7 +650,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): async def get_remote_media_ids( self, before_ts: int, include_quarantined_media: bool - ) -> List[Dict[str, str]]: + ) -> List[Tuple[str, str, str]]: """ Retrieve a list of server name, media ID tuples from the remote media cache. @@ -664,12 +664,14 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): A list of tuples containing: * The server name of homeserver where the media originates from, * The ID of the media. + * The filesystem ID. + """ + + sql = """ + SELECT media_origin, media_id, filesystem_id + FROM remote_media_cache + WHERE last_access_ts < ? """ - sql = ( - "SELECT media_origin, media_id, filesystem_id" - " FROM remote_media_cache" - " WHERE last_access_ts < ?" - ) if include_quarantined_media is False: # Only include media that has not been quarantined @@ -677,8 +679,9 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): AND quarantined_by IS NULL """ - return await self.db_pool.execute( - "get_remote_media_ids", self.db_pool.cursor_to_dict, sql, before_ts + return cast( + List[Tuple[str, str, str]], + await self.db_pool.execute("get_remote_media_ids", sql, before_ts), ) async def delete_remote_media(self, media_origin: str, media_id: str) -> None: diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index b0ef7be155..e09ab21593 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -151,6 +151,22 @@ class ThreepidResult: added_at: int +@attr.s(frozen=True, slots=True, auto_attribs=True) +class ThreepidValidationSession: + address: str + """address of the 3pid""" + medium: str + """medium of the 3pid""" + client_secret: str + """a secret provided by the client for this validation session""" + session_id: str + """ID of the validation session""" + last_send_attempt: int + """a number serving to dedupe send attempts for this session""" + validated_at: Optional[int] + """timestamp of when this session was validated if so""" + + class RegistrationWorkerStore(CacheInvalidationWorkerStore): def __init__( self, @@ -1172,7 +1188,7 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): address: Optional[str] = None, sid: Optional[str] = None, validated: Optional[bool] = True, - ) -> Optional[Dict[str, Any]]: + ) -> Optional[ThreepidValidationSession]: """Gets a session_id and last_send_attempt (if available) for a combination of validation metadata @@ -1187,15 +1203,7 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): perform no filtering Returns: - A dict containing the following: - * address - address of the 3pid - * medium - medium of the 3pid - * client_secret - a secret provided by the client for this validation session - * session_id - ID of the validation session - * send_attempt - a number serving to dedupe send attempts for this session - * validated_at - timestamp of when this session was validated if so - - Otherwise None if a validation session is not found + A ThreepidValidationSession or None if a validation session is not found """ if not client_secret: raise SynapseError( @@ -1214,7 +1222,7 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): def get_threepid_validation_session_txn( txn: LoggingTransaction, - ) -> Optional[Dict[str, Any]]: + ) -> Optional[ThreepidValidationSession]: sql = """ SELECT address, session_id, medium, client_secret, last_send_attempt, validated_at @@ -1229,11 +1237,18 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): sql += " LIMIT 1" txn.execute(sql, list(keyvalues.values())) - rows = self.db_pool.cursor_to_dict(txn) - if not rows: + row = txn.fetchone() + if not row: return None - return rows[0] + return ThreepidValidationSession( + address=row[0], + session_id=row[1], + medium=row[2], + client_secret=row[3], + last_send_attempt=row[4], + validated_at=row[5], + ) return await self.db_pool.runInteraction( "get_threepid_validation_session", get_threepid_validation_session_txn diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index a1627dffb7..67e149b586 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -940,7 +940,7 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore): like_clause = "%:" + host rows = await self.db_pool.execute( - "is_host_joined", None, sql, membership, room_id, like_clause + "is_host_joined", sql, membership, room_id, like_clause ) if not rows: @@ -1168,7 +1168,7 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore): AND forgotten = 0; """ - rows = await self.db_pool.execute("is_forgotten_room", None, sql, room_id) + rows = await self.db_pool.execute("is_forgotten_room", sql, room_id) # `count(*)` returns always an integer # If any rows still exist it means someone has not forgotten this room yet diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py index 1d69c4a5f0..dbde9130c6 100644 --- a/synapse/storage/databases/main/search.py +++ b/synapse/storage/databases/main/search.py @@ -26,6 +26,7 @@ from typing import ( Set, Tuple, Union, + cast, ) import attr @@ -506,16 +507,18 @@ class SearchStore(SearchBackgroundUpdateStore): # entire table from the database. sql += " ORDER BY rank DESC LIMIT 500" - results = await self.db_pool.execute( - "search_msgs", self.db_pool.cursor_to_dict, sql, *args + # List of tuples of (rank, room_id, event_id). + results = cast( + List[Tuple[Union[int, float], str, str]], + await self.db_pool.execute("search_msgs", sql, *args), ) - results = list(filter(lambda row: row["room_id"] in room_ids, results)) + results = list(filter(lambda row: row[1] in room_ids, results)) # We set redact_behaviour to block here to prevent redacted events being returned in # search results (which is a data leak) events = await self.get_events_as_list( # type: ignore[attr-defined] - [r["event_id"] for r in results], + [r[2] for r in results], redact_behaviour=EventRedactBehaviour.block, ) @@ -527,16 +530,18 @@ class SearchStore(SearchBackgroundUpdateStore): count_sql += " GROUP BY room_id" - count_results = await self.db_pool.execute( - "search_rooms_count", self.db_pool.cursor_to_dict, count_sql, *count_args + # List of tuples of (room_id, count). + count_results = cast( + List[Tuple[str, int]], + await self.db_pool.execute("search_rooms_count", count_sql, *count_args), ) - count = sum(row["count"] for row in count_results if row["room_id"] in room_ids) + count = sum(row[1] for row in count_results if row[0] in room_ids) return { "results": [ - {"event": event_map[r["event_id"]], "rank": r["rank"]} + {"event": event_map[r[2]], "rank": r[0]} for r in results - if r["event_id"] in event_map + if r[2] in event_map ], "highlights": highlights, "count": count, @@ -604,7 +609,7 @@ class SearchStore(SearchBackgroundUpdateStore): search_query = search_term sql = """ SELECT ts_rank_cd(vector, websearch_to_tsquery('english', ?)) as rank, - origin_server_ts, stream_ordering, room_id, event_id + room_id, event_id, origin_server_ts, stream_ordering FROM event_search WHERE vector @@ websearch_to_tsquery('english', ?) AND """ @@ -665,16 +670,18 @@ class SearchStore(SearchBackgroundUpdateStore): # mypy expects to append only a `str`, not an `int` args.append(limit) - results = await self.db_pool.execute( - "search_rooms", self.db_pool.cursor_to_dict, sql, *args + # List of tuples of (rank, room_id, event_id, origin_server_ts, stream_ordering). + results = cast( + List[Tuple[Union[int, float], str, str, int, int]], + await self.db_pool.execute("search_rooms", sql, *args), ) - results = list(filter(lambda row: row["room_id"] in room_ids, results)) + results = list(filter(lambda row: row[1] in room_ids, results)) # We set redact_behaviour to block here to prevent redacted events being returned in # search results (which is a data leak) events = await self.get_events_as_list( # type: ignore[attr-defined] - [r["event_id"] for r in results], + [r[2] for r in results], redact_behaviour=EventRedactBehaviour.block, ) @@ -686,22 +693,23 @@ class SearchStore(SearchBackgroundUpdateStore): count_sql += " GROUP BY room_id" - count_results = await self.db_pool.execute( - "search_rooms_count", self.db_pool.cursor_to_dict, count_sql, *count_args + # List of tuples of (room_id, count). + count_results = cast( + List[Tuple[str, int]], + await self.db_pool.execute("search_rooms_count", count_sql, *count_args), ) - count = sum(row["count"] for row in count_results if row["room_id"] in room_ids) + count = sum(row[1] for row in count_results if row[0] in room_ids) return { "results": [ { - "event": event_map[r["event_id"]], - "rank": r["rank"], - "pagination_token": "%s,%s" - % (r["origin_server_ts"], r["stream_ordering"]), + "event": event_map[r[2]], + "rank": r[0], + "pagination_token": "%s,%s" % (r[3], r[4]), } for r in results - if r["event_id"] in event_map + if r[2] in event_map ], "highlights": highlights, "count": count, diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py index 5b2d0ba870..e96c9b0486 100644 --- a/synapse/storage/databases/main/stats.py +++ b/synapse/storage/databases/main/stats.py @@ -679,7 +679,7 @@ class StatsStore(StateDeltasStore): order_by: Optional[str] = UserSortOrder.USER_ID.value, direction: Direction = Direction.FORWARDS, search_term: Optional[str] = None, - ) -> Tuple[List[JsonDict], int]: + ) -> Tuple[List[Tuple[str, Optional[str], int, int]], int]: """Function to retrieve a paginated list of users and their uploaded local media (size and number). This will return a json list of users and the total number of users matching the filter criteria. @@ -692,14 +692,19 @@ class StatsStore(StateDeltasStore): order_by: the sort order of the returned list direction: sort ascending or descending search_term: a string to filter user names by + Returns: - A list of user dicts and an integer representing the total number of - users that exist given this query + A tuple of: + A list of tuples of user information (the user ID, displayname, + total number of media, total length of media) and + + An integer representing the total number of users that exist + given this query """ def get_users_media_usage_paginate_txn( txn: LoggingTransaction, - ) -> Tuple[List[JsonDict], int]: + ) -> Tuple[List[Tuple[str, Optional[str], int, int]], int]: filters = [] args: list = [] @@ -773,7 +778,7 @@ class StatsStore(StateDeltasStore): args += [limit, start] txn.execute(sql, args) - users = self.db_pool.cursor_to_dict(txn) + users = cast(List[Tuple[str, Optional[str], int, int]], txn.fetchall()) return users, count diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 872df6bda1..2225f8272d 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -1078,7 +1078,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): """ row = await self.db_pool.execute( - "get_current_topological_token", None, sql, room_id, room_id, stream_key + "get_current_topological_token", sql, room_id, room_id, stream_key ) return row[0][0] if row else 0 @@ -1636,7 +1636,6 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): rows = await self.db_pool.execute( "get_timeline_gaps", - None, sql, room_id, from_token.stream if from_token else 0, diff --git a/synapse/storage/databases/main/transactions.py b/synapse/storage/databases/main/transactions.py index c4a6475060..fecddb4144 100644 --- a/synapse/storage/databases/main/transactions.py +++ b/synapse/storage/databases/main/transactions.py @@ -478,7 +478,10 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore): destination: Optional[str] = None, order_by: str = DestinationSortOrder.DESTINATION.value, direction: Direction = Direction.FORWARDS, - ) -> Tuple[List[JsonDict], int]: + ) -> Tuple[ + List[Tuple[str, Optional[int], Optional[int], Optional[int], Optional[int]]], + int, + ]: """Function to retrieve a paginated list of destinations. This will return a json list of destinations and the total number of destinations matching the filter criteria. @@ -490,13 +493,23 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore): order_by: the sort order of the returned list direction: sort ascending or descending Returns: - A tuple of a list of mappings from destination to information + A tuple of a list of tuples of destination information: + * destination + * retry_last_ts + * retry_interval + * failure_ts + * last_successful_stream_ordering and a count of total destinations. """ def get_destinations_paginate_txn( txn: LoggingTransaction, - ) -> Tuple[List[JsonDict], int]: + ) -> Tuple[ + List[ + Tuple[str, Optional[int], Optional[int], Optional[int], Optional[int]] + ], + int, + ]: order_by_column = DestinationSortOrder(order_by).value if direction == Direction.BACKWARDS: @@ -523,7 +536,14 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore): LIMIT ? OFFSET ? """ txn.execute(sql, args + [limit, start]) - destinations = self.db_pool.cursor_to_dict(txn) + destinations = cast( + List[ + Tuple[ + str, Optional[int], Optional[int], Optional[int], Optional[int] + ] + ], + txn.fetchall(), + ) return destinations, count return await self.db_pool.runInteraction( diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index 23eb92c514..a9f5d68b63 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -1145,15 +1145,19 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore): raise Exception("Unrecognized database engine") results = cast( - List[UserProfile], - await self.db_pool.execute( - "search_user_dir", self.db_pool.cursor_to_dict, sql, *args - ), + List[Tuple[str, Optional[str], Optional[str]]], + await self.db_pool.execute("search_user_dir", sql, *args), ) limited = len(results) > limit - return {"limited": limited, "results": results[0:limit]} + return { + "limited": limited, + "results": [ + {"user_id": r[0], "display_name": r[1], "avatar_url": r[2]} + for r in results[0:limit] + ], + } def _filter_text_for_index(text: str) -> str: diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py index 6ff533a129..0f9c550b27 100644 --- a/synapse/storage/databases/state/bg_updates.py +++ b/synapse/storage/databases/state/bg_updates.py @@ -359,7 +359,6 @@ class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore): if max_group is None: rows = await self.db_pool.execute( "_background_deduplicate_state", - None, "SELECT coalesce(max(id), 0) FROM state_groups", ) max_group = rows[0][0] diff --git a/tests/federation/test_federation_catch_up.py b/tests/federation/test_federation_catch_up.py index 75ae740b43..08214b0013 100644 --- a/tests/federation/test_federation_catch_up.py +++ b/tests/federation/test_federation_catch_up.py @@ -100,7 +100,6 @@ class FederationCatchUpTestCases(FederatingHomeserverTestCase): event_id, stream_ordering = self.get_success( self.hs.get_datastores().main.db_pool.execute( "test:get_destination_rooms", - None, """ SELECT event_id, stream_ordering FROM destination_rooms dr diff --git a/tests/storage/test_background_update.py b/tests/storage/test_background_update.py index 3f5bfa09d4..67ea640902 100644 --- a/tests/storage/test_background_update.py +++ b/tests/storage/test_background_update.py @@ -457,8 +457,8 @@ class BackgroundUpdateValidateConstraintTestCase(unittest.HomeserverTestCase): ); """ self.get_success( - self.store.db_pool.execute( - "test_not_null_constraint", lambda _: None, table_sql + self.store.db_pool.runInteraction( + "test_not_null_constraint", lambda txn: txn.execute(table_sql) ) ) @@ -466,8 +466,8 @@ class BackgroundUpdateValidateConstraintTestCase(unittest.HomeserverTestCase): # using SQLite. index_sql = "CREATE INDEX test_index ON test_constraint(a)" self.get_success( - self.store.db_pool.execute( - "test_not_null_constraint", lambda _: None, index_sql + self.store.db_pool.runInteraction( + "test_not_null_constraint", lambda txn: txn.execute(index_sql) ) ) @@ -574,13 +574,13 @@ class BackgroundUpdateValidateConstraintTestCase(unittest.HomeserverTestCase): ); """ self.get_success( - self.store.db_pool.execute( - "test_foreign_key_constraint", lambda _: None, base_sql + self.store.db_pool.runInteraction( + "test_foreign_key_constraint", lambda txn: txn.execute(base_sql) ) ) self.get_success( - self.store.db_pool.execute( - "test_foreign_key_constraint", lambda _: None, table_sql + self.store.db_pool.runInteraction( + "test_foreign_key_constraint", lambda txn: txn.execute(table_sql) ) ) diff --git a/tests/storage/test_profile.py b/tests/storage/test_profile.py index 95f99f4130..6afb5403bd 100644 --- a/tests/storage/test_profile.py +++ b/tests/storage/test_profile.py @@ -120,7 +120,7 @@ class ProfileStoreTestCase(unittest.HomeserverTestCase): res = self.get_success( self.store.db_pool.execute( - "", None, "SELECT full_user_id from profiles ORDER BY full_user_id" + "", "SELECT full_user_id from profiles ORDER BY full_user_id" ) ) self.assertEqual(len(res), len(expected_values)) diff --git a/tests/storage/test_user_filters.py b/tests/storage/test_user_filters.py index d4637d9d1e..2da6a018e8 100644 --- a/tests/storage/test_user_filters.py +++ b/tests/storage/test_user_filters.py @@ -87,7 +87,7 @@ class UserFiltersStoreTestCase(unittest.HomeserverTestCase): res = self.get_success( self.store.db_pool.execute( - "", None, "SELECT full_user_id from user_filters ORDER BY full_user_id" + "", "SELECT full_user_id from user_filters ORDER BY full_user_id" ) ) self.assertEqual(len(res), len(expected_values)) -- cgit 1.5.1 From 2bf934140625e91e1e27ffc9f717f6f2d277b2b9 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 27 Oct 2023 12:50:50 -0400 Subject: Ensure local invited & knocking users leave before purge. (#16559) This is mostly useful for federated rooms where some users would get stuck in the invite or knock state when the room was purged from their homeserver. --- changelog.d/16559.bugfix | 1 + synapse/handlers/room.py | 7 ++-- synapse/storage/databases/main/roommember.py | 16 +++++++++ tests/rest/admin/test_room.py | 53 +++++++++++++++++++++++++++- 4 files changed, 73 insertions(+), 4 deletions(-) create mode 100644 changelog.d/16559.bugfix (limited to 'synapse/storage/databases') diff --git a/changelog.d/16559.bugfix b/changelog.d/16559.bugfix new file mode 100644 index 0000000000..e0fb16f807 --- /dev/null +++ b/changelog.d/16559.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug where invited/knocking users would not leave during a room purge. diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 97c9f01245..6d680b0795 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1939,9 +1939,10 @@ class RoomShutdownHandler: else: logger.info("Shutting down room %r", room_id) - users = await self.store.get_users_in_room(room_id) - for user_id in users: - if not self.hs.is_mine_id(user_id): + users = await self.store.get_local_users_related_to_room(room_id) + for user_id, membership in users: + # If the user is not in the room (or is banned), nothing to do. + if membership not in (Membership.JOIN, Membership.INVITE, Membership.KNOCK): continue logger.info("Kicking %r from %r...", user_id, room_id) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 67e149b586..1ed7f2d0ef 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -482,6 +482,22 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore): desc="get_local_users_in_room", ) + async def get_local_users_related_to_room( + self, room_id: str + ) -> List[Tuple[str, str]]: + """ + Retrieves a list of the current roommembers who are local to the server and their membership status. + """ + return cast( + List[Tuple[str, str]], + await self.db_pool.simple_select_list( + table="local_current_membership", + keyvalues={"room_id": room_id}, + retcols=("user_id", "membership"), + desc="get_local_users_in_room", + ), + ) + async def check_local_user_in_room(self, user_id: str, room_id: str) -> bool: """ Check whether a given local user is currently joined to the given room. diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py index 6ed451d7c4..206ca7f083 100644 --- a/tests/rest/admin/test_room.py +++ b/tests/rest/admin/test_room.py @@ -29,7 +29,7 @@ from synapse.handlers.pagination import ( PURGE_ROOM_ACTION_NAME, SHUTDOWN_AND_PURGE_ROOM_ACTION_NAME, ) -from synapse.rest.client import directory, events, login, room +from synapse.rest.client import directory, events, knock, login, room, sync from synapse.server import HomeServer from synapse.types import UserID from synapse.util import Clock @@ -49,6 +49,8 @@ class DeleteRoomTestCase(unittest.HomeserverTestCase): login.register_servlets, events.register_servlets, room.register_servlets, + knock.register_servlets, + sync.register_servlets, room.register_deprecated_servlets, ] @@ -254,6 +256,55 @@ class DeleteRoomTestCase(unittest.HomeserverTestCase): self._is_blocked(self.room_id, expect=False) self._has_no_members(self.room_id) + def test_purge_room_unjoined(self) -> None: + """Test to purge a room when there are invited or knocked users.""" + # Test that room is not purged + with self.assertRaises(AssertionError): + self._is_purged(self.room_id) + + # Test that room is not blocked + self._is_blocked(self.room_id, expect=False) + + # Assert one user in room + self._is_member(room_id=self.room_id, user_id=self.other_user) + self.helper.send_state( + self.room_id, + EventTypes.JoinRules, + {"join_rule": "knock"}, + tok=self.other_user_tok, + ) + + # Invite a user. + invited_user = self.register_user("invited", "pass") + self.helper.invite( + self.room_id, self.other_user, invited_user, tok=self.other_user_tok + ) + + # Have a user knock. + knocked_user = self.register_user("knocked", "pass") + knocked_user_tok = self.login("knocked", "pass") + self.helper.knock(self.room_id, knocked_user, tok=knocked_user_tok) + + channel = self.make_request( + "DELETE", + self.url.encode("ascii"), + content={"block": False, "purge": True}, + access_token=self.admin_user_tok, + ) + + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(None, channel.json_body["new_room_id"]) + self.assertCountEqual( + [self.other_user, invited_user, knocked_user], + channel.json_body["kicked_users"], + ) + self.assertIn("failed_to_kick_users", channel.json_body) + self.assertIn("local_aliases", channel.json_body) + + self._is_purged(self.room_id) + self._is_blocked(self.room_id, expect=False) + self._has_no_members(self.room_id) + def test_block_room_and_not_purge(self) -> None: """Test to block a room without purging it. Members will not be moved to a new room and will not receive a message. -- cgit 1.5.1 From fdce83ee60b3b5ffd0c41d112873a4de52b1e640 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Mon, 30 Oct 2023 14:34:37 +0000 Subject: Claim fallback keys in bulk (#16570) --- changelog.d/16570.feature | 1 + synapse/handlers/e2e_keys.py | 14 +++++ synapse/storage/database.py | 10 +++ synapse/storage/databases/main/end_to_end_keys.py | 60 ++++++++++++++++++ tests/handlers/test_e2e_keys.py | 77 +++++++++++++++++++++++ 5 files changed, 162 insertions(+) create mode 100644 changelog.d/16570.feature (limited to 'synapse/storage/databases') diff --git a/changelog.d/16570.feature b/changelog.d/16570.feature new file mode 100644 index 0000000000..c807945fa8 --- /dev/null +++ b/changelog.d/16570.feature @@ -0,0 +1 @@ +Improve the performance of claiming encryption keys. diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 8c6432035d..91c5fe007d 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -659,6 +659,20 @@ class E2eKeysHandler: timeout: Optional[int], always_include_fallback_keys: bool, ) -> JsonDict: + """ + Args: + query: A chain of maps from (user_id, device_id, algorithm) to the requested + number of keys to claim. + user: The user who is claiming these keys. + timeout: How long to wait for any federation key claim requests before + giving up. + always_include_fallback_keys: always include a fallback key for local users' + devices, even if we managed to claim a one-time-key. + + Returns: a heterogeneous dict with two keys: + one_time_keys: chain of maps user ID -> device ID -> key ID -> key. + failures: map from remote destination to a JsonDict describing the error. + """ local_query: List[Tuple[str, str, str, int]] = [] remote_queries: Dict[str, Dict[str, Dict[str, Dict[str, int]]]] = {} diff --git a/synapse/storage/database.py b/synapse/storage/database.py index b1ece63845..a4e7048368 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -420,6 +420,16 @@ class LoggingTransaction: self._do_execute(self.txn.execute, sql, parameters) def executemany(self, sql: str, *args: Any) -> None: + """Repeatedly execute the same piece of SQL with different parameters. + + See https://peps.python.org/pep-0249/#executemany. Note in particular that + + > Use of this method for an operation which produces one or more result sets + > constitutes undefined behavior + + so you can't use this for e.g. a SELECT, an UPDATE ... RETURNING, or a + DELETE FROM... RETURNING. + """ # TODO: we should add a type for *args here. Looking at Cursor.executemany # and DBAPI2 it ought to be Sequence[_Parameter], but we pass in # Iterable[Iterable[Any]] in execute_batch and execute_values above, which mypy diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index f70f95eeba..08385d312f 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -24,6 +24,7 @@ from typing import ( Mapping, Optional, Sequence, + Set, Tuple, Union, cast, @@ -1260,6 +1261,65 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker Returns: A map of user ID -> a map device ID -> a map of key ID -> JSON. """ + if isinstance(self.database_engine, PostgresEngine): + return await self.db_pool.runInteraction( + "_claim_e2e_fallback_keys_bulk", + self._claim_e2e_fallback_keys_bulk_txn, + query_list, + db_autocommit=True, + ) + # Use an UPDATE FROM... RETURNING combined with a VALUES block to do + # everything in one query. Note: this is also supported in SQLite 3.33.0, + # (see https://www.sqlite.org/lang_update.html#update_from), but we do not + # have an equivalent of psycopg2's execute_values to do this in one query. + else: + return await self._claim_e2e_fallback_keys_simple(query_list) + + def _claim_e2e_fallback_keys_bulk_txn( + self, + txn: LoggingTransaction, + query_list: Iterable[Tuple[str, str, str, bool]], + ) -> Dict[str, Dict[str, Dict[str, JsonDict]]]: + """Efficient implementation of claim_e2e_fallback_keys for Postgres. + + Safe to autocommit: this is a single query. + """ + results: Dict[str, Dict[str, Dict[str, JsonDict]]] = {} + + sql = """ + WITH claims(user_id, device_id, algorithm, mark_as_used) AS ( + VALUES ? + ) + UPDATE e2e_fallback_keys_json k + SET used = used OR mark_as_used + FROM claims + WHERE (k.user_id, k.device_id, k.algorithm) = (claims.user_id, claims.device_id, claims.algorithm) + RETURNING k.user_id, k.device_id, k.algorithm, k.key_id, k.key_json; + """ + claimed_keys = cast( + List[Tuple[str, str, str, str, str]], + txn.execute_values(sql, query_list), + ) + + seen_user_device: Set[Tuple[str, str]] = set() + for user_id, device_id, algorithm, key_id, key_json in claimed_keys: + device_results = results.setdefault(user_id, {}).setdefault(device_id, {}) + device_results[f"{algorithm}:{key_id}"] = json_decoder.decode(key_json) + + if (user_id, device_id) in seen_user_device: + continue + seen_user_device.add((user_id, device_id)) + self._invalidate_cache_and_stream( + txn, self.get_e2e_unused_fallback_key_types, (user_id, device_id) + ) + + return results + + async def _claim_e2e_fallback_keys_simple( + self, + query_list: Iterable[Tuple[str, str, str, bool]], + ) -> Dict[str, Dict[str, Dict[str, JsonDict]]]: + """Naive, inefficient implementation of claim_e2e_fallback_keys for SQLite.""" results: Dict[str, Dict[str, Dict[str, JsonDict]]] = {} for user_id, device_id, algorithm, mark_as_used in query_list: row = await self.db_pool.simple_select_one( diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py index c5556f2844..24e405f429 100644 --- a/tests/handlers/test_e2e_keys.py +++ b/tests/handlers/test_e2e_keys.py @@ -322,6 +322,83 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase): {"failures": {}, "one_time_keys": {local_user: {device_id: fallback_key3}}}, ) + def test_fallback_key_bulk(self) -> None: + """Like test_fallback_key, but claims multiple keys in one handler call.""" + alice = f"@alice:{self.hs.hostname}" + brian = f"@brian:{self.hs.hostname}" + chris = f"@chris:{self.hs.hostname}" + + # Have three users upload fallback keys for two devices. + fallback_keys = { + alice: { + "alice_dev_1": {"alg1:k1": "fallback_key1"}, + "alice_dev_2": {"alg2:k2": "fallback_key2"}, + }, + brian: { + "brian_dev_1": {"alg1:k3": "fallback_key3"}, + "brian_dev_2": {"alg2:k4": "fallback_key4"}, + }, + chris: { + "chris_dev_1": {"alg1:k5": "fallback_key5"}, + "chris_dev_2": {"alg2:k6": "fallback_key6"}, + }, + } + + for user_id, devices in fallback_keys.items(): + for device_id, key_dict in devices.items(): + self.get_success( + self.handler.upload_keys_for_user( + user_id, + device_id, + {"fallback_keys": key_dict}, + ) + ) + + # Each device should have an unused fallback key. + for user_id, devices in fallback_keys.items(): + for device_id in devices: + fallback_res = self.get_success( + self.store.get_e2e_unused_fallback_key_types(user_id, device_id) + ) + expected_algorithm_name = f"alg{device_id[-1]}" + self.assertEqual(fallback_res, [expected_algorithm_name]) + + # Claim the fallback key for one device per user. + claim_res = self.get_success( + self.handler.claim_one_time_keys( + { + alice: {"alice_dev_1": {"alg1": 1}}, + brian: {"brian_dev_2": {"alg2": 1}}, + chris: {"chris_dev_2": {"alg2": 1}}, + }, + self.requester, + timeout=None, + always_include_fallback_keys=False, + ) + ) + expected_claims = { + alice: {"alice_dev_1": {"alg1:k1": "fallback_key1"}}, + brian: {"brian_dev_2": {"alg2:k4": "fallback_key4"}}, + chris: {"chris_dev_2": {"alg2:k6": "fallback_key6"}}, + } + self.assertEqual( + claim_res, + {"failures": {}, "one_time_keys": expected_claims}, + ) + + for user_id, devices in fallback_keys.items(): + for device_id in devices: + fallback_res = self.get_success( + self.store.get_e2e_unused_fallback_key_types(user_id, device_id) + ) + # Claimed fallback keys should no longer show up as unused. + # Unclaimed fallback keys should still be unused. + if device_id in expected_claims[user_id]: + self.assertEqual(fallback_res, []) + else: + expected_algorithm_name = f"alg{device_id[-1]}" + self.assertEqual(fallback_res, [expected_algorithm_name]) + def test_fallback_key_always_returned(self) -> None: local_user = "@boris:" + self.hs.hostname device_id = "xyz" -- cgit 1.5.1 From de981ae56720b06c33203e9edd3df08376afb907 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Mon, 30 Oct 2023 21:25:21 +0000 Subject: Claim local one-time-keys in bulk (#16565) Co-authored-by: Patrick Cloke --- changelog.d/16565.feature | 1 + synapse/handlers/e2e_keys.py | 10 + synapse/storage/databases/main/end_to_end_keys.py | 253 ++++++++++++---------- tests/handlers/test_e2e_keys.py | 158 ++++++++++++++ 4 files changed, 308 insertions(+), 114 deletions(-) create mode 100644 changelog.d/16565.feature (limited to 'synapse/storage/databases') diff --git a/changelog.d/16565.feature b/changelog.d/16565.feature new file mode 100644 index 0000000000..c807945fa8 --- /dev/null +++ b/changelog.d/16565.feature @@ -0,0 +1 @@ +Improve the performance of claiming encryption keys. diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 91c5fe007d..d340d4aebe 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -753,6 +753,16 @@ class E2eKeysHandler: async def upload_keys_for_user( self, user_id: str, device_id: str, keys: JsonDict ) -> JsonDict: + """ + Args: + user_id: user whose keys are being uploaded. + device_id: device whose keys are being uploaded. + keys: the body of a /keys/upload request. + + Returns a dictionary with one field: + "one_time_keys": A mapping from algorithm to number of keys for that + algorithm, including those previously persisted. + """ # This can only be called from the main process. assert isinstance(self.device_handler, DeviceHandler) diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index 08385d312f..4f96ac25c7 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -1111,7 +1111,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker ... async def claim_e2e_one_time_keys( - self, query_list: Iterable[Tuple[str, str, str, int]] + self, query_list: Collection[Tuple[str, str, str, int]] ) -> Tuple[ Dict[str, Dict[str, Dict[str, JsonDict]]], List[Tuple[str, str, str, int]] ]: @@ -1121,131 +1121,63 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker query_list: An iterable of tuples of (user ID, device ID, algorithm). Returns: - A tuple pf: + A tuple (results, missing) of: A map of user ID -> a map device ID -> a map of key ID -> JSON. - A copy of the input which has not been fulfilled. + A copy of the input which has not been fulfilled. The returned counts + may be less than the input counts. In this case, the returned counts + are the number of claims that were not fulfilled. """ - - @trace - def _claim_e2e_one_time_key_simple( - txn: LoggingTransaction, - user_id: str, - device_id: str, - algorithm: str, - count: int, - ) -> List[Tuple[str, str]]: - """Claim OTK for device for DBs that don't support RETURNING. - - Returns: - A tuple of key name (algorithm + key ID) and key JSON, if an - OTK was found. - """ - - sql = """ - SELECT key_id, key_json FROM e2e_one_time_keys_json - WHERE user_id = ? AND device_id = ? AND algorithm = ? - LIMIT ? - """ - - txn.execute(sql, (user_id, device_id, algorithm, count)) - otk_rows = list(txn) - if not otk_rows: - return [] - - self.db_pool.simple_delete_many_txn( - txn, - table="e2e_one_time_keys_json", - column="key_id", - values=[otk_row[0] for otk_row in otk_rows], - keyvalues={ - "user_id": user_id, - "device_id": device_id, - "algorithm": algorithm, - }, - ) - self._invalidate_cache_and_stream( - txn, self.count_e2e_one_time_keys, (user_id, device_id) - ) - - return [ - (f"{algorithm}:{key_id}", key_json) for key_id, key_json in otk_rows - ] - - @trace - def _claim_e2e_one_time_key_returning( - txn: LoggingTransaction, - user_id: str, - device_id: str, - algorithm: str, - count: int, - ) -> List[Tuple[str, str]]: - """Claim OTK for device for DBs that support RETURNING. - - Returns: - A tuple of key name (algorithm + key ID) and key JSON, if an - OTK was found. - """ - - # We can use RETURNING to do the fetch and DELETE in once step. - sql = """ - DELETE FROM e2e_one_time_keys_json - WHERE user_id = ? AND device_id = ? AND algorithm = ? - AND key_id IN ( - SELECT key_id FROM e2e_one_time_keys_json - WHERE user_id = ? AND device_id = ? AND algorithm = ? - LIMIT ? - ) - RETURNING key_id, key_json - """ - - txn.execute( - sql, - (user_id, device_id, algorithm, user_id, device_id, algorithm, count), - ) - otk_rows = list(txn) - if not otk_rows: - return [] - - self._invalidate_cache_and_stream( - txn, self.count_e2e_one_time_keys, (user_id, device_id) - ) - - return [ - (f"{algorithm}:{key_id}", key_json) for key_id, key_json in otk_rows - ] - results: Dict[str, Dict[str, Dict[str, JsonDict]]] = {} missing: List[Tuple[str, str, str, int]] = [] - for user_id, device_id, algorithm, count in query_list: - if self.database_engine.supports_returning: - # If we support RETURNING clause we can use a single query that - # allows us to use autocommit mode. - _claim_e2e_one_time_key = _claim_e2e_one_time_key_returning - db_autocommit = True - else: - _claim_e2e_one_time_key = _claim_e2e_one_time_key_simple - db_autocommit = False + if isinstance(self.database_engine, PostgresEngine): + # If we can use execute_values we can use a single batch query + # in autocommit mode. + unfulfilled_claim_counts: Dict[Tuple[str, str, str], int] = {} + for user_id, device_id, algorithm, count in query_list: + unfulfilled_claim_counts[user_id, device_id, algorithm] = count - claim_rows = await self.db_pool.runInteraction( + bulk_claims = await self.db_pool.runInteraction( "claim_e2e_one_time_keys", - _claim_e2e_one_time_key, - user_id, - device_id, - algorithm, - count, - db_autocommit=db_autocommit, + self._claim_e2e_one_time_keys_bulk, + query_list, + db_autocommit=True, ) - if claim_rows: + + for user_id, device_id, algorithm, key_id, key_json in bulk_claims: device_results = results.setdefault(user_id, {}).setdefault( device_id, {} ) - for claim_row in claim_rows: - device_results[claim_row[0]] = json_decoder.decode(claim_row[1]) + device_results[f"{algorithm}:{key_id}"] = json_decoder.decode(key_json) + unfulfilled_claim_counts[(user_id, device_id, algorithm)] -= 1 + # Did we get enough OTKs? - count -= len(claim_rows) - if count: - missing.append((user_id, device_id, algorithm, count)) + missing = [ + (user, device, alg, count) + for (user, device, alg), count in unfulfilled_claim_counts.items() + if count > 0 + ] + else: + for user_id, device_id, algorithm, count in query_list: + claim_rows = await self.db_pool.runInteraction( + "claim_e2e_one_time_keys", + self._claim_e2e_one_time_key_simple, + user_id, + device_id, + algorithm, + count, + db_autocommit=False, + ) + if claim_rows: + device_results = results.setdefault(user_id, {}).setdefault( + device_id, {} + ) + for claim_row in claim_rows: + device_results[claim_row[0]] = json_decoder.decode(claim_row[1]) + # Did we get enough OTKs? + count -= len(claim_rows) + if count: + missing.append((user_id, device_id, algorithm, count)) return results, missing @@ -1362,6 +1294,99 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker return results + @trace + def _claim_e2e_one_time_key_simple( + self, + txn: LoggingTransaction, + user_id: str, + device_id: str, + algorithm: str, + count: int, + ) -> List[Tuple[str, str]]: + """Claim OTK for device for DBs that don't support RETURNING. + + Returns: + A tuple of key name (algorithm + key ID) and key JSON, if an + OTK was found. + """ + + sql = """ + SELECT key_id, key_json FROM e2e_one_time_keys_json + WHERE user_id = ? AND device_id = ? AND algorithm = ? + LIMIT ? + """ + + txn.execute(sql, (user_id, device_id, algorithm, count)) + otk_rows = list(txn) + if not otk_rows: + return [] + + self.db_pool.simple_delete_many_txn( + txn, + table="e2e_one_time_keys_json", + column="key_id", + values=[otk_row[0] for otk_row in otk_rows], + keyvalues={ + "user_id": user_id, + "device_id": device_id, + "algorithm": algorithm, + }, + ) + self._invalidate_cache_and_stream( + txn, self.count_e2e_one_time_keys, (user_id, device_id) + ) + + return [(f"{algorithm}:{key_id}", key_json) for key_id, key_json in otk_rows] + + @trace + def _claim_e2e_one_time_keys_bulk( + self, + txn: LoggingTransaction, + query_list: Iterable[Tuple[str, str, str, int]], + ) -> List[Tuple[str, str, str, str, str]]: + """Bulk claim OTKs, for DBs that support DELETE FROM... RETURNING. + + Args: + query_list: Collection of tuples (user_id, device_id, algorithm, count) + as passed to claim_e2e_one_time_keys. + + Returns: + A list of tuples (user_id, device_id, algorithm, key_id, key_json) + for each OTK claimed. + """ + sql = """ + WITH claims(user_id, device_id, algorithm, claim_count) AS ( + VALUES ? + ), ranked_keys AS ( + SELECT + user_id, device_id, algorithm, key_id, claim_count, + ROW_NUMBER() OVER (PARTITION BY (user_id, device_id, algorithm)) AS r + FROM e2e_one_time_keys_json + JOIN claims USING (user_id, device_id, algorithm) + ) + DELETE FROM e2e_one_time_keys_json k + WHERE (user_id, device_id, algorithm, key_id) IN ( + SELECT user_id, device_id, algorithm, key_id + FROM ranked_keys + WHERE r <= claim_count + ) + RETURNING user_id, device_id, algorithm, key_id, key_json; + """ + otk_rows = cast( + List[Tuple[str, str, str, str, str]], txn.execute_values(sql, query_list) + ) + + seen_user_device: Set[Tuple[str, str]] = set() + for user_id, device_id, _, _, _ in otk_rows: + if (user_id, device_id) in seen_user_device: + continue + seen_user_device.add((user_id, device_id)) + self._invalidate_cache_and_stream( + txn, self.count_e2e_one_time_keys, (user_id, device_id) + ) + + return otk_rows + class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): def __init__( diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py index 24e405f429..90b4da9ad5 100644 --- a/tests/handlers/test_e2e_keys.py +++ b/tests/handlers/test_e2e_keys.py @@ -174,6 +174,164 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase): }, ) + def test_claim_one_time_key_bulk(self) -> None: + """Like test_claim_one_time_key but claims multiple keys in one handler call.""" + # Apologies to the reader. This test is a little too verbose. It is particularly + # tricky to make assertions neatly with all these nested dictionaries in play. + + # Three users with two devices each. Each device uses two algorithms. + # Each algorithm is invoked with two keys. + alice = f"@alice:{self.hs.hostname}" + brian = f"@brian:{self.hs.hostname}" + chris = f"@chris:{self.hs.hostname}" + one_time_keys = { + alice: { + "alice_dev_1": { + "alg1:k1": {"dummy_id": 1}, + "alg1:k2": {"dummy_id": 2}, + "alg2:k3": {"dummy_id": 3}, + "alg2:k4": {"dummy_id": 4}, + }, + "alice_dev_2": { + "alg1:k5": {"dummy_id": 5}, + "alg1:k6": {"dummy_id": 6}, + "alg2:k7": {"dummy_id": 7}, + "alg2:k8": {"dummy_id": 8}, + }, + }, + brian: { + "brian_dev_1": { + "alg1:k9": {"dummy_id": 9}, + "alg1:k10": {"dummy_id": 10}, + "alg2:k11": {"dummy_id": 11}, + "alg2:k12": {"dummy_id": 12}, + }, + "brian_dev_2": { + "alg1:k13": {"dummy_id": 13}, + "alg1:k14": {"dummy_id": 14}, + "alg2:k15": {"dummy_id": 15}, + "alg2:k16": {"dummy_id": 16}, + }, + }, + chris: { + "chris_dev_1": { + "alg1:k17": {"dummy_id": 17}, + "alg1:k18": {"dummy_id": 18}, + "alg2:k19": {"dummy_id": 19}, + "alg2:k20": {"dummy_id": 20}, + }, + "chris_dev_2": { + "alg1:k21": {"dummy_id": 21}, + "alg1:k22": {"dummy_id": 22}, + "alg2:k23": {"dummy_id": 23}, + "alg2:k24": {"dummy_id": 24}, + }, + }, + } + for user_id, devices in one_time_keys.items(): + for device_id, keys_dict in devices.items(): + counts = self.get_success( + self.handler.upload_keys_for_user( + user_id, + device_id, + {"one_time_keys": keys_dict}, + ) + ) + # The upload should report 2 keys per algorithm. + expected_counts = { + "one_time_key_counts": { + # See count_e2e_one_time_keys for why this is hardcoded. + "signed_curve25519": 0, + "alg1": 2, + "alg2": 2, + }, + } + self.assertEqual(counts, expected_counts) + + # Claim a variety of keys. + # Raw format, easier to make test assertions about. + claims_to_make = { + (alice, "alice_dev_1", "alg1"): 1, + (alice, "alice_dev_1", "alg2"): 2, + (alice, "alice_dev_2", "alg2"): 1, + (brian, "brian_dev_1", "alg1"): 2, + (brian, "brian_dev_2", "alg2"): 9001, + (chris, "chris_dev_2", "alg2"): 1, + } + # Convert to the format the handler wants. + query: Dict[str, Dict[str, Dict[str, int]]] = {} + for (user_id, device_id, algorithm), count in claims_to_make.items(): + query.setdefault(user_id, {}).setdefault(device_id, {})[algorithm] = count + claim_res = self.get_success( + self.handler.claim_one_time_keys( + query, + self.requester, + timeout=None, + always_include_fallback_keys=False, + ) + ) + + # No failures, please! + self.assertEqual(claim_res["failures"], {}) + + # Check that we get exactly the (user, device, algorithm)s we asked for. + got_otks = claim_res["one_time_keys"] + claimed_user_device_algorithms = { + (user_id, device_id, alg_key_id.split(":")[0]) + for user_id, devices in got_otks.items() + for device_id, key_dict in devices.items() + for alg_key_id in key_dict + } + self.assertEqual(claimed_user_device_algorithms, set(claims_to_make)) + + # Now check the keys we got are what we expected. + def assertExactlyOneOtk( + user_id: str, device_id: str, *alg_key_pairs: str + ) -> None: + key_dict = got_otks[user_id][device_id] + found = 0 + for alg_key in alg_key_pairs: + if alg_key in key_dict: + expected_key_json = one_time_keys[user_id][device_id][alg_key] + self.assertEqual(key_dict[alg_key], expected_key_json) + found += 1 + self.assertEqual(found, 1) + + def assertAllOtks(user_id: str, device_id: str, *alg_key_pairs: str) -> None: + key_dict = got_otks[user_id][device_id] + for alg_key in alg_key_pairs: + expected_key_json = one_time_keys[user_id][device_id][alg_key] + self.assertEqual(key_dict[alg_key], expected_key_json) + + # Expect a single arbitrary key to be returned. + assertExactlyOneOtk(alice, "alice_dev_1", "alg1:k1", "alg1:k2") + assertExactlyOneOtk(alice, "alice_dev_2", "alg2:k7", "alg2:k8") + assertExactlyOneOtk(chris, "chris_dev_2", "alg2:k23", "alg2:k24") + + assertAllOtks(alice, "alice_dev_1", "alg2:k3", "alg2:k4") + assertAllOtks(brian, "brian_dev_1", "alg1:k9", "alg1:k10") + assertAllOtks(brian, "brian_dev_2", "alg2:k15", "alg2:k16") + + # Now check the unused key counts. + for user_id, devices in one_time_keys.items(): + for device_id in devices: + counts_by_alg = self.get_success( + self.store.count_e2e_one_time_keys(user_id, device_id) + ) + # Somewhat fiddley to compute the expected count dict. + expected_counts_by_alg = { + "signed_curve25519": 0, + } + for alg in ["alg1", "alg2"]: + claim_count = claims_to_make.get((user_id, device_id, alg), 0) + remaining_count = max(0, 2 - claim_count) + if remaining_count > 0: + expected_counts_by_alg[alg] = remaining_count + + self.assertEqual( + counts_by_alg, expected_counts_by_alg, f"{user_id}:{device_id}" + ) + def test_fallback_key(self) -> None: local_user = "@boris:" + self.hs.hostname device_id = "xyz" -- cgit 1.5.1 From cfb6d38c47711b8dfaf0125353aec88d16708b97 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 31 Oct 2023 13:13:28 -0400 Subject: Remove remaining usage of cursor_to_dict. (#16564) --- changelog.d/16564.misc | 1 + synapse/handlers/admin.py | 2 +- synapse/handlers/room_list.py | 43 ++++++------ synapse/handlers/room_summary.py | 26 +++---- synapse/rest/admin/media.py | 6 +- synapse/rest/admin/registration_tokens.py | 13 +++- synapse/rest/admin/rooms.py | 11 ++- synapse/rest/admin/users.py | 10 ++- synapse/storage/background_updates.py | 14 ++-- synapse/storage/database.py | 15 ---- synapse/storage/databases/main/__init__.py | 52 +++++++++++--- synapse/storage/databases/main/devices.py | 55 ++++++++++----- synapse/storage/databases/main/media_repository.py | 48 ++++++++++--- synapse/storage/databases/main/registration.py | 42 +++++++---- synapse/storage/databases/main/room.py | 82 ++++++++++++++++++---- tests/handlers/test_register.py | 22 +++--- tests/storage/test_main.py | 4 +- tests/storage/test_room.py | 11 +-- 18 files changed, 300 insertions(+), 157 deletions(-) create mode 100644 changelog.d/16564.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/16564.misc b/changelog.d/16564.misc new file mode 100644 index 0000000000..93ceaeafc9 --- /dev/null +++ b/changelog.d/16564.misc @@ -0,0 +1 @@ +Improve type hints. diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index 2c2baeac67..d06f8e3296 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -283,7 +283,7 @@ class AdminHandler: start, limit, user_id ) for media in media_ids: - writer.write_media_id(media["media_id"], media) + writer.write_media_id(media.media_id, attr.asdict(media)) logger.info( "[%s] Written %d media_ids of %s", diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index 36e2db8975..2947e154be 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -33,6 +33,7 @@ from synapse.api.errors import ( RequestSendFailed, SynapseError, ) +from synapse.storage.databases.main.room import LargestRoomStats from synapse.types import JsonDict, JsonMapping, ThirdPartyInstanceID from synapse.util.caches.descriptors import _CacheContext, cached from synapse.util.caches.response_cache import ResponseCache @@ -170,26 +171,24 @@ class RoomListHandler: ignore_non_federatable=from_federation, ) - def build_room_entry(room: JsonDict) -> JsonDict: + def build_room_entry(room: LargestRoomStats) -> JsonDict: entry = { - "room_id": room["room_id"], - "name": room["name"], - "topic": room["topic"], - "canonical_alias": room["canonical_alias"], - "num_joined_members": room["joined_members"], - "avatar_url": room["avatar"], - "world_readable": room["history_visibility"] + "room_id": room.room_id, + "name": room.name, + "topic": room.topic, + "canonical_alias": room.canonical_alias, + "num_joined_members": room.joined_members, + "avatar_url": room.avatar, + "world_readable": room.history_visibility == HistoryVisibility.WORLD_READABLE, - "guest_can_join": room["guest_access"] == "can_join", - "join_rule": room["join_rules"], - "room_type": room["room_type"], + "guest_can_join": room.guest_access == "can_join", + "join_rule": room.join_rules, + "room_type": room.room_type, } # Filter out Nones – rather omit the field altogether return {k: v for k, v in entry.items() if v is not None} - results = [build_room_entry(r) for r in results] - response: JsonDict = {} num_results = len(results) if limit is not None: @@ -212,33 +211,33 @@ class RoomListHandler: # If there was a token given then we assume that there # must be previous results. response["prev_batch"] = RoomListNextBatch( - last_joined_members=initial_entry["num_joined_members"], - last_room_id=initial_entry["room_id"], + last_joined_members=initial_entry.joined_members, + last_room_id=initial_entry.room_id, direction_is_forward=False, ).to_token() if more_to_come: response["next_batch"] = RoomListNextBatch( - last_joined_members=final_entry["num_joined_members"], - last_room_id=final_entry["room_id"], + last_joined_members=final_entry.joined_members, + last_room_id=final_entry.room_id, direction_is_forward=True, ).to_token() else: if has_batch_token: response["next_batch"] = RoomListNextBatch( - last_joined_members=final_entry["num_joined_members"], - last_room_id=final_entry["room_id"], + last_joined_members=final_entry.joined_members, + last_room_id=final_entry.room_id, direction_is_forward=True, ).to_token() if more_to_come: response["prev_batch"] = RoomListNextBatch( - last_joined_members=initial_entry["num_joined_members"], - last_room_id=initial_entry["room_id"], + last_joined_members=initial_entry.joined_members, + last_room_id=initial_entry.room_id, direction_is_forward=False, ).to_token() - response["chunk"] = results + response["chunk"] = [build_room_entry(r) for r in results] response["total_room_count_estimate"] = await self.store.count_public_rooms( network_tuple, diff --git a/synapse/handlers/room_summary.py b/synapse/handlers/room_summary.py index dd559b4c45..1dfb12e065 100644 --- a/synapse/handlers/room_summary.py +++ b/synapse/handlers/room_summary.py @@ -703,24 +703,24 @@ class RoomSummaryHandler: # there should always be an entry assert stats is not None, "unable to retrieve stats for %s" % (room_id,) - entry = { - "room_id": stats["room_id"], - "name": stats["name"], - "topic": stats["topic"], - "canonical_alias": stats["canonical_alias"], - "num_joined_members": stats["joined_members"], - "avatar_url": stats["avatar"], - "join_rule": stats["join_rules"], + entry: JsonDict = { + "room_id": stats.room_id, + "name": stats.name, + "topic": stats.topic, + "canonical_alias": stats.canonical_alias, + "num_joined_members": stats.joined_members, + "avatar_url": stats.avatar, + "join_rule": stats.join_rules, "world_readable": ( - stats["history_visibility"] == HistoryVisibility.WORLD_READABLE + stats.history_visibility == HistoryVisibility.WORLD_READABLE ), - "guest_can_join": stats["guest_access"] == "can_join", - "room_type": stats["room_type"], + "guest_can_join": stats.guest_access == "can_join", + "room_type": stats.room_type, } if self._msc3266_enabled: - entry["im.nheko.summary.version"] = stats["version"] - entry["im.nheko.summary.encryption"] = stats["encryption"] + entry["im.nheko.summary.version"] = stats.version + entry["im.nheko.summary.encryption"] = stats.encryption # Federation requests need to provide additional information so the # requested server is able to filter the response appropriately. diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py index b7637dff0b..8cf5268854 100644 --- a/synapse/rest/admin/media.py +++ b/synapse/rest/admin/media.py @@ -17,6 +17,8 @@ import logging from http import HTTPStatus from typing import TYPE_CHECKING, Optional, Tuple +import attr + from synapse.api.constants import Direction from synapse.api.errors import Codes, NotFoundError, SynapseError from synapse.http.server import HttpServer @@ -418,7 +420,7 @@ class UserMediaRestServlet(RestServlet): start, limit, user_id, order_by, direction ) - ret = {"media": media, "total": total} + ret = {"media": [attr.asdict(m) for m in media], "total": total} if (start + limit) < total: ret["next_token"] = start + len(media) @@ -477,7 +479,7 @@ class UserMediaRestServlet(RestServlet): ) deleted_media, total = await self.media_repository.delete_local_media_ids( - [row["media_id"] for row in media] + [m.media_id for m in media] ) return HTTPStatus.OK, {"deleted_media": deleted_media, "total": total} diff --git a/synapse/rest/admin/registration_tokens.py b/synapse/rest/admin/registration_tokens.py index ffce92d45e..f3e06d3da3 100644 --- a/synapse/rest/admin/registration_tokens.py +++ b/synapse/rest/admin/registration_tokens.py @@ -77,7 +77,18 @@ class ListRegistrationTokensRestServlet(RestServlet): await assert_requester_is_admin(self.auth, request) valid = parse_boolean(request, "valid") token_list = await self.store.get_registration_tokens(valid) - return HTTPStatus.OK, {"registration_tokens": token_list} + return HTTPStatus.OK, { + "registration_tokens": [ + { + "token": t[0], + "uses_allowed": t[1], + "pending": t[2], + "completed": t[3], + "expiry_time": t[4], + } + for t in token_list + ] + } class NewRegistrationTokenRestServlet(RestServlet): diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py index 0659f22a89..23a034522c 100644 --- a/synapse/rest/admin/rooms.py +++ b/synapse/rest/admin/rooms.py @@ -16,6 +16,8 @@ from http import HTTPStatus from typing import TYPE_CHECKING, List, Optional, Tuple, cast from urllib import parse as urlparse +import attr + from synapse.api.constants import Direction, EventTypes, JoinRules, Membership from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError from synapse.api.filtering import Filter @@ -306,10 +308,13 @@ class RoomRestServlet(RestServlet): raise NotFoundError("Room not found") members = await self.store.get_users_in_room(room_id) - ret["joined_local_devices"] = await self.store.count_devices_by_users(members) - ret["forgotten"] = await self.store.is_locally_forgotten_room(room_id) + result = attr.asdict(ret) + result["joined_local_devices"] = await self.store.count_devices_by_users( + members + ) + result["forgotten"] = await self.store.is_locally_forgotten_room(room_id) - return HTTPStatus.OK, ret + return HTTPStatus.OK, result async def on_DELETE( self, request: SynapseRequest, room_id: str diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index 7fe16130e7..73878dd99d 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -18,6 +18,8 @@ import secrets from http import HTTPStatus from typing import TYPE_CHECKING, Dict, List, Optional, Tuple +import attr + from synapse.api.constants import Direction, UserTypes from synapse.api.errors import Codes, NotFoundError, SynapseError from synapse.http.servlet import ( @@ -161,11 +163,13 @@ class UsersRestServletV2(RestServlet): ) # If support for MSC3866 is not enabled, don't show the approval flag. + filter = None if not self._msc3866_enabled: - for user in users: - del user["approved"] - ret = {"users": users, "total": total} + def _filter(a: attr.Attribute) -> bool: + return a.name != "approved" + + ret = {"users": [attr.asdict(u, filter=filter) for u in users], "total": total} if (start + limit) < total: ret["next_token"] = str(start + len(users)) diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 12829d3d7d..7426dbcad6 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -28,6 +28,7 @@ from typing import ( Sequence, Tuple, Type, + cast, ) import attr @@ -488,14 +489,14 @@ class BackgroundUpdater: True if we have finished running all the background updates, otherwise False """ - def get_background_updates_txn(txn: Cursor) -> List[Dict[str, Any]]: + def get_background_updates_txn(txn: Cursor) -> List[Tuple[str, Optional[str]]]: txn.execute( """ SELECT update_name, depends_on FROM background_updates ORDER BY ordering, update_name """ ) - return self.db_pool.cursor_to_dict(txn) + return cast(List[Tuple[str, Optional[str]]], txn.fetchall()) if not self._current_background_update: all_pending_updates = await self.db_pool.runInteraction( @@ -507,14 +508,13 @@ class BackgroundUpdater: return True # find the first update which isn't dependent on another one in the queue. - pending = {update["update_name"] for update in all_pending_updates} - for upd in all_pending_updates: - depends_on = upd["depends_on"] + pending = {update_name for update_name, depends_on in all_pending_updates} + for update_name, depends_on in all_pending_updates: if not depends_on or depends_on not in pending: break logger.info( "Not starting on bg update %s until %s is done", - upd["update_name"], + update_name, depends_on, ) else: @@ -524,7 +524,7 @@ class BackgroundUpdater: "another: dependency cycle?" ) - self._current_background_update = upd["update_name"] + self._current_background_update = update_name # We have a background update to run, otherwise we would have returned # early. diff --git a/synapse/storage/database.py b/synapse/storage/database.py index a4e7048368..6d54bb0eb2 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -18,7 +18,6 @@ import logging import time import types from collections import defaultdict -from sys import intern from time import monotonic as monotonic_time from typing import ( TYPE_CHECKING, @@ -1042,20 +1041,6 @@ class DatabasePool: self._db_pool.runWithConnection(inner_func, *args, **kwargs) ) - @staticmethod - def cursor_to_dict(cursor: Cursor) -> List[Dict[str, Any]]: - """Converts a SQL cursor into an list of dicts. - - Args: - cursor: The DBAPI cursor which has executed a query. - Returns: - A list of dicts where the key is the column header. - """ - assert cursor.description is not None, "cursor.description was None" - col_headers = [intern(str(column[0])) for column in cursor.description] - results = [dict(zip(col_headers, row)) for row in cursor] - return results - async def execute(self, desc: str, query: str, *args: Any) -> List[Tuple[Any, ...]]: """Runs a single query for a result set. diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py index 840d725114..89f4077351 100644 --- a/synapse/storage/databases/main/__init__.py +++ b/synapse/storage/databases/main/__init__.py @@ -17,6 +17,8 @@ import logging from typing import TYPE_CHECKING, List, Optional, Tuple, Union, cast +import attr + from synapse.api.constants import Direction from synapse.config.homeserver import HomeServerConfig from synapse.storage._base import make_in_list_sql_clause @@ -28,7 +30,7 @@ from synapse.storage.database import ( from synapse.storage.databases.main.stats import UserSortOrder from synapse.storage.engines import BaseDatabaseEngine from synapse.storage.types import Cursor -from synapse.types import JsonDict, get_domain_from_id +from synapse.types import get_domain_from_id from .account_data import AccountDataStore from .appservice import ApplicationServiceStore, ApplicationServiceTransactionStore @@ -82,6 +84,25 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) +@attr.s(slots=True, frozen=True, auto_attribs=True) +class UserPaginateResponse: + """This is very similar to UserInfo, but not quite the same.""" + + name: str + user_type: Optional[str] + is_guest: bool + admin: bool + deactivated: bool + shadow_banned: bool + displayname: Optional[str] + avatar_url: Optional[str] + creation_ts: Optional[int] + approved: bool + erased: bool + last_seen_ts: int + locked: bool + + class DataStore( EventsBackgroundUpdatesStore, ExperimentalFeaturesStore, @@ -156,7 +177,7 @@ class DataStore( approved: bool = True, not_user_types: Optional[List[str]] = None, locked: bool = False, - ) -> Tuple[List[JsonDict], int]: + ) -> Tuple[List[UserPaginateResponse], int]: """Function to retrieve a paginated list of users from users list. This will return a json list of users and the total number of users matching the filter criteria. @@ -182,7 +203,7 @@ class DataStore( def get_users_paginate_txn( txn: LoggingTransaction, - ) -> Tuple[List[JsonDict], int]: + ) -> Tuple[List[UserPaginateResponse], int]: filters = [] args: list = [] @@ -282,13 +303,24 @@ class DataStore( """ args += [limit, start] txn.execute(sql, args) - users = self.db_pool.cursor_to_dict(txn) - - # some of those boolean values are returned as integers when we're on SQLite - columns_to_boolify = ["erased"] - for user in users: - for column in columns_to_boolify: - user[column] = bool(user[column]) + users = [ + UserPaginateResponse( + name=row[0], + user_type=row[1], + is_guest=bool(row[2]), + admin=bool(row[3]), + deactivated=bool(row[4]), + shadow_banned=bool(row[5]), + displayname=row[6], + avatar_url=row[7], + creation_ts=row[8], + approved=bool(row[9]), + erased=bool(row[10]), + last_seen_ts=row[11], + locked=bool(row[12]), + ) + for row in txn + ] return users, count diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 49edbb9e06..b0811a4cf1 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1620,7 +1620,6 @@ class DeviceBackgroundUpdateStore(SQLBaseStore): # # For each duplicate, we delete all the existing rows and put one back. - KEY_COLS = ["stream_id", "destination", "user_id", "device_id"] last_row = progress.get( "last_row", {"stream_id": 0, "destination": "", "user_id": "", "device_id": ""}, @@ -1628,44 +1627,62 @@ class DeviceBackgroundUpdateStore(SQLBaseStore): def _txn(txn: LoggingTransaction) -> int: clause, args = make_tuple_comparison_clause( - [(x, last_row[x]) for x in KEY_COLS] + [ + ("stream_id", last_row["stream_id"]), + ("destination", last_row["destination"]), + ("user_id", last_row["user_id"]), + ("device_id", last_row["device_id"]), + ] ) - sql = """ + sql = f""" SELECT stream_id, destination, user_id, device_id, MAX(ts) AS ts FROM device_lists_outbound_pokes - WHERE %s - GROUP BY %s + WHERE {clause} + GROUP BY stream_id, destination, user_id, device_id HAVING count(*) > 1 - ORDER BY %s + ORDER BY stream_id, destination, user_id, device_id LIMIT ? - """ % ( - clause, # WHERE - ",".join(KEY_COLS), # GROUP BY - ",".join(KEY_COLS), # ORDER BY - ) + """ txn.execute(sql, args + [batch_size]) - rows = self.db_pool.cursor_to_dict(txn) + rows = txn.fetchall() - row = None - for row in rows: + stream_id, destination, user_id, device_id = None, None, None, None + for stream_id, destination, user_id, device_id, _ in rows: self.db_pool.simple_delete_txn( txn, "device_lists_outbound_pokes", - {x: row[x] for x in KEY_COLS}, + { + "stream_id": stream_id, + "destination": destination, + "user_id": user_id, + "device_id": device_id, + }, ) - row["sent"] = False self.db_pool.simple_insert_txn( txn, "device_lists_outbound_pokes", - row, + { + "stream_id": stream_id, + "destination": destination, + "user_id": user_id, + "device_id": device_id, + "sent": False, + }, ) - if row: + if rows: self.db_pool.updates._background_update_progress_txn( txn, BG_UPDATE_REMOVE_DUP_OUTBOUND_POKES, - {"last_row": row}, + { + "last_row": { + "stream_id": stream_id, + "destination": destination, + "user_id": user_id, + "device_id": device_id, + } + }, ) return len(rows) diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py index aeb3db596c..c8d7c9fd32 100644 --- a/synapse/storage/databases/main/media_repository.py +++ b/synapse/storage/databases/main/media_repository.py @@ -26,6 +26,8 @@ from typing import ( cast, ) +import attr + from synapse.api.constants import Direction from synapse.logging.opentracing import trace from synapse.media._base import ThumbnailInfo @@ -45,6 +47,18 @@ BG_UPDATE_REMOVE_MEDIA_REPO_INDEX_WITHOUT_METHOD_2 = ( ) +@attr.s(slots=True, frozen=True, auto_attribs=True) +class LocalMedia: + media_id: str + media_type: str + media_length: int + upload_name: str + created_ts: int + last_access_ts: int + quarantined_by: Optional[str] + safe_from_quarantine: bool + + class MediaSortOrder(Enum): """ Enum to define the sorting method used when returning media with @@ -180,7 +194,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): user_id: str, order_by: str = MediaSortOrder.CREATED_TS.value, direction: Direction = Direction.FORWARDS, - ) -> Tuple[List[Dict[str, Any]], int]: + ) -> Tuple[List[LocalMedia], int]: """Get a paginated list of metadata for a local piece of media which an user_id has uploaded @@ -197,7 +211,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): def get_local_media_by_user_paginate_txn( txn: LoggingTransaction, - ) -> Tuple[List[Dict[str, Any]], int]: + ) -> Tuple[List[LocalMedia], int]: # Set ordering order_by_column = MediaSortOrder(order_by).value @@ -217,14 +231,14 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): sql = """ SELECT - "media_id", - "media_type", - "media_length", - "upload_name", - "created_ts", - "last_access_ts", - "quarantined_by", - "safe_from_quarantine" + media_id, + media_type, + media_length, + upload_name, + created_ts, + last_access_ts, + quarantined_by, + safe_from_quarantine FROM local_media_repository WHERE user_id = ? ORDER BY {order_by_column} {order}, media_id ASC @@ -236,7 +250,19 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): args += [limit, start] txn.execute(sql, args) - media = self.db_pool.cursor_to_dict(txn) + media = [ + LocalMedia( + media_id=row[0], + media_type=row[1], + media_length=row[2], + upload_name=row[3], + created_ts=row[4], + last_access_ts=row[5], + quarantined_by=row[6], + safe_from_quarantine=bool(row[7]), + ) + for row in txn + ] return media, count return await self.db_pool.runInteraction( diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index e09ab21593..933d76e905 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -1517,7 +1517,7 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): async def get_registration_tokens( self, valid: Optional[bool] = None - ) -> List[Dict[str, Any]]: + ) -> List[Tuple[str, Optional[int], int, int, Optional[int]]]: """List all registration tokens. Used by the admin API. Args: @@ -1526,34 +1526,48 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): Default is None: return all tokens regardless of validity. Returns: - A list of dicts, each containing details of a token. + A list of tuples containing: + * The token + * The number of users allowed (or None) + * Whether it is pending + * Whether it has been completed + * An expiry time (or None if no expiry) """ def select_registration_tokens_txn( txn: LoggingTransaction, now: int, valid: Optional[bool] - ) -> List[Dict[str, Any]]: + ) -> List[Tuple[str, Optional[int], int, int, Optional[int]]]: if valid is None: # Return all tokens regardless of validity - txn.execute("SELECT * FROM registration_tokens") + txn.execute( + """ + SELECT token, uses_allowed, pending, completed, expiry_time + FROM registration_tokens + """ + ) elif valid: # Select valid tokens only - sql = ( - "SELECT * FROM registration_tokens WHERE " - "(uses_allowed > pending + completed OR uses_allowed IS NULL) " - "AND (expiry_time > ? OR expiry_time IS NULL)" - ) + sql = """ + SELECT token, uses_allowed, pending, completed, expiry_time + FROM registration_tokens + WHERE (uses_allowed > pending + completed OR uses_allowed IS NULL) + AND (expiry_time > ? OR expiry_time IS NULL) + """ txn.execute(sql, [now]) else: # Select invalid tokens only - sql = ( - "SELECT * FROM registration_tokens WHERE " - "uses_allowed <= pending + completed OR expiry_time <= ?" - ) + sql = """ + SELECT token, uses_allowed, pending, completed, expiry_time + FROM registration_tokens + WHERE uses_allowed <= pending + completed OR expiry_time <= ? + """ txn.execute(sql, [now]) - return self.db_pool.cursor_to_dict(txn) + return cast( + List[Tuple[str, Optional[int], int, int, Optional[int]]], txn.fetchall() + ) return await self.db_pool.runInteraction( "select_registration_tokens", diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 3e8fcf1975..6d4b9891e7 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -78,6 +78,31 @@ class RatelimitOverride: burst_count: int +@attr.s(slots=True, frozen=True, auto_attribs=True) +class LargestRoomStats: + room_id: str + name: Optional[str] + canonical_alias: Optional[str] + joined_members: int + join_rules: Optional[str] + guest_access: Optional[str] + history_visibility: Optional[str] + state_events: int + avatar: Optional[str] + topic: Optional[str] + room_type: Optional[str] + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class RoomStats(LargestRoomStats): + joined_local_members: int + version: Optional[str] + creator: Optional[str] + encryption: Optional[str] + federatable: bool + public: bool + + class RoomSortOrder(Enum): """ Enum to define the sorting method used when returning rooms with get_rooms_paginate @@ -204,7 +229,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): allow_none=True, ) - async def get_room_with_stats(self, room_id: str) -> Optional[Dict[str, Any]]: + async def get_room_with_stats(self, room_id: str) -> Optional[RoomStats]: """Retrieve room with statistics. Args: @@ -215,7 +240,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): def get_room_with_stats_txn( txn: LoggingTransaction, room_id: str - ) -> Optional[Dict[str, Any]]: + ) -> Optional[RoomStats]: sql = """ SELECT room_id, state.name, state.canonical_alias, curr.joined_members, curr.local_users_in_room AS joined_local_members, rooms.room_version AS version, @@ -229,15 +254,28 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): WHERE room_id = ? """ txn.execute(sql, [room_id]) - # Catch error if sql returns empty result to return "None" instead of an error - try: - res = self.db_pool.cursor_to_dict(txn)[0] - except IndexError: + row = txn.fetchone() + if not row: return None - - res["federatable"] = bool(res["federatable"]) - res["public"] = bool(res["public"]) - return res + return RoomStats( + room_id=row[0], + name=row[1], + canonical_alias=row[2], + joined_members=row[3], + joined_local_members=row[4], + version=row[5], + creator=row[6], + encryption=row[7], + federatable=bool(row[8]), + public=bool(row[9]), + join_rules=row[10], + guest_access=row[11], + history_visibility=row[12], + state_events=row[13], + avatar=row[14], + topic=row[15], + room_type=row[16], + ) return await self.db_pool.runInteraction( "get_room_with_stats", get_room_with_stats_txn, room_id @@ -368,7 +406,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): bounds: Optional[Tuple[int, str]], forwards: bool, ignore_non_federatable: bool = False, - ) -> List[Dict[str, Any]]: + ) -> List[LargestRoomStats]: """Gets the largest public rooms (where largest is in terms of joined members, as tracked in the statistics table). @@ -505,20 +543,34 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): def _get_largest_public_rooms_txn( txn: LoggingTransaction, - ) -> List[Dict[str, Any]]: + ) -> List[LargestRoomStats]: txn.execute(sql, query_args) - results = self.db_pool.cursor_to_dict(txn) + results = [ + LargestRoomStats( + room_id=r[0], + name=r[1], + canonical_alias=r[3], + joined_members=r[4], + join_rules=r[8], + guest_access=r[7], + history_visibility=r[6], + state_events=0, + avatar=r[5], + topic=r[2], + room_type=r[9], + ) + for r in txn + ] if not forwards: results.reverse() return results - ret_val = await self.db_pool.runInteraction( + return await self.db_pool.runInteraction( "get_largest_public_rooms", _get_largest_public_rooms_txn ) - return ret_val @cached(max_entries=10000) async def is_room_blocked(self, room_id: str) -> Optional[bool]: diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py index e9fbf32c7c..032b89d684 100644 --- a/tests/handlers/test_register.py +++ b/tests/handlers/test_register.py @@ -342,10 +342,10 @@ class RegistrationTestCase(unittest.HomeserverTestCase): # Ensure the room is properly not federated. room = self.get_success(self.store.get_room_with_stats(room_id["room_id"])) assert room is not None - self.assertFalse(room["federatable"]) - self.assertFalse(room["public"]) - self.assertEqual(room["join_rules"], "public") - self.assertIsNone(room["guest_access"]) + self.assertFalse(room.federatable) + self.assertFalse(room.public) + self.assertEqual(room.join_rules, "public") + self.assertIsNone(room.guest_access) # The user should be in the room. rooms = self.get_success(self.store.get_rooms_for_user(user_id)) @@ -372,7 +372,7 @@ class RegistrationTestCase(unittest.HomeserverTestCase): # Ensure the room is properly a public room. room = self.get_success(self.store.get_room_with_stats(room_id["room_id"])) assert room is not None - self.assertEqual(room["join_rules"], "public") + self.assertEqual(room.join_rules, "public") # Both users should be in the room. rooms = self.get_success(self.store.get_rooms_for_user(inviter)) @@ -411,9 +411,9 @@ class RegistrationTestCase(unittest.HomeserverTestCase): # Ensure the room is properly a private room. room = self.get_success(self.store.get_room_with_stats(room_id["room_id"])) assert room is not None - self.assertFalse(room["public"]) - self.assertEqual(room["join_rules"], "invite") - self.assertEqual(room["guest_access"], "can_join") + self.assertFalse(room.public) + self.assertEqual(room.join_rules, "invite") + self.assertEqual(room.guest_access, "can_join") # Both users should be in the room. rooms = self.get_success(self.store.get_rooms_for_user(inviter)) @@ -455,9 +455,9 @@ class RegistrationTestCase(unittest.HomeserverTestCase): # Ensure the room is properly a private room. room = self.get_success(self.store.get_room_with_stats(room_id["room_id"])) assert room is not None - self.assertFalse(room["public"]) - self.assertEqual(room["join_rules"], "invite") - self.assertEqual(room["guest_access"], "can_join") + self.assertFalse(room.public) + self.assertEqual(room.join_rules, "invite") + self.assertEqual(room.guest_access, "can_join") # Both users should be in the room. rooms = self.get_success(self.store.get_rooms_for_user(inviter)) diff --git a/tests/storage/test_main.py b/tests/storage/test_main.py index b8823d6993..01c0e5e671 100644 --- a/tests/storage/test_main.py +++ b/tests/storage/test_main.py @@ -39,11 +39,11 @@ class DataStoreTestCase(unittest.HomeserverTestCase): ) self.assertEqual(1, total) - self.assertEqual(self.displayname, users.pop()["displayname"]) + self.assertEqual(self.displayname, users.pop().displayname) users, total = self.get_success( self.store.get_users_paginate(0, 10, name="BC", guests=False) ) self.assertEqual(1, total) - self.assertEqual(self.displayname, users.pop()["displayname"]) + self.assertEqual(self.displayname, users.pop().displayname) diff --git a/tests/storage/test_room.py b/tests/storage/test_room.py index 1e27f2c275..ce34195a25 100644 --- a/tests/storage/test_room.py +++ b/tests/storage/test_room.py @@ -59,14 +59,9 @@ class RoomStoreTestCase(HomeserverTestCase): def test_get_room_with_stats(self) -> None: res = self.get_success(self.store.get_room_with_stats(self.room.to_string())) assert res is not None - self.assertLessEqual( - { - "room_id": self.room.to_string(), - "creator": self.u_creator.to_string(), - "public": True, - }.items(), - res.items(), - ) + self.assertEqual(res.room_id, self.room.to_string()) + self.assertEqual(res.creator, self.u_creator.to_string()) + self.assertTrue(res.public) def test_get_room_with_stats_unknown_room(self) -> None: self.assertIsNone( -- cgit 1.5.1 From 0afbef30cfb28fbee09989b0a089c86352126ad2 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 2 Nov 2023 09:41:00 -0400 Subject: Use simple_select_many_txn in event persistance code. (#16585) Just to standardize on the normal helpers, it might also have a slight perf improvement on PostgreSQL which will now use `ANY (?)` instead of `IN (?, ?, ...)`. --- changelog.d/16585.misc | 1 + synapse/storage/databases/main/events.py | 16 +++++++++++----- 2 files changed, 12 insertions(+), 5 deletions(-) create mode 100644 changelog.d/16585.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/16585.misc b/changelog.d/16585.misc new file mode 100644 index 0000000000..01f3ecc843 --- /dev/null +++ b/changelog.d/16585.misc @@ -0,0 +1 @@ +Use standard SQL helpers in persistence code. \ No newline at end of file diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 3c1492e3ad..b74ff1c498 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1350,13 +1350,19 @@ class PersistEventsStore: PartialStateConflictError: if attempting to persist a partial state event in a room that has been un-partial stated. """ - txn.execute( - "SELECT event_id, outlier FROM events WHERE event_id in (%s)" - % (",".join(["?"] * len(events_and_contexts)),), - [event.event_id for event, _ in events_and_contexts], + rows = cast( + List[Tuple[str, bool]], + self.db_pool.simple_select_many_txn( + txn, + "events", + "event_id", + [event.event_id for event, _ in events_and_contexts], + keyvalues={}, + retcols=("event_id", "outlier"), + ), ) - have_persisted = dict(cast(Iterable[Tuple[str, bool]], txn)) + have_persisted = dict(rows) logger.debug( "_update_outliers_txn: events=%s have_persisted=%s", -- cgit 1.5.1 From 92828a7f958b2cb1925e2a64ed08c2efb6293787 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 3 Nov 2023 07:30:31 -0400 Subject: Simplify event persistence code (#16584) The event persistence code used to handle multiple rooms at a time, but was simplified to only ever be called with a single room at a time (different rooms are now handled in parallel). The code is still generic to multiple rooms causing a lot of work that is unnecessary (e.g. unnecessary loops, and partitioning data by room). This strips out the ability to handle multiple rooms at once, greatly simplifying the code. --- changelog.d/16584.misc | 1 + changelog.d/16586.misc | 1 + synapse/storage/controllers/persist_events.py | 252 +++++++++-------- synapse/storage/databases/main/events.py | 384 ++++++++++++++------------ 4 files changed, 326 insertions(+), 312 deletions(-) create mode 100644 changelog.d/16584.misc create mode 100644 changelog.d/16586.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/16584.misc b/changelog.d/16584.misc new file mode 100644 index 0000000000..beec8f2301 --- /dev/null +++ b/changelog.d/16584.misc @@ -0,0 +1 @@ +Simplify persistance code to be per-room. diff --git a/changelog.d/16586.misc b/changelog.d/16586.misc new file mode 100644 index 0000000000..f02c4a2060 --- /dev/null +++ b/changelog.d/16586.misc @@ -0,0 +1 @@ +Avoid updating the stream cache unnecessarily. diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index f39ae2d635..1529c86cc5 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -542,13 +542,15 @@ class EventsPersistenceStorageController: return await res.get_state(self._state_controller, StateFilter.all()) async def _persist_event_batch( - self, _room_id: str, task: _PersistEventsTask + self, room_id: str, task: _PersistEventsTask ) -> Dict[str, str]: """Callback for the _event_persist_queue Calculates the change to current state and forward extremities, and persists the given events and with those updates. + Assumes that we are only persisting events for one room at a time. + Returns: A dictionary of event ID to event ID we didn't persist as we already had another event persisted with the same TXN ID. @@ -594,140 +596,23 @@ class EventsPersistenceStorageController: # We can't easily parallelize these since different chunks # might contain the same event. :( - # NB: Assumes that we are only persisting events for one room - # at a time. - - # map room_id->set[event_ids] giving the new forward - # extremities in each room - new_forward_extremities: Dict[str, Set[str]] = {} - - # map room_id->(to_delete, to_insert) where to_delete is a list - # of type/state keys to remove from current state, and to_insert - # is a map (type,key)->event_id giving the state delta in each - # room - state_delta_for_room: Dict[str, DeltaState] = {} + new_forward_extremities = None + state_delta_for_room = None if not backfilled: with Measure(self._clock, "_calculate_state_and_extrem"): - # Work out the new "current state" for each room. + # Work out the new "current state" for the room. # We do this by working out what the new extremities are and then # calculating the state from that. - events_by_room: Dict[str, List[Tuple[EventBase, EventContext]]] = {} - for event, context in chunk: - events_by_room.setdefault(event.room_id, []).append( - (event, context) - ) - - for room_id, ev_ctx_rm in events_by_room.items(): - latest_event_ids = ( - await self.main_store.get_latest_event_ids_in_room(room_id) - ) - new_latest_event_ids = await self._calculate_new_extremities( - room_id, ev_ctx_rm, latest_event_ids - ) - - if new_latest_event_ids == latest_event_ids: - # No change in extremities, so no change in state - continue - - # there should always be at least one forward extremity. - # (except during the initial persistence of the send_join - # results, in which case there will be no existing - # extremities, so we'll `continue` above and skip this bit.) - assert new_latest_event_ids, "No forward extremities left!" - - new_forward_extremities[room_id] = new_latest_event_ids - - len_1 = ( - len(latest_event_ids) == 1 - and len(new_latest_event_ids) == 1 - ) - if len_1: - all_single_prev_not_state = all( - len(event.prev_event_ids()) == 1 - and not event.is_state() - for event, ctx in ev_ctx_rm - ) - # Don't bother calculating state if they're just - # a long chain of single ancestor non-state events. - if all_single_prev_not_state: - continue - - state_delta_counter.inc() - if len(new_latest_event_ids) == 1: - state_delta_single_event_counter.inc() - - # This is a fairly handwavey check to see if we could - # have guessed what the delta would have been when - # processing one of these events. - # What we're interested in is if the latest extremities - # were the same when we created the event as they are - # now. When this server creates a new event (as opposed - # to receiving it over federation) it will use the - # forward extremities as the prev_events, so we can - # guess this by looking at the prev_events and checking - # if they match the current forward extremities. - for ev, _ in ev_ctx_rm: - prev_event_ids = set(ev.prev_event_ids()) - if latest_event_ids == prev_event_ids: - state_delta_reuse_delta_counter.inc() - break - - logger.debug("Calculating state delta for room %s", room_id) - with Measure( - self._clock, "persist_events.get_new_state_after_events" - ): - res = await self._get_new_state_after_events( - room_id, - ev_ctx_rm, - latest_event_ids, - new_latest_event_ids, - ) - current_state, delta_ids, new_latest_event_ids = res - - # there should always be at least one forward extremity. - # (except during the initial persistence of the send_join - # results, in which case there will be no existing - # extremities, so we'll `continue` above and skip this bit.) - assert new_latest_event_ids, "No forward extremities left!" - - new_forward_extremities[room_id] = new_latest_event_ids - - # If either are not None then there has been a change, - # and we need to work out the delta (or use that - # given) - delta = None - if delta_ids is not None: - # If there is a delta we know that we've - # only added or replaced state, never - # removed keys entirely. - delta = DeltaState([], delta_ids) - elif current_state is not None: - with Measure( - self._clock, "persist_events.calculate_state_delta" - ): - delta = await self._calculate_state_delta( - room_id, current_state - ) - - if delta: - # If we have a change of state then lets check - # whether we're actually still a member of the room, - # or if our last user left. If we're no longer in - # the room then we delete the current state and - # extremities. - is_still_joined = await self._is_server_still_joined( - room_id, - ev_ctx_rm, - delta, - ) - if not is_still_joined: - logger.info("Server no longer in room %s", room_id) - delta.no_longer_in_room = True - - state_delta_for_room[room_id] = delta + ( + new_forward_extremities, + state_delta_for_room, + ) = await self._calculate_new_forward_extremities_and_state_delta( + room_id, chunk + ) await self.persist_events_store._persist_events_and_state_updates( + room_id, chunk, state_delta_for_room=state_delta_for_room, new_forward_extremities=new_forward_extremities, @@ -737,6 +622,117 @@ class EventsPersistenceStorageController: return replaced_events + async def _calculate_new_forward_extremities_and_state_delta( + self, room_id: str, ev_ctx_rm: List[Tuple[EventBase, EventContext]] + ) -> Tuple[Optional[Set[str]], Optional[DeltaState]]: + """Calculates the new forward extremities and state delta for a room + given events to persist. + + Assumes that we are only persisting events for one room at a time. + + Returns: + A tuple of: + A set of str giving the new forward extremities the room + + The state delta for the room. + """ + + latest_event_ids = await self.main_store.get_latest_event_ids_in_room(room_id) + new_latest_event_ids = await self._calculate_new_extremities( + room_id, ev_ctx_rm, latest_event_ids + ) + + if new_latest_event_ids == latest_event_ids: + # No change in extremities, so no change in state + return (None, None) + + # there should always be at least one forward extremity. + # (except during the initial persistence of the send_join + # results, in which case there will be no existing + # extremities, so we'll `continue` above and skip this bit.) + assert new_latest_event_ids, "No forward extremities left!" + + new_forward_extremities = new_latest_event_ids + + len_1 = len(latest_event_ids) == 1 and len(new_latest_event_ids) == 1 + if len_1: + all_single_prev_not_state = all( + len(event.prev_event_ids()) == 1 and not event.is_state() + for event, ctx in ev_ctx_rm + ) + # Don't bother calculating state if they're just + # a long chain of single ancestor non-state events. + if all_single_prev_not_state: + return (new_forward_extremities, None) + + state_delta_counter.inc() + if len(new_latest_event_ids) == 1: + state_delta_single_event_counter.inc() + + # This is a fairly handwavey check to see if we could + # have guessed what the delta would have been when + # processing one of these events. + # What we're interested in is if the latest extremities + # were the same when we created the event as they are + # now. When this server creates a new event (as opposed + # to receiving it over federation) it will use the + # forward extremities as the prev_events, so we can + # guess this by looking at the prev_events and checking + # if they match the current forward extremities. + for ev, _ in ev_ctx_rm: + prev_event_ids = set(ev.prev_event_ids()) + if latest_event_ids == prev_event_ids: + state_delta_reuse_delta_counter.inc() + break + + logger.debug("Calculating state delta for room %s", room_id) + with Measure(self._clock, "persist_events.get_new_state_after_events"): + res = await self._get_new_state_after_events( + room_id, + ev_ctx_rm, + latest_event_ids, + new_latest_event_ids, + ) + current_state, delta_ids, new_latest_event_ids = res + + # there should always be at least one forward extremity. + # (except during the initial persistence of the send_join + # results, in which case there will be no existing + # extremities, so we'll `continue` above and skip this bit.) + assert new_latest_event_ids, "No forward extremities left!" + + new_forward_extremities = new_latest_event_ids + + # If either are not None then there has been a change, + # and we need to work out the delta (or use that + # given) + delta = None + if delta_ids is not None: + # If there is a delta we know that we've + # only added or replaced state, never + # removed keys entirely. + delta = DeltaState([], delta_ids) + elif current_state is not None: + with Measure(self._clock, "persist_events.calculate_state_delta"): + delta = await self._calculate_state_delta(room_id, current_state) + + if delta: + # If we have a change of state then lets check + # whether we're actually still a member of the room, + # or if our last user left. If we're no longer in + # the room then we delete the current state and + # extremities. + is_still_joined = await self._is_server_still_joined( + room_id, + ev_ctx_rm, + delta, + ) + if not is_still_joined: + logger.info("Server no longer in room %s", room_id) + delta.no_longer_in_room = True + + return (new_forward_extremities, delta) + async def _calculate_new_extremities( self, room_id: str, diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index b74ff1c498..647ba182f6 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -79,7 +79,7 @@ class DeltaState: Attributes: to_delete: List of type/state_keys to delete from current state to_insert: Map of state to upsert into current state - no_longer_in_room: The server is not longer in the room, so the room + no_longer_in_room: The server is no longer in the room, so the room should e.g. be removed from `current_state_events` table. """ @@ -131,22 +131,25 @@ class PersistEventsStore: @trace async def _persist_events_and_state_updates( self, + room_id: str, events_and_contexts: List[Tuple[EventBase, EventContext]], *, - state_delta_for_room: Dict[str, DeltaState], - new_forward_extremities: Dict[str, Set[str]], + state_delta_for_room: Optional[DeltaState], + new_forward_extremities: Optional[Set[str]], use_negative_stream_ordering: bool = False, inhibit_local_membership_updates: bool = False, ) -> None: """Persist a set of events alongside updates to the current state and - forward extremities tables. + forward extremities tables. + + Assumes that we are only persisting events for one room at a time. Args: + room_id: events_and_contexts: - state_delta_for_room: Map from room_id to the delta to apply to - room state - new_forward_extremities: Map from room_id to set of event IDs - that are the new forward extremities of the room. + state_delta_for_room: The delta to apply to the room state + new_forward_extremities: A set of event IDs that are the new forward + extremities of the room. use_negative_stream_ordering: Whether to start stream_ordering on the negative side and decrement. This should be set as True for backfilled events because backfilled events get a negative @@ -196,6 +199,7 @@ class PersistEventsStore: await self.db_pool.runInteraction( "persist_events", self._persist_events_txn, + room_id=room_id, events_and_contexts=events_and_contexts, inhibit_local_membership_updates=inhibit_local_membership_updates, state_delta_for_room=state_delta_for_room, @@ -221,9 +225,9 @@ class PersistEventsStore: event_counter.labels(event.type, origin_type, origin_entity).inc() - for room_id, latest_event_ids in new_forward_extremities.items(): + if new_forward_extremities: self.store.get_latest_event_ids_in_room.prefill( - (room_id,), frozenset(latest_event_ids) + (room_id,), frozenset(new_forward_extremities) ) async def _get_events_which_are_prevs(self, event_ids: Iterable[str]) -> List[str]: @@ -336,10 +340,11 @@ class PersistEventsStore: self, txn: LoggingTransaction, *, + room_id: str, events_and_contexts: List[Tuple[EventBase, EventContext]], inhibit_local_membership_updates: bool, - state_delta_for_room: Dict[str, DeltaState], - new_forward_extremities: Dict[str, Set[str]], + state_delta_for_room: Optional[DeltaState], + new_forward_extremities: Optional[Set[str]], ) -> None: """Insert some number of room events into the necessary database tables. @@ -347,8 +352,11 @@ class PersistEventsStore: and the rejections table. Things reading from those table will need to check whether the event was rejected. + Assumes that we are only persisting events for one room at a time. + Args: txn + room_id: The room the events are from events_and_contexts: events to persist inhibit_local_membership_updates: Stop the local_current_membership from being updated by these events. This should be set to True @@ -357,10 +365,9 @@ class PersistEventsStore: delete_existing True to purge existing table rows for the events from the database. This is useful when retrying due to IntegrityError. - state_delta_for_room: The current-state delta for each room. - new_forward_extremities: The new forward extremities for each room. - For each room, a list of the event ids which are the forward - extremities. + state_delta_for_room: The current-state delta for the room. + new_forward_extremities: The new forward extremities for the room: + a set of the event ids which are the forward extremities. Raises: PartialStateConflictError: if attempting to persist a partial state event in @@ -376,14 +383,13 @@ class PersistEventsStore: # # Annoyingly SQLite doesn't support row level locking. if isinstance(self.database_engine, PostgresEngine): - for room_id in {e.room_id for e, _ in events_and_contexts}: - txn.execute( - "SELECT room_version FROM rooms WHERE room_id = ? FOR SHARE", - (room_id,), - ) - row = txn.fetchone() - if row is None: - raise Exception(f"Room does not exist {room_id}") + txn.execute( + "SELECT room_version FROM rooms WHERE room_id = ? FOR SHARE", + (room_id,), + ) + row = txn.fetchone() + if row is None: + raise Exception(f"Room does not exist {room_id}") # stream orderings should have been assigned by now assert min_stream_order @@ -419,7 +425,9 @@ class PersistEventsStore: events_and_contexts ) - self._update_room_depths_txn(txn, events_and_contexts=events_and_contexts) + self._update_room_depths_txn( + txn, room_id, events_and_contexts=events_and_contexts + ) # _update_outliers_txn filters out any events which have already been # persisted, and returns the filtered list. @@ -432,11 +440,13 @@ class PersistEventsStore: self._store_event_txn(txn, events_and_contexts=events_and_contexts) - self._update_forward_extremities_txn( - txn, - new_forward_extremities=new_forward_extremities, - max_stream_order=max_stream_order, - ) + if new_forward_extremities: + self._update_forward_extremities_txn( + txn, + room_id, + new_forward_extremities=new_forward_extremities, + max_stream_order=max_stream_order, + ) self._persist_transaction_ids_txn(txn, events_and_contexts) @@ -464,7 +474,10 @@ class PersistEventsStore: # We call this last as it assumes we've inserted the events into # room_memberships, where applicable. # NB: This function invalidates all state related caches - self._update_current_state_txn(txn, state_delta_for_room, min_stream_order) + if state_delta_for_room: + self._update_current_state_txn( + txn, room_id, state_delta_for_room, min_stream_order + ) def _persist_event_auth_chain_txn( self, @@ -1026,74 +1039,75 @@ class PersistEventsStore: await self.db_pool.runInteraction( "update_current_state", self._update_current_state_txn, - state_delta_by_room={room_id: state_delta}, + room_id, + delta_state=state_delta, stream_id=stream_ordering, ) def _update_current_state_txn( self, txn: LoggingTransaction, - state_delta_by_room: Dict[str, DeltaState], + room_id: str, + delta_state: DeltaState, stream_id: int, ) -> None: - for room_id, delta_state in state_delta_by_room.items(): - to_delete = delta_state.to_delete - to_insert = delta_state.to_insert - - # Figure out the changes of membership to invalidate the - # `get_rooms_for_user` cache. - # We find out which membership events we may have deleted - # and which we have added, then we invalidate the caches for all - # those users. - members_changed = { - state_key - for ev_type, state_key in itertools.chain(to_delete, to_insert) - if ev_type == EventTypes.Member - } + to_delete = delta_state.to_delete + to_insert = delta_state.to_insert + + # Figure out the changes of membership to invalidate the + # `get_rooms_for_user` cache. + # We find out which membership events we may have deleted + # and which we have added, then we invalidate the caches for all + # those users. + members_changed = { + state_key + for ev_type, state_key in itertools.chain(to_delete, to_insert) + if ev_type == EventTypes.Member + } - if delta_state.no_longer_in_room: - # Server is no longer in the room so we delete the room from - # current_state_events, being careful we've already updated the - # rooms.room_version column (which gets populated in a - # background task). - self._upsert_room_version_txn(txn, room_id) + if delta_state.no_longer_in_room: + # Server is no longer in the room so we delete the room from + # current_state_events, being careful we've already updated the + # rooms.room_version column (which gets populated in a + # background task). + self._upsert_room_version_txn(txn, room_id) - # Before deleting we populate the current_state_delta_stream - # so that async background tasks get told what happened. - sql = """ + # Before deleting we populate the current_state_delta_stream + # so that async background tasks get told what happened. + sql = """ INSERT INTO current_state_delta_stream (stream_id, instance_name, room_id, type, state_key, event_id, prev_event_id) SELECT ?, ?, room_id, type, state_key, null, event_id FROM current_state_events WHERE room_id = ? """ - txn.execute(sql, (stream_id, self._instance_name, room_id)) + txn.execute(sql, (stream_id, self._instance_name, room_id)) - # We also want to invalidate the membership caches for users - # that were in the room. - users_in_room = self.store.get_users_in_room_txn(txn, room_id) - members_changed.update(users_in_room) + # We also want to invalidate the membership caches for users + # that were in the room. + users_in_room = self.store.get_users_in_room_txn(txn, room_id) + members_changed.update(users_in_room) - self.db_pool.simple_delete_txn( - txn, - table="current_state_events", - keyvalues={"room_id": room_id}, - ) - else: - # We're still in the room, so we update the current state as normal. + self.db_pool.simple_delete_txn( + txn, + table="current_state_events", + keyvalues={"room_id": room_id}, + ) + else: + # We're still in the room, so we update the current state as normal. - # First we add entries to the current_state_delta_stream. We - # do this before updating the current_state_events table so - # that we can use it to calculate the `prev_event_id`. (This - # allows us to not have to pull out the existing state - # unnecessarily). - # - # The stream_id for the update is chosen to be the minimum of the stream_ids - # for the batch of the events that we are persisting; that means we do not - # end up in a situation where workers see events before the - # current_state_delta updates. - # - sql = """ + # First we add entries to the current_state_delta_stream. We + # do this before updating the current_state_events table so + # that we can use it to calculate the `prev_event_id`. (This + # allows us to not have to pull out the existing state + # unnecessarily). + # + # The stream_id for the update is chosen to be the minimum of the stream_ids + # for the batch of the events that we are persisting; that means we do not + # end up in a situation where workers see events before the + # current_state_delta updates. + # + sql = """ INSERT INTO current_state_delta_stream (stream_id, instance_name, room_id, type, state_key, event_id, prev_event_id) SELECT ?, ?, ?, ?, ?, ?, ( @@ -1101,39 +1115,39 @@ class PersistEventsStore: WHERE room_id = ? AND type = ? AND state_key = ? ) """ - txn.execute_batch( - sql, + txn.execute_batch( + sql, + ( ( - ( - stream_id, - self._instance_name, - room_id, - etype, - state_key, - to_insert.get((etype, state_key)), - room_id, - etype, - state_key, - ) - for etype, state_key in itertools.chain(to_delete, to_insert) - ), - ) - # Now we actually update the current_state_events table + stream_id, + self._instance_name, + room_id, + etype, + state_key, + to_insert.get((etype, state_key)), + room_id, + etype, + state_key, + ) + for etype, state_key in itertools.chain(to_delete, to_insert) + ), + ) + # Now we actually update the current_state_events table - txn.execute_batch( - "DELETE FROM current_state_events" - " WHERE room_id = ? AND type = ? AND state_key = ?", - ( - (room_id, etype, state_key) - for etype, state_key in itertools.chain(to_delete, to_insert) - ), - ) + txn.execute_batch( + "DELETE FROM current_state_events" + " WHERE room_id = ? AND type = ? AND state_key = ?", + ( + (room_id, etype, state_key) + for etype, state_key in itertools.chain(to_delete, to_insert) + ), + ) - # We include the membership in the current state table, hence we do - # a lookup when we insert. This assumes that all events have already - # been inserted into room_memberships. - txn.execute_batch( - """INSERT INTO current_state_events + # We include the membership in the current state table, hence we do + # a lookup when we insert. This assumes that all events have already + # been inserted into room_memberships. + txn.execute_batch( + """INSERT INTO current_state_events (room_id, type, state_key, event_id, membership, event_stream_ordering) VALUES ( ?, ?, ?, ?, @@ -1141,34 +1155,34 @@ class PersistEventsStore: (SELECT stream_ordering FROM events WHERE event_id = ?) ) """, - [ - (room_id, key[0], key[1], ev_id, ev_id, ev_id) - for key, ev_id in to_insert.items() - ], - ) + [ + (room_id, key[0], key[1], ev_id, ev_id, ev_id) + for key, ev_id in to_insert.items() + ], + ) - # We now update `local_current_membership`. We do this regardless - # of whether we're still in the room or not to handle the case where - # e.g. we just got banned (where we need to record that fact here). - - # Note: Do we really want to delete rows here (that we do not - # subsequently reinsert below)? While technically correct it means - # we have no record of the fact the user *was* a member of the - # room but got, say, state reset out of it. - if to_delete or to_insert: - txn.execute_batch( - "DELETE FROM local_current_membership" - " WHERE room_id = ? AND user_id = ?", - ( - (room_id, state_key) - for etype, state_key in itertools.chain(to_delete, to_insert) - if etype == EventTypes.Member and self.is_mine_id(state_key) - ), - ) + # We now update `local_current_membership`. We do this regardless + # of whether we're still in the room or not to handle the case where + # e.g. we just got banned (where we need to record that fact here). - if to_insert: - txn.execute_batch( - """INSERT INTO local_current_membership + # Note: Do we really want to delete rows here (that we do not + # subsequently reinsert below)? While technically correct it means + # we have no record of the fact the user *was* a member of the + # room but got, say, state reset out of it. + if to_delete or to_insert: + txn.execute_batch( + "DELETE FROM local_current_membership" + " WHERE room_id = ? AND user_id = ?", + ( + (room_id, state_key) + for etype, state_key in itertools.chain(to_delete, to_insert) + if etype == EventTypes.Member and self.is_mine_id(state_key) + ), + ) + + if to_insert: + txn.execute_batch( + """INSERT INTO local_current_membership (room_id, user_id, event_id, membership, event_stream_ordering) VALUES ( ?, ?, ?, @@ -1176,29 +1190,27 @@ class PersistEventsStore: (SELECT stream_ordering FROM events WHERE event_id = ?) ) """, - [ - (room_id, key[1], ev_id, ev_id, ev_id) - for key, ev_id in to_insert.items() - if key[0] == EventTypes.Member and self.is_mine_id(key[1]) - ], - ) - - txn.call_after( - self.store._curr_state_delta_stream_cache.entity_has_changed, - room_id, - stream_id, + [ + (room_id, key[1], ev_id, ev_id, ev_id) + for key, ev_id in to_insert.items() + if key[0] == EventTypes.Member and self.is_mine_id(key[1]) + ], ) - # Invalidate the various caches - self.store._invalidate_state_caches_and_stream( - txn, room_id, members_changed - ) + txn.call_after( + self.store._curr_state_delta_stream_cache.entity_has_changed, + room_id, + stream_id, + ) - # Check if any of the remote membership changes requires us to - # unsubscribe from their device lists. - self.store.handle_potentially_left_users_txn( - txn, {m for m in members_changed if not self.hs.is_mine_id(m)} - ) + # Invalidate the various caches + self.store._invalidate_state_caches_and_stream(txn, room_id, members_changed) + + # Check if any of the remote membership changes requires us to + # unsubscribe from their device lists. + self.store.handle_potentially_left_users_txn( + txn, {m for m in members_changed if not self.hs.is_mine_id(m)} + ) def _upsert_room_version_txn(self, txn: LoggingTransaction, room_id: str) -> None: """Update the room version in the database based off current state @@ -1232,23 +1244,19 @@ class PersistEventsStore: def _update_forward_extremities_txn( self, txn: LoggingTransaction, - new_forward_extremities: Dict[str, Set[str]], + room_id: str, + new_forward_extremities: Set[str], max_stream_order: int, ) -> None: - for room_id in new_forward_extremities.keys(): - self.db_pool.simple_delete_txn( - txn, table="event_forward_extremities", keyvalues={"room_id": room_id} - ) + self.db_pool.simple_delete_txn( + txn, table="event_forward_extremities", keyvalues={"room_id": room_id} + ) self.db_pool.simple_insert_many_txn( txn, table="event_forward_extremities", keys=("event_id", "room_id"), - values=[ - (ev_id, room_id) - for room_id, new_extrem in new_forward_extremities.items() - for ev_id in new_extrem - ], + values=[(ev_id, room_id) for ev_id in new_forward_extremities], ) # We now insert into stream_ordering_to_exterm a mapping from room_id, # new stream_ordering to new forward extremeties in the room. @@ -1260,8 +1268,7 @@ class PersistEventsStore: keys=("room_id", "event_id", "stream_ordering"), values=[ (room_id, event_id, max_stream_order) - for room_id, new_extrem in new_forward_extremities.items() - for event_id in new_extrem + for event_id in new_forward_extremities ], ) @@ -1298,36 +1305,45 @@ class PersistEventsStore: def _update_room_depths_txn( self, txn: LoggingTransaction, + room_id: str, events_and_contexts: List[Tuple[EventBase, EventContext]], ) -> None: """Update min_depth for each room Args: txn: db connection + room_id: The room ID events_and_contexts: events we are persisting """ - depth_updates: Dict[str, int] = {} + stream_ordering: Optional[int] = None + depth_update = 0 for event, context in events_and_contexts: - # Then update the `stream_ordering` position to mark the latest - # event as the front of the room. This should not be done for - # backfilled events because backfilled events have negative - # stream_ordering and happened in the past so we know that we don't - # need to update the stream_ordering tip/front for the room. + # Don't update the stream ordering for backfilled events because + # backfilled events have negative stream_ordering and happened in the + # past, so we know that we don't need to update the stream_ordering + # tip/front for the room. assert event.internal_metadata.stream_ordering is not None if event.internal_metadata.stream_ordering >= 0: - txn.call_after( - self.store._events_stream_cache.entity_has_changed, - event.room_id, - event.internal_metadata.stream_ordering, - ) + if stream_ordering is None: + stream_ordering = event.internal_metadata.stream_ordering + else: + stream_ordering = max( + stream_ordering, event.internal_metadata.stream_ordering + ) if not event.internal_metadata.is_outlier() and not context.rejected: - depth_updates[event.room_id] = max( - event.depth, depth_updates.get(event.room_id, event.depth) - ) + depth_update = max(event.depth, depth_update) + + # Then update the `stream_ordering` position to mark the latest event as + # the front of the room. + if stream_ordering is not None: + txn.call_after( + self.store._events_stream_cache.entity_has_changed, + room_id, + stream_ordering, + ) - for room_id, depth in depth_updates.items(): - self._update_min_depth_for_room_txn(txn, room_id, depth) + self._update_min_depth_for_room_txn(txn, room_id, depth_update) def _update_outliers_txn( self, -- cgit 1.5.1 From 7e5d3b06fa8b6ce3676eb1178d7db0e252d48679 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 6 Nov 2023 15:41:57 -0500 Subject: Collect information for PushRuleEvaluator in parallel. (#16590) Fetch information needed for push rule evaluation in parallel. Ideally this would use query pipelining, but this is not available in psycopg2. Due to the database thread pool this may result in little to no parallelization. --- changelog.d/16590.misc | 1 + synapse/push/bulk_push_rule_evaluator.py | 56 +++++++++++++++++++++-------- synapse/storage/databases/main/push_rule.py | 50 ++++++++++++++++---------- synapse/util/async_helpers.py | 14 ++++++++ 4 files changed, 87 insertions(+), 34 deletions(-) create mode 100644 changelog.d/16590.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/16590.misc b/changelog.d/16590.misc new file mode 100644 index 0000000000..6db04b0c98 --- /dev/null +++ b/changelog.d/16590.misc @@ -0,0 +1 @@ +Run push rule evaluator setup in parallel. diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 14784312dc..5934b1ef34 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -25,10 +25,13 @@ from typing import ( Sequence, Tuple, Union, + cast, ) from prometheus_client import Counter +from twisted.internet.defer import Deferred + from synapse.api.constants import ( MAIN_TIMELINE, EventContentFields, @@ -40,11 +43,15 @@ from synapse.api.room_versions import PushRuleRoomFlag from synapse.event_auth import auth_types_for_event, get_user_power_level from synapse.events import EventBase, relation_from_event from synapse.events.snapshot import EventContext +from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.state import POWER_KEY from synapse.storage.databases.main.roommember import EventIdMembership +from synapse.storage.roommember import ProfileInfo from synapse.synapse_rust.push import FilteredPushRules, PushRuleEvaluator from synapse.types import JsonValue from synapse.types.state import StateFilter +from synapse.util import unwrapFirstError +from synapse.util.async_helpers import gather_results from synapse.util.caches import register_cache from synapse.util.metrics import measure_func from synapse.visibility import filter_event_for_clients_with_state @@ -342,15 +349,41 @@ class BulkPushRuleEvaluator: rules_by_user = await self._get_rules_for_event(event) actions_by_user: Dict[str, Collection[Union[Mapping, str]]] = {} - room_member_count = await self.store.get_number_joined_users_in_room( - event.room_id - ) - + # Gather a bunch of info in parallel. + # + # This has a lot of ignored types and casting due to the use of @cached + # decorated functions passed into run_in_background. + # + # See https://github.com/matrix-org/synapse/issues/16606 ( - power_levels, - sender_power_level, - ) = await self._get_power_levels_and_sender_level( - event, context, event_id_to_event + room_member_count, + (power_levels, sender_power_level), + related_events, + profiles, + ) = await make_deferred_yieldable( + cast( + "Deferred[Tuple[int, Tuple[dict, Optional[int]], Dict[str, Dict[str, JsonValue]], Mapping[str, ProfileInfo]]]", + gather_results( + ( + run_in_background( # type: ignore[call-arg] + self.store.get_number_joined_users_in_room, event.room_id # type: ignore[arg-type] + ), + run_in_background( + self._get_power_levels_and_sender_level, + event, + context, + event_id_to_event, + ), + run_in_background(self._related_events, event), + run_in_background( # type: ignore[call-arg] + self.store.get_subset_users_in_room_with_profiles, + event.room_id, # type: ignore[arg-type] + rules_by_user.keys(), # type: ignore[arg-type] + ), + ), + consumeErrors=True, + ).addErrback(unwrapFirstError), + ) ) # Find the event's thread ID. @@ -366,8 +399,6 @@ class BulkPushRuleEvaluator: # the parent is part of a thread. thread_id = await self.store.get_thread_id(relation.parent_id) - related_events = await self._related_events(event) - # It's possible that old room versions have non-integer power levels (floats or # strings; even the occasional `null`). For old rooms, we interpret these as if # they were integers. Do this here for the `@room` power level threshold. @@ -400,11 +431,6 @@ class BulkPushRuleEvaluator: self.hs.config.experimental.msc1767_enabled, # MSC3931 flag ) - users = rules_by_user.keys() - profiles = await self.store.get_subset_users_in_room_with_profiles( - event.room_id, users - ) - for uid, rules in rules_by_user.items(): if event.sender == uid: continue diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py index 22025eca56..37135d431d 100644 --- a/synapse/storage/databases/main/push_rule.py +++ b/synapse/storage/databases/main/push_rule.py @@ -28,8 +28,11 @@ from typing import ( cast, ) +from twisted.internet import defer + from synapse.api.errors import StoreError from synapse.config.homeserver import ExperimentalConfig +from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.replication.tcp.streams import PushRulesStream from synapse.storage._base import SQLBaseStore from synapse.storage.database import ( @@ -51,7 +54,8 @@ from synapse.storage.util.id_generators import ( ) from synapse.synapse_rust.push import FilteredPushRules, PushRule, PushRules from synapse.types import JsonDict -from synapse.util import json_encoder +from synapse.util import json_encoder, unwrapFirstError +from synapse.util.async_helpers import gather_results from synapse.util.caches.descriptors import cached, cachedList from synapse.util.caches.stream_change_cache import StreamChangeCache @@ -249,23 +253,33 @@ class PushRulesWorkerStore( user_id: [] for user_id in user_ids } - rows = cast( - List[Tuple[str, str, int, int, str, str]], - await self.db_pool.simple_select_many_batch( - table="push_rules", - column="user_name", - iterable=user_ids, - retcols=( - "user_name", - "rule_id", - "priority_class", - "priority", - "conditions", - "actions", + # gatherResults loses all type information. + rows, enabled_map_by_user = await make_deferred_yieldable( + gather_results( + ( + cast( + "defer.Deferred[List[Tuple[str, str, int, int, str, str]]]", + run_in_background( + self.db_pool.simple_select_many_batch, + table="push_rules", + column="user_name", + iterable=user_ids, + retcols=( + "user_name", + "rule_id", + "priority_class", + "priority", + "conditions", + "actions", + ), + desc="bulk_get_push_rules", + batch_size=1000, + ), + ), + run_in_background(self.bulk_get_push_rules_enabled, user_ids), ), - desc="bulk_get_push_rules", - batch_size=1000, - ), + consumeErrors=True, + ).addErrback(unwrapFirstError) ) # Sort by highest priority_class, then highest priority. @@ -276,8 +290,6 @@ class PushRulesWorkerStore( (rule_id, priority_class, conditions, actions) ) - enabled_map_by_user = await self.bulk_get_push_rules_enabled(user_ids) - results: Dict[str, FilteredPushRules] = {} for user_id, rules in raw_rules.items(): diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py index 0cbeb0c365..8a55e4e41d 100644 --- a/synapse/util/async_helpers.py +++ b/synapse/util/async_helpers.py @@ -345,6 +345,7 @@ async def yieldable_gather_results_delaying_cancellation( T1 = TypeVar("T1") T2 = TypeVar("T2") T3 = TypeVar("T3") +T4 = TypeVar("T4") @overload @@ -380,6 +381,19 @@ def gather_results( ... +@overload +def gather_results( + deferredList: Tuple[ + "defer.Deferred[T1]", + "defer.Deferred[T2]", + "defer.Deferred[T3]", + "defer.Deferred[T4]", + ], + consumeErrors: bool = ..., +) -> "defer.Deferred[Tuple[T1, T2, T3, T4]]": + ... + + def gather_results( # type: ignore[misc] deferredList: Tuple["defer.Deferred[T1]", ...], consumeErrors: bool = False, -- cgit 1.5.1 From 9738b1c4975b293a1bc25ee27b5527724038baa1 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 7 Nov 2023 14:00:25 -0500 Subject: Avoid executing no-op queries. (#16583) If simple_{insert,upsert,update}_many_txn is called without any data to modify then return instead of executing the query. This matches the behavior of simple_{select,delete}_many_txn. --- changelog.d/16583.misc | 1 + synapse/storage/database.py | 32 ++++++++++++++++++++++--------- synapse/storage/databases/main/devices.py | 2 +- synapse/storage/databases/main/events.py | 12 ++++++------ synapse/storage/databases/main/room.py | 2 +- synapse/storage/databases/main/search.py | 4 ++-- tests/storage/test_base.py | 25 +++++------------------- 7 files changed, 39 insertions(+), 39 deletions(-) create mode 100644 changelog.d/16583.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/16583.misc b/changelog.d/16583.misc new file mode 100644 index 0000000000..df5b27b112 --- /dev/null +++ b/changelog.d/16583.misc @@ -0,0 +1 @@ +Avoid executing no-op queries. diff --git a/synapse/storage/database.py b/synapse/storage/database.py index abc7d8a5d2..792f2e7cdf 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -1117,7 +1117,7 @@ class DatabasePool: txn: LoggingTransaction, table: str, keys: Collection[str], - values: Iterable[Iterable[Any]], + values: Collection[Iterable[Any]], ) -> None: """Executes an INSERT query on the named table. @@ -1130,6 +1130,9 @@ class DatabasePool: keys: list of column names values: for each row, a list of values in the same order as `keys` """ + # If there's nothing to insert, then skip executing the query. + if not values: + return if isinstance(txn.database_engine, PostgresEngine): # We use `execute_values` as it can be a lot faster than `execute_batch`, @@ -1455,7 +1458,7 @@ class DatabasePool: key_names: Collection[str], key_values: Collection[Iterable[Any]], value_names: Collection[str], - value_values: Iterable[Iterable[Any]], + value_values: Collection[Iterable[Any]], ) -> None: """ Upsert, many times. @@ -1468,6 +1471,19 @@ class DatabasePool: value_values: A list of each row's value column values. Ignored if value_names is empty. """ + # If there's nothing to upsert, then skip executing the query. + if not key_values: + return + + # No value columns, therefore make a blank list so that the following + # zip() works correctly. + if not value_names: + value_values = [() for x in range(len(key_values))] + elif len(value_values) != len(key_values): + raise ValueError( + f"{len(key_values)} key rows and {len(value_values)} value rows: should be the same number." + ) + if table not in self._unsafe_to_upsert_tables: return self.simple_upsert_many_txn_native_upsert( txn, table, key_names, key_values, value_names, value_values @@ -1502,10 +1518,6 @@ class DatabasePool: value_values: A list of each row's value column values. Ignored if value_names is empty. """ - # No value columns, therefore make a blank list so that the following - # zip() works correctly. - if not value_names: - value_values = [() for x in range(len(key_values))] # Lock the table just once, to prevent it being done once per row. # Note that, according to Postgres' documentation, once obtained, @@ -1543,10 +1555,7 @@ class DatabasePool: allnames.extend(value_names) if not value_names: - # No value columns, therefore make a blank list so that the - # following zip() works correctly. latter = "NOTHING" - value_values = [() for x in range(len(key_values))] else: latter = "UPDATE SET " + ", ".join( k + "=EXCLUDED." + k for k in value_names @@ -1910,6 +1919,7 @@ class DatabasePool: Returns: The results as a list of tuples. """ + # If there's nothing to select, then skip executing the query. if not iterable: return [] @@ -2044,6 +2054,9 @@ class DatabasePool: raise ValueError( f"{len(key_values)} key rows and {len(value_values)} value rows: should be the same number." ) + # If there is nothing to update, then skip executing the query. + if not key_values: + return # List of tuples of (value values, then key values) # (This matches the order needed for the query) @@ -2278,6 +2291,7 @@ class DatabasePool: Returns: Number rows deleted """ + # If there's nothing to delete, then skip executing the query. if not values: return 0 diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index b0811a4cf1..04d12a876c 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -703,7 +703,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): key_names=("destination", "user_id"), key_values=[(destination, user_id) for user_id, _ in rows], value_names=("stream_id",), - value_values=((stream_id,) for _, stream_id in rows), + value_values=[(stream_id,) for _, stream_id in rows], ) # Delete all sent outbound pokes diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 647ba182f6..7c34bde3e5 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1476,7 +1476,7 @@ class PersistEventsStore: txn, table="event_json", keys=("event_id", "room_id", "internal_metadata", "json", "format_version"), - values=( + values=[ ( event.event_id, event.room_id, @@ -1485,7 +1485,7 @@ class PersistEventsStore: event.format_version, ) for event, _ in events_and_contexts - ), + ], ) self.db_pool.simple_insert_many_txn( @@ -1508,7 +1508,7 @@ class PersistEventsStore: "state_key", "rejection_reason", ), - values=( + values=[ ( self._instance_name, event.internal_metadata.stream_ordering, @@ -1527,7 +1527,7 @@ class PersistEventsStore: context.rejected, ) for event, context in events_and_contexts - ), + ], ) # If we're persisting an unredacted event we go and ensure @@ -1550,11 +1550,11 @@ class PersistEventsStore: txn, table="state_events", keys=("event_id", "room_id", "type", "state_key"), - values=( + values=[ (event.event_id, event.room_id, event.type, event.state_key) for event, _ in events_and_contexts if event.is_state() - ), + ], ) def _store_rejected_events_txn( diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 6d4b9891e7..afb880532e 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -2268,7 +2268,7 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore): txn, table="partial_state_rooms_servers", keys=("room_id", "server_name"), - values=((room_id, s) for s in servers), + values=[(room_id, s) for s in servers], ) self._invalidate_cache_and_stream(txn, self.is_partial_state_room, (room_id,)) self._invalidate_cache_and_stream( diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py index dbde9130c6..f4bef4c99b 100644 --- a/synapse/storage/databases/main/search.py +++ b/synapse/storage/databases/main/search.py @@ -106,7 +106,7 @@ class SearchWorkerStore(SQLBaseStore): txn, table="event_search", keys=("event_id", "room_id", "key", "value"), - values=( + values=[ ( entry.event_id, entry.room_id, @@ -114,7 +114,7 @@ class SearchWorkerStore(SQLBaseStore): _clean_value_for_search(entry.value), ) for entry in entries - ), + ], ) else: diff --git a/tests/storage/test_base.py b/tests/storage/test_base.py index b4c490b568..de4fcfe026 100644 --- a/tests/storage/test_base.py +++ b/tests/storage/test_base.py @@ -189,17 +189,9 @@ class SQLBaseStoreTestCase(unittest.TestCase): ) if USE_POSTGRES_FOR_TESTS: - self.mock_execute_values.assert_called_once_with( - self.mock_txn, - "INSERT INTO tablename (col1, col2) VALUES ?", - [], - template=None, - fetch=False, - ) + self.mock_execute_values.assert_not_called() else: - self.mock_txn.executemany.assert_called_once_with( - "INSERT INTO tablename (col1, col2) VALUES(?, ?)", [] - ) + self.mock_txn.executemany.assert_not_called() @defer.inlineCallbacks def test_select_one_1col(self) -> Generator["defer.Deferred[object]", object, None]: @@ -393,7 +385,7 @@ class SQLBaseStoreTestCase(unittest.TestCase): ) @defer.inlineCallbacks - def test_update_many_no_values( + def test_update_many_no_iterable( self, ) -> Generator["defer.Deferred[object]", object, None]: yield defer.ensureDeferred( @@ -408,16 +400,9 @@ class SQLBaseStoreTestCase(unittest.TestCase): ) if USE_POSTGRES_FOR_TESTS: - self.mock_execute_batch.assert_called_once_with( - self.mock_txn, - "UPDATE tablename SET col3 = ? WHERE col1 = ? AND col2 = ?", - [], - ) + self.mock_execute_batch.assert_not_called() else: - self.mock_txn.executemany.assert_called_once_with( - "UPDATE tablename SET col3 = ? WHERE col1 = ? AND col2 = ?", - [], - ) + self.mock_txn.executemany.assert_not_called() @defer.inlineCallbacks def test_delete_one(self) -> Generator["defer.Deferred[object]", object, None]: -- cgit 1.5.1