From 147f098fb4ac7ae435bae7d29c05f93b43472854 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 10 May 2022 15:35:08 +0100 Subject: Stop writing to `event_reference_hashes` (#12679) This table is never read, since #11794. We stop writing to it; in future we can drop it altogether. --- synapse/storage/databases/main/events.py | 25 ------------------------- synapse/storage/databases/main/purge_events.py | 3 --- 2 files changed, 28 deletions(-) (limited to 'synapse/storage/databases') diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index ed29a0a5e2..ad611b2c0b 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -36,7 +36,6 @@ from prometheus_client import Counter import synapse.metrics from synapse.api.constants import EventContentFields, EventTypes, RelationTypes from synapse.api.room_versions import RoomVersions -from synapse.crypto.event_signing import compute_event_reference_hash from synapse.events import EventBase # noqa: F401 from synapse.events.snapshot import EventContext # noqa: F401 from synapse.storage._base import db_to_json, make_in_list_sql_clause @@ -1600,11 +1599,6 @@ class PersistEventsStore: inhibit_local_membership_updates=inhibit_local_membership_updates, ) - # Insert event_reference_hashes table. - self._store_event_reference_hashes_txn( - txn, [event for event, _ in events_and_contexts] - ) - # Prefill the event cache self._add_to_cache(txn, events_and_contexts) @@ -1704,25 +1698,6 @@ class PersistEventsStore: values={"event_id": event_id, "expiry_ts": expiry_ts}, ) - def _store_event_reference_hashes_txn(self, txn, events): - """Store a hash for a PDU - Args: - txn (cursor): - events (list): list of Events. - """ - - vals = [] - for event in events: - ref_alg, ref_hash_bytes = compute_event_reference_hash(event) - vals.append((event.event_id, ref_alg, memoryview(ref_hash_bytes))) - - self.db_pool.simple_insert_many_txn( - txn, - table="event_reference_hashes", - keys=("event_id", "algorithm", "hash"), - values=vals, - ) - def _store_room_members_txn( self, txn, events, *, inhibit_local_membership_updates: bool = False ): diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py index bfc85b3add..38ba91af4c 100644 --- a/synapse/storage/databases/main/purge_events.py +++ b/synapse/storage/databases/main/purge_events.py @@ -69,7 +69,6 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore): # event_forward_extremities # event_json # event_push_actions - # event_reference_hashes # event_relations # event_search # event_to_state_groups @@ -220,7 +219,6 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore): "event_auth", "event_edges", "event_forward_extremities", - "event_reference_hashes", "event_relations", "event_search", "rejections", @@ -369,7 +367,6 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore): "event_edges", "event_json", "event_push_actions_staging", - "event_reference_hashes", "event_relations", "event_to_state_groups", "event_auth_chains", -- cgit 1.5.1 From 989fa3309655e2ebd5416f4b09a98edfb1b2caa8 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Tue, 10 May 2022 20:07:48 +0200 Subject: Add some type hints to datastore. (#12477) --- changelog.d/12477.misc | 1 + synapse/events/snapshot.py | 3 +- synapse/storage/databases/main/events.py | 156 ++++++++++++++++++++----------- synapse/storage/databases/main/search.py | 33 ++++--- 4 files changed, 122 insertions(+), 71 deletions(-) create mode 100644 changelog.d/12477.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/12477.misc b/changelog.d/12477.misc new file mode 100644 index 0000000000..e793d08e5e --- /dev/null +++ b/changelog.d/12477.misc @@ -0,0 +1 @@ +Add some type hints to datastore. \ No newline at end of file diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index 46042b2bf7..8120c305df 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -15,6 +15,7 @@ from typing import TYPE_CHECKING, List, Optional, Tuple, Union import attr from frozendict import frozendict +from typing_extensions import Literal from twisted.internet.defer import Deferred @@ -106,7 +107,7 @@ class EventContext: incomplete state. """ - rejected: Union[bool, str] = False + rejected: Union[Literal[False], str] = False _state_group: Optional[int] = None state_group_before_event: Optional[int] = None prev_group: Optional[int] = None diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index ad611b2c0b..6c12653bb3 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -49,7 +49,7 @@ from synapse.storage.databases.main.search import SearchEntry from synapse.storage.engines.postgres import PostgresEngine from synapse.storage.util.id_generators import AbstractStreamIdGenerator from synapse.storage.util.sequence import SequenceGenerator -from synapse.types import StateMap, get_domain_from_id +from synapse.types import JsonDict, StateMap, get_domain_from_id from synapse.util import json_encoder from synapse.util.iterutils import batch_iter, sorted_topologically @@ -235,7 +235,9 @@ class PersistEventsStore: """ results: List[str] = [] - def _get_events_which_are_prevs_txn(txn, batch): + def _get_events_which_are_prevs_txn( + txn: LoggingTransaction, batch: Collection[str] + ) -> None: sql = """ SELECT prev_event_id, internal_metadata FROM event_edges @@ -285,7 +287,9 @@ class PersistEventsStore: # and their prev events. existing_prevs = set() - def _get_prevs_before_rejected_txn(txn, batch): + def _get_prevs_before_rejected_txn( + txn: LoggingTransaction, batch: Collection[str] + ) -> None: to_recursively_check = batch while to_recursively_check: @@ -515,7 +519,7 @@ class PersistEventsStore: @classmethod def _add_chain_cover_index( cls, - txn, + txn: LoggingTransaction, db_pool: DatabasePool, event_chain_id_gen: SequenceGenerator, event_to_room_id: Dict[str, str], @@ -809,7 +813,7 @@ class PersistEventsStore: @staticmethod def _allocate_chain_ids( - txn, + txn: LoggingTransaction, db_pool: DatabasePool, event_chain_id_gen: SequenceGenerator, event_to_room_id: Dict[str, str], @@ -943,7 +947,7 @@ class PersistEventsStore: self, txn: LoggingTransaction, events_and_contexts: List[Tuple[EventBase, EventContext]], - ): + ) -> None: """Persist the mapping from transaction IDs to event IDs (if defined).""" to_insert = [] @@ -997,7 +1001,7 @@ class PersistEventsStore: txn: LoggingTransaction, state_delta_by_room: Dict[str, DeltaState], stream_id: int, - ): + ) -> None: for room_id, delta_state in state_delta_by_room.items(): to_delete = delta_state.to_delete to_insert = delta_state.to_insert @@ -1155,7 +1159,7 @@ class PersistEventsStore: txn, room_id, members_changed ) - def _upsert_room_version_txn(self, txn: LoggingTransaction, room_id: str): + def _upsert_room_version_txn(self, txn: LoggingTransaction, room_id: str) -> None: """Update the room version in the database based off current state events. @@ -1189,7 +1193,7 @@ class PersistEventsStore: txn: LoggingTransaction, new_forward_extremities: Dict[str, Set[str]], max_stream_order: int, - ): + ) -> None: for room_id in new_forward_extremities.keys(): self.db_pool.simple_delete_txn( txn, table="event_forward_extremities", keyvalues={"room_id": room_id} @@ -1254,9 +1258,9 @@ class PersistEventsStore: def _update_room_depths_txn( self, - txn, + txn: LoggingTransaction, events_and_contexts: List[Tuple[EventBase, EventContext]], - ): + ) -> None: """Update min_depth for each room Args: @@ -1385,7 +1389,7 @@ class PersistEventsStore: # nothing to do here return - def event_dict(event): + def event_dict(event: EventBase) -> JsonDict: d = event.get_dict() d.pop("redacted", None) d.pop("redacted_because", None) @@ -1476,18 +1480,20 @@ class PersistEventsStore: ), ) - def _store_rejected_events_txn(self, txn, events_and_contexts): + def _store_rejected_events_txn( + self, + txn: LoggingTransaction, + events_and_contexts: List[Tuple[EventBase, EventContext]], + ) -> List[Tuple[EventBase, EventContext]]: """Add rows to the 'rejections' table for received events which were rejected Args: - txn (twisted.enterprise.adbapi.Connection): db connection - events_and_contexts (list[(EventBase, EventContext)]): events - we are persisting + txn: db connection + events_and_contexts: events we are persisting Returns: - list[(EventBase, EventContext)] new list, without the rejected - events. + new list, without the rejected events. """ # Remove the rejected events from the list now that we've added them # to the events table and the events_json table. @@ -1508,7 +1514,7 @@ class PersistEventsStore: events_and_contexts: List[Tuple[EventBase, EventContext]], all_events_and_contexts: List[Tuple[EventBase, EventContext]], inhibit_local_membership_updates: bool = False, - ): + ) -> None: """Update all the miscellaneous tables for new events Args: @@ -1602,7 +1608,11 @@ class PersistEventsStore: # Prefill the event cache self._add_to_cache(txn, events_and_contexts) - def _add_to_cache(self, txn, events_and_contexts): + def _add_to_cache( + self, + txn: LoggingTransaction, + events_and_contexts: List[Tuple[EventBase, EventContext]], + ) -> None: to_prefill = [] rows = [] @@ -1633,7 +1643,7 @@ class PersistEventsStore: if not row["rejects"] and not row["redacts"]: to_prefill.append(EventCacheEntry(event=event, redacted_event=None)) - def prefill(): + def prefill() -> None: for cache_entry in to_prefill: self.store._get_event_cache.set( (cache_entry.event.event_id,), cache_entry @@ -1663,19 +1673,24 @@ class PersistEventsStore: ) def insert_labels_for_event_txn( - self, txn, event_id, labels, room_id, topological_ordering - ): + self, + txn: LoggingTransaction, + event_id: str, + labels: List[str], + room_id: str, + topological_ordering: int, + ) -> None: """Store the mapping between an event's ID and its labels, with one row per (event_id, label) tuple. Args: - txn (LoggingTransaction): The transaction to execute. - event_id (str): The event's ID. - labels (list[str]): A list of text labels. - room_id (str): The ID of the room the event was sent to. - topological_ordering (int): The position of the event in the room's topology. + txn: The transaction to execute. + event_id: The event's ID. + labels: A list of text labels. + room_id: The ID of the room the event was sent to. + topological_ordering: The position of the event in the room's topology. """ - return self.db_pool.simple_insert_many_txn( + self.db_pool.simple_insert_many_txn( txn=txn, table="event_labels", keys=("event_id", "label", "room_id", "topological_ordering"), @@ -1684,25 +1699,32 @@ class PersistEventsStore: ], ) - def _insert_event_expiry_txn(self, txn, event_id, expiry_ts): + def _insert_event_expiry_txn( + self, txn: LoggingTransaction, event_id: str, expiry_ts: int + ) -> None: """Save the expiry timestamp associated with a given event ID. Args: - txn (LoggingTransaction): The database transaction to use. - event_id (str): The event ID the expiry timestamp is associated with. - expiry_ts (int): The timestamp at which to expire (delete) the event. + txn: The database transaction to use. + event_id: The event ID the expiry timestamp is associated with. + expiry_ts: The timestamp at which to expire (delete) the event. """ - return self.db_pool.simple_insert_txn( + self.db_pool.simple_insert_txn( txn=txn, table="event_expiry", values={"event_id": event_id, "expiry_ts": expiry_ts}, ) def _store_room_members_txn( - self, txn, events, *, inhibit_local_membership_updates: bool = False - ): + self, + txn: LoggingTransaction, + events: List[EventBase], + *, + inhibit_local_membership_updates: bool = False, + ) -> None: """ Store a room member in the database. + Args: txn: The transaction to use. events: List of events to store. @@ -1742,6 +1764,7 @@ class PersistEventsStore: ) for event in events: + assert event.internal_metadata.stream_ordering is not None txn.call_after( self.store._membership_stream_cache.entity_has_changed, event.state_key, @@ -1838,7 +1861,9 @@ class PersistEventsStore: (parent_id, event.sender), ) - def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase): + def _handle_insertion_event( + self, txn: LoggingTransaction, event: EventBase + ) -> None: """Handles keeping track of insertion events and edges/connections. Part of MSC2716. @@ -1899,7 +1924,7 @@ class PersistEventsStore: }, ) - def _handle_batch_event(self, txn: LoggingTransaction, event: EventBase): + def _handle_batch_event(self, txn: LoggingTransaction, event: EventBase) -> None: """Handles inserting the batch edges/connections between the batch event and an insertion event. Part of MSC2716. @@ -1999,25 +2024,29 @@ class PersistEventsStore: txn, table="event_relations", keyvalues={"event_id": redacted_event_id} ) - def _store_room_topic_txn(self, txn: LoggingTransaction, event: EventBase): + def _store_room_topic_txn(self, txn: LoggingTransaction, event: EventBase) -> None: if isinstance(event.content.get("topic"), str): self.store_event_search_txn( txn, event, "content.topic", event.content["topic"] ) - def _store_room_name_txn(self, txn: LoggingTransaction, event: EventBase): + def _store_room_name_txn(self, txn: LoggingTransaction, event: EventBase) -> None: if isinstance(event.content.get("name"), str): self.store_event_search_txn( txn, event, "content.name", event.content["name"] ) - def _store_room_message_txn(self, txn: LoggingTransaction, event: EventBase): + def _store_room_message_txn( + self, txn: LoggingTransaction, event: EventBase + ) -> None: if isinstance(event.content.get("body"), str): self.store_event_search_txn( txn, event, "content.body", event.content["body"] ) - def _store_retention_policy_for_room_txn(self, txn, event): + def _store_retention_policy_for_room_txn( + self, txn: LoggingTransaction, event: EventBase + ) -> None: if not event.is_state(): logger.debug("Ignoring non-state m.room.retention event") return @@ -2077,8 +2106,11 @@ class PersistEventsStore: ) def _set_push_actions_for_event_and_users_txn( - self, txn, events_and_contexts, all_events_and_contexts - ): + self, + txn: LoggingTransaction, + events_and_contexts: List[Tuple[EventBase, EventContext]], + all_events_and_contexts: List[Tuple[EventBase, EventContext]], + ) -> None: """Handles moving push actions from staging table to main event_push_actions table for all events in `events_and_contexts`. @@ -2086,12 +2118,10 @@ class PersistEventsStore: from the push action staging area. Args: - events_and_contexts (list[(EventBase, EventContext)]): events - we are persisting - all_events_and_contexts (list[(EventBase, EventContext)]): all - events that we were going to persist. This includes events - we've already persisted, etc, that wouldn't appear in - events_and_context. + events_and_contexts: events we are persisting + all_events_and_contexts: all events that we were going to persist. + This includes events we've already persisted, etc, that wouldn't + appear in events_and_context. """ # Only non outlier events will have push actions associated with them, @@ -2160,7 +2190,9 @@ class PersistEventsStore: ), ) - def _remove_push_actions_for_event_id_txn(self, txn, room_id, event_id): + def _remove_push_actions_for_event_id_txn( + self, txn: LoggingTransaction, room_id: str, event_id: str + ) -> None: # Sad that we have to blow away the cache for the whole room here txn.call_after( self.store.get_unread_event_push_actions_by_room_for_user.invalidate, @@ -2171,7 +2203,9 @@ class PersistEventsStore: (room_id, event_id), ) - def _store_rejections_txn(self, txn, event_id, reason): + def _store_rejections_txn( + self, txn: LoggingTransaction, event_id: str, reason: str + ) -> None: self.db_pool.simple_insert_txn( txn, table="rejections", @@ -2183,8 +2217,10 @@ class PersistEventsStore: ) def _store_event_state_mappings_txn( - self, txn, events_and_contexts: Iterable[Tuple[EventBase, EventContext]] - ): + self, + txn: LoggingTransaction, + events_and_contexts: Collection[Tuple[EventBase, EventContext]], + ) -> None: state_groups = {} for event, context in events_and_contexts: if event.internal_metadata.is_outlier(): @@ -2241,7 +2277,9 @@ class PersistEventsStore: state_group_id, ) - def _update_min_depth_for_room_txn(self, txn, room_id, depth): + def _update_min_depth_for_room_txn( + self, txn: LoggingTransaction, room_id: str, depth: int + ) -> None: min_depth = self.store._get_min_depth_interaction(txn, room_id) if min_depth is not None and depth >= min_depth: @@ -2254,7 +2292,9 @@ class PersistEventsStore: values={"min_depth": depth}, ) - def _handle_mult_prev_events(self, txn, events): + def _handle_mult_prev_events( + self, txn: LoggingTransaction, events: List[EventBase] + ) -> None: """ For the given event, update the event edges table and forward and backward extremities tables. @@ -2272,7 +2312,9 @@ class PersistEventsStore: self._update_backward_extremeties(txn, events) - def _update_backward_extremeties(self, txn, events): + def _update_backward_extremeties( + self, txn: LoggingTransaction, events: List[EventBase] + ) -> None: """Updates the event_backward_extremities tables based on the new/updated events being persisted. diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py index 3c49e7ec98..78e0773b2a 100644 --- a/synapse/storage/databases/main/search.py +++ b/synapse/storage/databases/main/search.py @@ -14,7 +14,7 @@ import logging import re -from typing import TYPE_CHECKING, Any, Collection, Iterable, List, Optional, Set +from typing import TYPE_CHECKING, Any, Collection, Iterable, List, Optional, Set, Tuple import attr @@ -27,7 +27,7 @@ from synapse.storage.database import ( LoggingTransaction, ) from synapse.storage.databases.main.events_worker import EventRedactBehaviour -from synapse.storage.engines import PostgresEngine, Sqlite3Engine +from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine from synapse.types import JsonDict if TYPE_CHECKING: @@ -149,7 +149,9 @@ class SearchBackgroundUpdateStore(SearchWorkerStore): self.EVENT_SEARCH_DELETE_NON_STRINGS, self._background_delete_non_strings ) - async def _background_reindex_search(self, progress, batch_size): + async def _background_reindex_search( + self, progress: JsonDict, batch_size: int + ) -> int: # we work through the events table from highest stream id to lowest target_min_stream_id = progress["target_min_stream_id_inclusive"] max_stream_id = progress["max_stream_id_exclusive"] @@ -157,7 +159,7 @@ class SearchBackgroundUpdateStore(SearchWorkerStore): TYPES = ["m.room.name", "m.room.message", "m.room.topic"] - def reindex_search_txn(txn): + def reindex_search_txn(txn: LoggingTransaction) -> int: sql = ( "SELECT stream_ordering, event_id, room_id, type, json, " " origin_server_ts FROM events" @@ -255,12 +257,14 @@ class SearchBackgroundUpdateStore(SearchWorkerStore): return result - async def _background_reindex_gin_search(self, progress, batch_size): + async def _background_reindex_gin_search( + self, progress: JsonDict, batch_size: int + ) -> int: """This handles old synapses which used GIST indexes, if any; converting them back to be GIN as per the actual schema. """ - def create_index(conn): + def create_index(conn: LoggingDatabaseConnection) -> None: conn.rollback() # we have to set autocommit, because postgres refuses to @@ -299,7 +303,9 @@ class SearchBackgroundUpdateStore(SearchWorkerStore): ) return 1 - async def _background_reindex_search_order(self, progress, batch_size): + async def _background_reindex_search_order( + self, progress: JsonDict, batch_size: int + ) -> int: target_min_stream_id = progress["target_min_stream_id_inclusive"] max_stream_id = progress["max_stream_id_exclusive"] rows_inserted = progress.get("rows_inserted", 0) @@ -307,7 +313,7 @@ class SearchBackgroundUpdateStore(SearchWorkerStore): if not have_added_index: - def create_index(conn): + def create_index(conn: LoggingDatabaseConnection) -> None: conn.rollback() conn.set_session(autocommit=True) c = conn.cursor() @@ -336,7 +342,7 @@ class SearchBackgroundUpdateStore(SearchWorkerStore): pg, ) - def reindex_search_txn(txn): + def reindex_search_txn(txn: LoggingTransaction) -> Tuple[int, bool]: sql = ( "UPDATE event_search AS es SET stream_ordering = e.stream_ordering," " origin_server_ts = e.origin_server_ts" @@ -644,7 +650,8 @@ class SearchStore(SearchBackgroundUpdateStore): else: raise Exception("Unrecognized database engine") - args.append(limit) + # mypy expects to append only a `str`, not an `int` + args.append(limit) # type: ignore[arg-type] results = await self.db_pool.execute( "search_rooms", self.db_pool.cursor_to_dict, sql, *args @@ -705,7 +712,7 @@ class SearchStore(SearchBackgroundUpdateStore): A set of strings. """ - def f(txn): + def f(txn: LoggingTransaction) -> Set[str]: highlight_words = set() for event in events: # As a hack we simply join values of all possible keys. This is @@ -759,11 +766,11 @@ class SearchStore(SearchBackgroundUpdateStore): return await self.db_pool.runInteraction("_find_highlights", f) -def _to_postgres_options(options_dict): +def _to_postgres_options(options_dict: JsonDict) -> str: return "'%s'" % (",".join("%s=%s" % (k, v) for k, v in options_dict.items()),) -def _parse_query(database_engine, search_term): +def _parse_query(database_engine: BaseDatabaseEngine, search_term: str) -> str: """Takes a plain unicode string from the user and converts it into a form that can be passed to database. We use this so that we can add prefix matching, which isn't something -- cgit 1.5.1 From c72d26c1e1e997e63cef1c474010a7db783f8022 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 10 May 2022 20:43:13 +0100 Subject: Refactor `EventContext` (#12689) Refactor how the `EventContext` class works, with the intention of reducing the amount of state we fetch from the DB during event processing. The idea here is to get rid of the cached `current_state_ids` and `prev_state_ids` that live in the `EventContext`, and instead defer straight to the database (and its caching). One change that may have a noticeable effect is that we now no longer prefill the `get_current_state_ids` cache on a state change. However, that query is relatively light, since its just a case of reading a table from the DB (unlike fetching state at an event which is more heavyweight). For deployments with workers this cache isn't even used. Part of #12684 --- changelog.d/12689.misc | 1 + synapse/events/snapshot.py | 177 ++++++------------------------- synapse/handlers/federation.py | 6 +- synapse/handlers/federation_event.py | 6 +- synapse/handlers/message.py | 6 +- synapse/push/action_generator.py | 4 + synapse/state/__init__.py | 9 +- synapse/storage/databases/main/events.py | 6 -- synapse/storage/persist_events.py | 42 ++------ tests/handlers/test_federation_event.py | 4 +- tests/storage/test_event_chain.py | 2 +- tests/test_state.py | 3 + tests/test_visibility.py | 4 +- 13 files changed, 70 insertions(+), 200 deletions(-) create mode 100644 changelog.d/12689.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/12689.misc b/changelog.d/12689.misc new file mode 100644 index 0000000000..daa484ea30 --- /dev/null +++ b/changelog.d/12689.misc @@ -0,0 +1 @@ +Refactor `EventContext` class. diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index 8120c305df..9ccd24b298 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -17,11 +17,8 @@ import attr from frozendict import frozendict from typing_extensions import Literal -from twisted.internet.defer import Deferred - from synapse.appservice import ApplicationService from synapse.events import EventBase -from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.types import JsonDict, StateMap if TYPE_CHECKING: @@ -61,6 +58,9 @@ class EventContext: If ``state_group`` is None (ie, the event is an outlier), ``state_group_before_event`` will always also be ``None``. + state_delta_due_to_event: If `state_group` and `state_group_before_event` are not None + then this is the delta of the state between the two groups. + prev_group: If it is known, ``state_group``'s prev_group. Note that this being None does not necessarily mean that ``state_group`` does not have a prev_group! @@ -79,73 +79,47 @@ class EventContext: app_service: If this event is being sent by a (local) application service, that app service. - _current_state_ids: The room state map, including this event - ie, the state - in ``state_group``. - - (type, state_key) -> event_id - - For an outlier, this is {} - - Note that this is a private attribute: it should be accessed via - ``get_current_state_ids``. _AsyncEventContext impl calculates this - on-demand: it will be None until that happens. - - _prev_state_ids: The room state map, excluding this event - ie, the state - in ``state_group_before_event``. For a non-state - event, this will be the same as _current_state_events. - - Note that it is a completely different thing to prev_group! - - (type, state_key) -> event_id - - For an outlier, this is {} - - As with _current_state_ids, this is a private attribute. It should be - accessed via get_prev_state_ids. - partial_state: if True, we may be storing this event with a temporary, incomplete state. """ + _storage: "Storage" rejected: Union[Literal[False], str] = False _state_group: Optional[int] = None state_group_before_event: Optional[int] = None + _state_delta_due_to_event: Optional[StateMap[str]] = None prev_group: Optional[int] = None delta_ids: Optional[StateMap[str]] = None app_service: Optional[ApplicationService] = None - _current_state_ids: Optional[StateMap[str]] = None - _prev_state_ids: Optional[StateMap[str]] = None - partial_state: bool = False @staticmethod def with_state( + storage: "Storage", state_group: Optional[int], state_group_before_event: Optional[int], - current_state_ids: Optional[StateMap[str]], - prev_state_ids: Optional[StateMap[str]], + state_delta_due_to_event: Optional[StateMap[str]], partial_state: bool, prev_group: Optional[int] = None, delta_ids: Optional[StateMap[str]] = None, ) -> "EventContext": return EventContext( - current_state_ids=current_state_ids, - prev_state_ids=prev_state_ids, + storage=storage, state_group=state_group, state_group_before_event=state_group_before_event, + state_delta_due_to_event=state_delta_due_to_event, prev_group=prev_group, delta_ids=delta_ids, partial_state=partial_state, ) @staticmethod - def for_outlier() -> "EventContext": + def for_outlier( + storage: "Storage", + ) -> "EventContext": """Return an EventContext instance suitable for persisting an outlier event""" - return EventContext( - current_state_ids={}, - prev_state_ids={}, - ) + return EventContext(storage=storage) async def serialize(self, event: EventBase, store: "DataStore") -> JsonDict: """Converts self to a type that can be serialized as JSON, and then @@ -158,24 +132,14 @@ class EventContext: The serialized event. """ - # We don't serialize the full state dicts, instead they get pulled out - # of the DB on the other side. However, the other side can't figure out - # the prev_state_ids, so if we're a state event we include the event - # id that we replaced in the state. - if event.is_state(): - prev_state_ids = await self.get_prev_state_ids() - prev_state_id = prev_state_ids.get((event.type, event.state_key)) - else: - prev_state_id = None - return { - "prev_state_id": prev_state_id, - "event_type": event.type, - "event_state_key": event.get_state_key(), "state_group": self._state_group, "state_group_before_event": self.state_group_before_event, "rejected": self.rejected, "prev_group": self.prev_group, + "state_delta_due_to_event": _encode_state_dict( + self._state_delta_due_to_event + ), "delta_ids": _encode_state_dict(self.delta_ids), "app_service_id": self.app_service.id if self.app_service else None, "partial_state": self.partial_state, @@ -193,16 +157,16 @@ class EventContext: Returns: The event context. """ - context = _AsyncEventContextImpl( + context = EventContext( # We use the state_group and prev_state_id stuff to pull the # current_state_ids out of the DB and construct prev_state_ids. storage=storage, - prev_state_id=input["prev_state_id"], - event_type=input["event_type"], - event_state_key=input["event_state_key"], state_group=input["state_group"], state_group_before_event=input["state_group_before_event"], prev_group=input["prev_group"], + state_delta_due_to_event=_decode_state_dict( + input["state_delta_due_to_event"] + ), delta_ids=_decode_state_dict(input["delta_ids"]), rejected=input["rejected"], partial_state=input.get("partial_state", False), @@ -250,8 +214,15 @@ class EventContext: if self.rejected: raise RuntimeError("Attempt to access state_ids of rejected event") - await self._ensure_fetched() - return self._current_state_ids + assert self._state_delta_due_to_event is not None + + prev_state_ids = await self.get_prev_state_ids() + + if self._state_delta_due_to_event: + prev_state_ids = dict(prev_state_ids) + prev_state_ids.update(self._state_delta_due_to_event) + + return prev_state_ids async def get_prev_state_ids(self) -> StateMap[str]: """ @@ -266,94 +237,10 @@ class EventContext: Maps a (type, state_key) to the event ID of the state event matching this tuple. """ - await self._ensure_fetched() - # There *should* be previous state IDs now. - assert self._prev_state_ids is not None - return self._prev_state_ids - - def get_cached_current_state_ids(self) -> Optional[StateMap[str]]: - """Gets the current state IDs if we have them already cached. - - It is an error to access this for a rejected event, since rejected state should - not make it into the room state. This method will raise an exception if - ``rejected`` is set. - - Returns: - Returns None if we haven't cached the state or if state_group is None - (which happens when the associated event is an outlier). - - Otherwise, returns the the current state IDs. - """ - if self.rejected: - raise RuntimeError("Attempt to access state_ids of rejected event") - - return self._current_state_ids - - async def _ensure_fetched(self) -> None: - return None - - -@attr.s(slots=True) -class _AsyncEventContextImpl(EventContext): - """ - An implementation of EventContext which fetches _current_state_ids and - _prev_state_ids from the database on demand. - - Attributes: - - _storage - - _fetching_state_deferred: Resolves when *_state_ids have been calculated. - None if we haven't started calculating yet - - _event_type: The type of the event the context is associated with. - - _event_state_key: The state_key of the event the context is associated with. - - _prev_state_id: If the event associated with the context is a state event, - then `_prev_state_id` is the event_id of the state that was replaced. - """ - - # This needs to have a default as we're inheriting - _storage: "Storage" = attr.ib(default=None) - _prev_state_id: Optional[str] = attr.ib(default=None) - _event_type: str = attr.ib(default=None) - _event_state_key: Optional[str] = attr.ib(default=None) - _fetching_state_deferred: Optional["Deferred[None]"] = attr.ib(default=None) - - async def _ensure_fetched(self) -> None: - if not self._fetching_state_deferred: - self._fetching_state_deferred = run_in_background(self._fill_out_state) - - await make_deferred_yieldable(self._fetching_state_deferred) - - async def _fill_out_state(self) -> None: - """Called to populate the _current_state_ids and _prev_state_ids - attributes by loading from the database. - """ - if self.state_group is None: - # No state group means the event is an outlier. Usually the state_ids dicts are also - # pre-set to empty dicts, but they get reset when the context is serialized, so set - # them to empty dicts again here. - self._current_state_ids = {} - self._prev_state_ids = {} - return - - current_state_ids = await self._storage.state.get_state_ids_for_group( - self.state_group + assert self.state_group_before_event is not None + return await self._storage.state.get_state_ids_for_group( + self.state_group_before_event ) - # Set this separately so mypy knows current_state_ids is not None. - self._current_state_ids = current_state_ids - if self._event_state_key is not None: - self._prev_state_ids = dict(current_state_ids) - - key = (self._event_type, self._event_state_key) - if self._prev_state_id: - self._prev_state_ids[key] = self._prev_state_id - else: - self._prev_state_ids.pop(key, None) - else: - self._prev_state_ids = current_state_ids def _encode_state_dict( diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 38dc5b1f6e..be5099b507 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -659,7 +659,7 @@ class FederationHandler: # in the invitee's sync stream. It is stripped out for all other local users. event.unsigned["knock_room_state"] = stripped_room_state["knock_state_events"] - context = EventContext.for_outlier() + context = EventContext.for_outlier(self.storage) stream_id = await self._federation_event_handler.persist_events_and_notify( event.room_id, [(event, context)] ) @@ -848,7 +848,7 @@ class FederationHandler: ) ) - context = EventContext.for_outlier() + context = EventContext.for_outlier(self.storage) await self._federation_event_handler.persist_events_and_notify( event.room_id, [(event, context)] ) @@ -877,7 +877,7 @@ class FederationHandler: await self.federation_client.send_leave(host_list, event) - context = EventContext.for_outlier() + context = EventContext.for_outlier(self.storage) stream_id = await self._federation_event_handler.persist_events_and_notify( event.room_id, [(event, context)] ) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 6cf927e4ff..6d11b32b61 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1423,7 +1423,7 @@ class FederationEventHandler: # we're not bothering about room state, so flag the event as an outlier. event.internal_metadata.outlier = True - context = EventContext.for_outlier() + context = EventContext.for_outlier(self._storage) try: validate_event_for_room_version(room_version_obj, event) check_auth_rules_for_event(room_version_obj, event, auth) @@ -1874,10 +1874,10 @@ class FederationEventHandler: ) return EventContext.with_state( + storage=self._storage, state_group=state_group, state_group_before_event=context.state_group_before_event, - current_state_ids=current_state_ids, - prev_state_ids=prev_state_ids, + state_delta_due_to_event=state_updates, prev_group=prev_group, delta_ids=state_updates, partial_state=context.partial_state, diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index c28b792e6f..e47799e7f9 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -757,6 +757,10 @@ class EventCreationHandler: The previous version of the event is returned, if it is found in the event context. Otherwise, None is returned. """ + if event.internal_metadata.is_outlier(): + # This can happen due to out of band memberships + return None + prev_state_ids = await context.get_prev_state_ids() prev_event_id = prev_state_ids.get((event.type, event.state_key)) if not prev_event_id: @@ -1001,7 +1005,7 @@ class EventCreationHandler: # after it is created if builder.internal_metadata.outlier: event.internal_metadata.outlier = True - context = EventContext.for_outlier() + context = EventContext.for_outlier(self.storage) elif ( event.type == EventTypes.MSC2716_INSERTION and state_event_ids diff --git a/synapse/push/action_generator.py b/synapse/push/action_generator.py index 60758df016..730d9cd354 100644 --- a/synapse/push/action_generator.py +++ b/synapse/push/action_generator.py @@ -40,5 +40,9 @@ class ActionGenerator: async def handle_push_actions_for_event( self, event: EventBase, context: EventContext ) -> None: + if event.internal_metadata.is_outlier(): + # This can happen due to out of band memberships + return + with Measure(self.clock, "action_for_event_by_user"): await self.bulk_evaluator.action_for_event_by_user(event, context) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index cad3b42640..54e41d5375 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -130,6 +130,7 @@ class StateHandler: self.state_store = hs.get_storage().state self.hs = hs self._state_resolution_handler = hs.get_state_resolution_handler() + self._storage = hs.get_storage() @overload async def get_current_state( @@ -361,10 +362,10 @@ class StateHandler: if not event.is_state(): return EventContext.with_state( + storage=self._storage, state_group_before_event=state_group_before_event, state_group=state_group_before_event, - current_state_ids=state_ids_before_event, - prev_state_ids=state_ids_before_event, + state_delta_due_to_event={}, prev_group=state_group_before_event_prev_group, delta_ids=deltas_to_state_group_before_event, partial_state=partial_state, @@ -393,10 +394,10 @@ class StateHandler: ) return EventContext.with_state( + storage=self._storage, state_group=state_group_after_event, state_group_before_event=state_group_before_event, - current_state_ids=state_ids_after_event, - prev_state_ids=state_ids_before_event, + state_delta_due_to_event=delta_ids, prev_group=state_group_before_event, delta_ids=delta_ids, partial_state=partial_state, diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 6c12653bb3..f544bcfff0 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -128,7 +128,6 @@ class PersistEventsStore: self, events_and_contexts: List[Tuple[EventBase, EventContext]], *, - current_state_for_room: Dict[str, StateMap[str]], state_delta_for_room: Dict[str, DeltaState], new_forward_extremities: Dict[str, Set[str]], use_negative_stream_ordering: bool = False, @@ -139,8 +138,6 @@ class PersistEventsStore: Args: events_and_contexts: - current_state_for_room: Map from room_id to the current state of - the room based on forward extremities state_delta_for_room: Map from room_id to the delta to apply to room state new_forward_extremities: Map from room_id to set of event IDs @@ -215,9 +212,6 @@ class PersistEventsStore: event_counter.labels(event.type, origin_type, origin_entity).inc() - for room_id, new_state in current_state_for_room.items(): - self.store.get_current_state_ids.prefill((room_id,), new_state) - for room_id, latest_event_ids in new_forward_extremities.items(): self.store.get_latest_event_ids_in_room.prefill( (room_id,), list(latest_event_ids) diff --git a/synapse/storage/persist_events.py b/synapse/storage/persist_events.py index 97118045a1..a7f6338e05 100644 --- a/synapse/storage/persist_events.py +++ b/synapse/storage/persist_events.py @@ -487,12 +487,6 @@ class EventsPersistenceStorage: # extremities in each room new_forward_extremities: Dict[str, Set[str]] = {} - # map room_id->(type,state_key)->event_id tracking the full - # state in each room after adding these events. - # This is simply used to prefill the get_current_state_ids - # cache - current_state_for_room: Dict[str, StateMap[str]] = {} - # map room_id->(to_delete, to_insert) where to_delete is a list # of type/state keys to remove from current state, and to_insert # is a map (type,key)->event_id giving the state delta in each @@ -628,14 +622,8 @@ class EventsPersistenceStorage: state_delta_for_room[room_id] = delta - # If we have the current_state then lets prefill - # the cache with it. - if current_state is not None: - current_state_for_room[room_id] = current_state - await self.persist_events_store._persist_events_and_state_updates( chunk, - current_state_for_room=current_state_for_room, state_delta_for_room=state_delta_for_room, new_forward_extremities=new_forward_extremities, use_negative_stream_ordering=backfilled, @@ -733,7 +721,8 @@ class EventsPersistenceStorage: The first state map is the full new current state and the second is the delta to the existing current state. If both are None then - there has been no change. + there has been no change. Either or neither can be None if there + has been a change. The function may prune some old entries from the set of new forward extremities if it's safe to do so. @@ -743,9 +732,6 @@ class EventsPersistenceStorage: the new current state is only returned if we've already calculated it. """ - # map from state_group to ((type, key) -> event_id) state map - state_groups_map = {} - # Map from (prev state group, new state group) -> delta state dict state_group_deltas = {} @@ -759,16 +745,6 @@ class EventsPersistenceStorage: ) continue - if ctx.state_group in state_groups_map: - continue - - # We're only interested in pulling out state that has already - # been cached in the context. We'll pull stuff out of the DB later - # if necessary. - current_state_ids = ctx.get_cached_current_state_ids() - if current_state_ids is not None: - state_groups_map[ctx.state_group] = current_state_ids - if ctx.prev_group: state_group_deltas[(ctx.prev_group, ctx.state_group)] = ctx.delta_ids @@ -826,18 +802,14 @@ class EventsPersistenceStorage: delta_ids = state_group_deltas.get((old_state_group, new_state_group), None) if delta_ids is not None: # We have a delta from the existing to new current state, - # so lets just return that. If we happen to already have - # the current state in memory then lets also return that, - # but it doesn't matter if we don't. - new_state = state_groups_map.get(new_state_group) - return new_state, delta_ids, new_latest_event_ids + # so lets just return that. + return None, delta_ids, new_latest_event_ids # Now that we have calculated new_state_groups we need to get # their state IDs so we can resolve to a single state set. - missing_state = new_state_groups - set(state_groups_map) - if missing_state: - group_to_state = await self.state_store._get_state_for_groups(missing_state) - state_groups_map.update(group_to_state) + state_groups_map = await self.state_store._get_state_for_groups( + new_state_groups + ) if len(new_state_groups) == 1: # If there is only one state group, then we know what the current diff --git a/tests/handlers/test_federation_event.py b/tests/handlers/test_federation_event.py index 489ba57736..e64b28f28b 100644 --- a/tests/handlers/test_federation_event.py +++ b/tests/handlers/test_federation_event.py @@ -148,7 +148,9 @@ class FederationEventHandlerTests(unittest.FederatingHomeserverTestCase): prev_event.internal_metadata.outlier = True persistence = self.hs.get_storage().persistence self.get_success( - persistence.persist_event(prev_event, EventContext.for_outlier()) + persistence.persist_event( + prev_event, EventContext.for_outlier(self.hs.get_storage()) + ) ) else: diff --git a/tests/storage/test_event_chain.py b/tests/storage/test_event_chain.py index 401020fd63..c7661e7186 100644 --- a/tests/storage/test_event_chain.py +++ b/tests/storage/test_event_chain.py @@ -393,7 +393,7 @@ class EventChainStoreTestCase(HomeserverTestCase): # We need to persist the events to the events and state_events # tables. persist_events_store._store_event_txn( - txn, [(e, EventContext()) for e in events] + txn, [(e, EventContext(self.hs.get_storage())) for e in events] ) # Actually call the function that calculates the auth chain stuff. diff --git a/tests/test_state.py b/tests/test_state.py index e4baa69137..651ec1c7d4 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -88,6 +88,9 @@ class _DummyStore: return groups + async def get_state_ids_for_group(self, state_group): + return self._group_to_state[state_group] + async def store_state_group( self, event_id, room_id, prev_group, delta_ids, current_state_ids ): diff --git a/tests/test_visibility.py b/tests/test_visibility.py index d0230f9ebb..7a9b01ef9d 100644 --- a/tests/test_visibility.py +++ b/tests/test_visibility.py @@ -234,7 +234,9 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase): event = self.get_success(builder.build(prev_event_ids=[], auth_event_ids=[])) event.internal_metadata.outlier = True self.get_success( - self.storage.persistence.persist_event(event, EventContext.for_outlier()) + self.storage.persistence.persist_event( + event, EventContext.for_outlier(self.storage) + ) ) return event -- cgit 1.5.1 From 84facf769eb79112be5f21942c18047b2b85f0bd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 10 May 2022 23:39:14 -0500 Subject: Fix `/messages` throwing a 500 when querying for non-existent room (#12683) Fix https://github.com/matrix-org/synapse/issues/12678 Complement test added: https://github.com/matrix-org/complement/pull/369 **Before:** 500 internal server error **After:** According to the [spec](https://spec.matrix.org/latest/client-server-api/#get_matrixclientv3roomsroomidmessages), calling `/messages` against a non-existent `room_id` should throw a 403 forbidden (since you're not part of the room). This also matches the behavior before https://github.com/matrix-org/synapse/pull/12370 which regressed Synapse to the 500 behavior. ```json { "errcode": "M_FORBIDDEN", "error": "User @test:my.synapse.server not in room !dne:my.synapse.server, and room previews are disabled" } ``` --- changelog.d/12683.bugfix | 1 + synapse/handlers/pagination.py | 2 +- synapse/storage/databases/main/stream.py | 26 +++++++++++--------------- 3 files changed, 13 insertions(+), 16 deletions(-) create mode 100644 changelog.d/12683.bugfix (limited to 'synapse/storage/databases') diff --git a/changelog.d/12683.bugfix b/changelog.d/12683.bugfix new file mode 100644 index 0000000000..2ce84a223a --- /dev/null +++ b/changelog.d/12683.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse 1.57.0 where `/messages` would throw a 500 error when querying for a non-existent room. diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 7ee3340373..2e30180094 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -448,7 +448,7 @@ class PaginationHandler: ) # We expect `/messages` to use historic pagination tokens by default but # `/messages` should still works with live tokens when manually provided. - assert from_token.room_key.topological + assert from_token.room_key.topological is not None if pagin_config.limit is None: # This shouldn't happen as we've set a default limit before this diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 793e906630..4e1d9647b7 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -785,22 +785,14 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): return None async def get_current_room_stream_token_for_room_id( - self, room_id: Optional[str] = None + self, room_id: str ) -> RoomStreamToken: - """Returns the current position of the rooms stream. - - By default, it returns a live token with the current global stream - token. Specifying a `room_id` causes it to return a historic token with - the room specific topological token. - """ + """Returns the current position of the rooms stream (historic token).""" stream_ordering = self.get_room_max_stream_ordering() - if room_id is None: - return RoomStreamToken(None, stream_ordering) - else: - topo = await self.db_pool.runInteraction( - "_get_max_topological_txn", self._get_max_topological_txn, room_id - ) - return RoomStreamToken(topo, stream_ordering) + topo = await self.db_pool.runInteraction( + "_get_max_topological_txn", self._get_max_topological_txn, room_id + ) + return RoomStreamToken(topo, stream_ordering) def get_stream_id_for_event_txn( self, @@ -870,7 +862,11 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ) rows = txn.fetchall() - return rows[0][0] if rows else 0 + # An aggregate function like MAX() will always return one row per group + # so we can safely rely on the lookup here. For example, when a we + # lookup a `room_id` which does not exist, `rows` will look like + # `[(None,)]` + return rows[0][0] if rows[0][0] is not None else 0 @staticmethod def _set_before_and_after( -- cgit 1.5.1 From 17e1eb7749adf12d43f534c50115bbe19c809ea6 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 12 May 2022 15:33:50 +0100 Subject: Reduce the number of "untyped defs" (#12716) --- changelog.d/12716.misc | 1 + mypy.ini | 24 ++++++++++++ synapse/groups/groups_server.py | 2 +- synapse/http/client.py | 16 +++++--- synapse/http/federation/matrix_federation_agent.py | 2 +- synapse/http/federation/srv_resolver.py | 4 +- synapse/http/federation/well_known_resolver.py | 6 +-- synapse/http/matrixfederationclient.py | 31 +++++++++------ synapse/http/request_metrics.py | 10 ++--- synapse/storage/database.py | 44 +++++++++++++++------- synapse/storage/databases/main/metrics.py | 24 +++++++----- synapse/storage/databases/main/stream.py | 8 ++-- synapse/storage/persist_events.py | 21 +++++++---- synapse/storage/prepare_database.py | 2 +- synapse/storage/state.py | 6 ++- synapse/storage/types.py | 10 ++++- 16 files changed, 142 insertions(+), 69 deletions(-) create mode 100644 changelog.d/12716.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/12716.misc b/changelog.d/12716.misc new file mode 100644 index 0000000000..b07e1b52ee --- /dev/null +++ b/changelog.d/12716.misc @@ -0,0 +1 @@ +Add type annotations to increase the number of modules passing `disallow-untyped-defs`. \ No newline at end of file diff --git a/mypy.ini b/mypy.ini index ba0de419f5..8478dd9e51 100644 --- a/mypy.ini +++ b/mypy.ini @@ -119,9 +119,18 @@ disallow_untyped_defs = True [mypy-synapse.federation.transport.client] disallow_untyped_defs = False +[mypy-synapse.groups.*] +disallow_untyped_defs = True + [mypy-synapse.handlers.*] disallow_untyped_defs = True +[mypy-synapse.http.federation.*] +disallow_untyped_defs = True + +[mypy-synapse.http.request_metrics] +disallow_untyped_defs = True + [mypy-synapse.http.server] disallow_untyped_defs = True @@ -196,12 +205,27 @@ disallow_untyped_defs = True [mypy-synapse.storage.databases.main.state_deltas] disallow_untyped_defs = True +[mypy-synapse.storage.databases.main.stream] +disallow_untyped_defs = True + [mypy-synapse.storage.databases.main.transactions] disallow_untyped_defs = True [mypy-synapse.storage.databases.main.user_erasure_store] disallow_untyped_defs = True +[mypy-synapse.storage.prepare_database] +disallow_untyped_defs = True + +[mypy-synapse.storage.persist_events] +disallow_untyped_defs = True + +[mypy-synapse.storage.state] +disallow_untyped_defs = True + +[mypy-synapse.storage.types] +disallow_untyped_defs = True + [mypy-synapse.storage.util.*] disallow_untyped_defs = True diff --git a/synapse/groups/groups_server.py b/synapse/groups/groups_server.py index 4c3a5a6e24..dfd24af695 100644 --- a/synapse/groups/groups_server.py +++ b/synapse/groups/groups_server.py @@ -934,7 +934,7 @@ class GroupsServerHandler(GroupsServerWorkerHandler): # Before deleting the group lets kick everyone out of it users = await self.store.get_users_in_group(group_id, include_private=True) - async def _kick_user_from_group(user_id): + async def _kick_user_from_group(user_id: str) -> None: if self.hs.is_mine_id(user_id): groups_local = self.hs.get_groups_local_handler() assert isinstance( diff --git a/synapse/http/client.py b/synapse/http/client.py index b2c9a7c670..084d0a5b84 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -43,8 +43,10 @@ from twisted.internet import defer, error as twisted_error, protocol, ssl from twisted.internet.address import IPv4Address, IPv6Address from twisted.internet.interfaces import ( IAddress, + IDelayedCall, IHostResolution, IReactorPluggableNameResolver, + IReactorTime, IResolutionReceiver, ITCPTransport, ) @@ -121,13 +123,15 @@ def check_against_blacklist( _EPSILON = 0.00000001 -def _make_scheduler(reactor): +def _make_scheduler( + reactor: IReactorTime, +) -> Callable[[Callable[[], object]], IDelayedCall]: """Makes a schedular suitable for a Cooperator using the given reactor. (This is effectively just a copy from `twisted.internet.task`) """ - def _scheduler(x): + def _scheduler(x: Callable[[], object]) -> IDelayedCall: return reactor.callLater(_EPSILON, x) return _scheduler @@ -775,7 +779,7 @@ class SimpleHttpClient: ) -def _timeout_to_request_timed_out_error(f: Failure): +def _timeout_to_request_timed_out_error(f: Failure) -> Failure: if f.check(twisted_error.TimeoutError, twisted_error.ConnectingCancelledError): # The TCP connection has its own timeout (set by the 'connectTimeout' param # on the Agent), which raises twisted_error.TimeoutError exception. @@ -809,7 +813,7 @@ class _DiscardBodyWithMaxSizeProtocol(protocol.Protocol): def __init__(self, deferred: defer.Deferred): self.deferred = deferred - def _maybe_fail(self): + def _maybe_fail(self) -> None: """ Report a max size exceed error and disconnect the first time this is called. """ @@ -933,12 +937,12 @@ class InsecureInterceptableContextFactory(ssl.ContextFactory): Do not use this since it allows an attacker to intercept your communications. """ - def __init__(self): + def __init__(self) -> None: self._context = SSL.Context(SSL.SSLv23_METHOD) self._context.set_verify(VERIFY_NONE, lambda *_: False) def getContext(self, hostname=None, port=None): return self._context - def creatorForNetloc(self, hostname, port): + def creatorForNetloc(self, hostname: bytes, port: int): return self diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index a8a520f809..2f0177f1e2 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -239,7 +239,7 @@ class MatrixHostnameEndpointFactory: self._srv_resolver = srv_resolver - def endpointForURI(self, parsed_uri: URI): + def endpointForURI(self, parsed_uri: URI) -> "MatrixHostnameEndpoint": return MatrixHostnameEndpoint( self._reactor, self._proxy_reactor, diff --git a/synapse/http/federation/srv_resolver.py b/synapse/http/federation/srv_resolver.py index f68646fd0d..de0e882b33 100644 --- a/synapse/http/federation/srv_resolver.py +++ b/synapse/http/federation/srv_resolver.py @@ -16,7 +16,7 @@ import logging import random import time -from typing import Callable, Dict, List +from typing import Any, Callable, Dict, List import attr @@ -109,7 +109,7 @@ class SrvResolver: def __init__( self, - dns_client=client, + dns_client: Any = client, cache: Dict[bytes, List[Server]] = SERVER_CACHE, get_time: Callable[[], float] = time.time, ): diff --git a/synapse/http/federation/well_known_resolver.py b/synapse/http/federation/well_known_resolver.py index 43f2140429..71b685fade 100644 --- a/synapse/http/federation/well_known_resolver.py +++ b/synapse/http/federation/well_known_resolver.py @@ -74,9 +74,9 @@ _well_known_cache: TTLCache[bytes, Optional[bytes]] = TTLCache("well-known") _had_valid_well_known_cache: TTLCache[bytes, bool] = TTLCache("had-valid-well-known") -@attr.s(slots=True, frozen=True) +@attr.s(slots=True, frozen=True, auto_attribs=True) class WellKnownLookupResult: - delegated_server = attr.ib() + delegated_server: Optional[bytes] class WellKnownResolver: @@ -336,4 +336,4 @@ def _parse_cache_control(headers: Headers) -> Dict[bytes, Optional[bytes]]: class _FetchWellKnownFailure(Exception): # True if we didn't get a non-5xx HTTP response, i.e. this may or may not be # a temporary failure. - temporary = attr.ib() + temporary: bool = attr.ib() diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index c2ec3caa0e..725b5c33b8 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -23,6 +23,8 @@ from http import HTTPStatus from io import BytesIO, StringIO from typing import ( TYPE_CHECKING, + Any, + BinaryIO, Callable, Dict, Generic, @@ -44,7 +46,7 @@ from typing_extensions import Literal from twisted.internet import defer from twisted.internet.error import DNSLookupError from twisted.internet.interfaces import IReactorTime -from twisted.internet.task import _EPSILON, Cooperator +from twisted.internet.task import Cooperator from twisted.web.client import ResponseFailed from twisted.web.http_headers import Headers from twisted.web.iweb import IBodyProducer, IResponse @@ -58,11 +60,13 @@ from synapse.api.errors import ( RequestSendFailed, SynapseError, ) +from synapse.crypto.context_factory import FederationPolicyForHTTPS from synapse.http import QuieterFileBodyProducer from synapse.http.client import ( BlacklistingAgentWrapper, BodyExceededMaxSize, ByteWriteable, + _make_scheduler, encode_query_args, read_body_with_max_size, ) @@ -181,7 +185,7 @@ class JsonParser(ByteParser[Union[JsonDict, list]]): CONTENT_TYPE = "application/json" - def __init__(self): + def __init__(self) -> None: self._buffer = StringIO() self._binary_wrapper = BinaryIOWrapper(self._buffer) @@ -299,7 +303,9 @@ async def _handle_response( class BinaryIOWrapper: """A wrapper for a TextIO which converts from bytes on the fly.""" - def __init__(self, file: typing.TextIO, encoding="utf-8", errors="strict"): + def __init__( + self, file: typing.TextIO, encoding: str = "utf-8", errors: str = "strict" + ): self.decoder = codecs.getincrementaldecoder(encoding)(errors) self.file = file @@ -317,7 +323,11 @@ class MatrixFederationHttpClient: requests. """ - def __init__(self, hs: "HomeServer", tls_client_options_factory): + def __init__( + self, + hs: "HomeServer", + tls_client_options_factory: Optional[FederationPolicyForHTTPS], + ): self.hs = hs self.signing_key = hs.signing_key self.server_name = hs.hostname @@ -348,10 +358,7 @@ class MatrixFederationHttpClient: self.version_string_bytes = hs.version_string.encode("ascii") self.default_timeout = 60 - def schedule(x): - self.reactor.callLater(_EPSILON, x) - - self._cooperator = Cooperator(scheduler=schedule) + self._cooperator = Cooperator(scheduler=_make_scheduler(self.reactor)) self._sleeper = AwakenableSleeper(self.reactor) @@ -364,7 +371,7 @@ class MatrixFederationHttpClient: self, request: MatrixFederationRequest, try_trailing_slash_on_400: bool = False, - **send_request_args, + **send_request_args: Any, ) -> IResponse: """Wrapper for _send_request which can optionally retry the request upon receiving a combination of a 400 HTTP response code and a @@ -1159,7 +1166,7 @@ class MatrixFederationHttpClient: self, destination: str, path: str, - output_stream, + output_stream: BinaryIO, args: Optional[QueryParams] = None, retry_on_dns_fail: bool = True, max_size: Optional[int] = None, @@ -1250,10 +1257,10 @@ class MatrixFederationHttpClient: return length, headers -def _flatten_response_never_received(e): +def _flatten_response_never_received(e: BaseException) -> str: if hasattr(e, "reasons"): reasons = ", ".join( - _flatten_response_never_received(f.value) for f in e.reasons + _flatten_response_never_received(f.value) for f in e.reasons # type: ignore[attr-defined] ) return "%s:[%s]" % (type(e).__name__, reasons) diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py index 4886626d50..2b6d113544 100644 --- a/synapse/http/request_metrics.py +++ b/synapse/http/request_metrics.py @@ -162,7 +162,7 @@ class RequestMetrics: with _in_flight_requests_lock: _in_flight_requests.add(self) - def stop(self, time_sec, response_code, sent_bytes): + def stop(self, time_sec: float, response_code: int, sent_bytes: int) -> None: with _in_flight_requests_lock: _in_flight_requests.discard(self) @@ -186,13 +186,13 @@ class RequestMetrics: ) return - response_code = str(response_code) + response_code_str = str(response_code) - outgoing_responses_counter.labels(self.method, response_code).inc() + outgoing_responses_counter.labels(self.method, response_code_str).inc() response_count.labels(self.method, self.name, tag).inc() - response_timer.labels(self.method, self.name, tag, response_code).observe( + response_timer.labels(self.method, self.name, tag, response_code_str).observe( time_sec - self.start_ts ) @@ -221,7 +221,7 @@ class RequestMetrics: # flight. self.update_metrics() - def update_metrics(self): + def update_metrics(self) -> None: """Updates the in flight metrics with values from this request.""" if not self.start_context: logger.error( diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 41f566b648..5ddb58a8a2 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -31,6 +31,7 @@ from typing import ( List, Optional, Tuple, + Type, TypeVar, cast, overload, @@ -41,6 +42,7 @@ from prometheus_client import Histogram from typing_extensions import Concatenate, Literal, ParamSpec from twisted.enterprise import adbapi +from twisted.internet.interfaces import IReactorCore from synapse.api.errors import StoreError from synapse.config.database import DatabaseConnectionConfig @@ -92,7 +94,9 @@ UNIQUE_INDEX_BACKGROUND_UPDATES = { def make_pool( - reactor, db_config: DatabaseConnectionConfig, engine: BaseDatabaseEngine + reactor: IReactorCore, + db_config: DatabaseConnectionConfig, + engine: BaseDatabaseEngine, ) -> adbapi.ConnectionPool: """Get the connection pool for the database.""" @@ -101,7 +105,7 @@ def make_pool( db_args = dict(db_config.config.get("args", {})) db_args.setdefault("cp_reconnect", True) - def _on_new_connection(conn): + def _on_new_connection(conn: Connection) -> None: # Ensure we have a logging context so we can correctly track queries, # etc. with LoggingContext("db.on_new_connection"): @@ -157,7 +161,11 @@ class LoggingDatabaseConnection: default_txn_name: str def cursor( - self, *, txn_name=None, after_callbacks=None, exception_callbacks=None + self, + *, + txn_name: Optional[str] = None, + after_callbacks: Optional[List["_CallbackListEntry"]] = None, + exception_callbacks: Optional[List["_CallbackListEntry"]] = None, ) -> "LoggingTransaction": if not txn_name: txn_name = self.default_txn_name @@ -183,11 +191,16 @@ class LoggingDatabaseConnection: self.conn.__enter__() return self - def __exit__(self, exc_type, exc_value, traceback) -> Optional[bool]: + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_value: Optional[BaseException], + traceback: Optional[types.TracebackType], + ) -> Optional[bool]: return self.conn.__exit__(exc_type, exc_value, traceback) # Proxy through any unknown lookups to the DB conn class. - def __getattr__(self, name): + def __getattr__(self, name: str) -> Any: return getattr(self.conn, name) @@ -391,17 +404,22 @@ class LoggingTransaction: def __enter__(self) -> "LoggingTransaction": return self - def __exit__(self, exc_type, exc_value, traceback): + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_value: Optional[BaseException], + traceback: Optional[types.TracebackType], + ) -> None: self.close() class PerformanceCounters: - def __init__(self): - self.current_counters = {} - self.previous_counters = {} + def __init__(self) -> None: + self.current_counters: Dict[str, Tuple[int, float]] = {} + self.previous_counters: Dict[str, Tuple[int, float]] = {} def update(self, key: str, duration_secs: float) -> None: - count, cum_time = self.current_counters.get(key, (0, 0)) + count, cum_time = self.current_counters.get(key, (0, 0.0)) count += 1 cum_time += duration_secs self.current_counters[key] = (count, cum_time) @@ -527,7 +545,7 @@ class DatabasePool: def start_profiling(self) -> None: self._previous_loop_ts = monotonic_time() - def loop(): + def loop() -> None: curr = self._current_txn_total_time prev = self._previous_txn_total_time self._previous_txn_total_time = curr @@ -1186,7 +1204,7 @@ class DatabasePool: if lock: self.engine.lock_table(txn, table) - def _getwhere(key): + def _getwhere(key: str) -> str: # If the value we're passing in is None (aka NULL), we need to use # IS, not =, as NULL = NULL equals NULL (False). if keyvalues[key] is None: @@ -2258,7 +2276,7 @@ class DatabasePool: term: Optional[str], col: str, retcols: Collection[str], - desc="simple_search_list", + desc: str = "simple_search_list", ) -> Optional[List[Dict[str, Any]]]: """Executes a SELECT query on the named table, which may return zero or more rows, returning the result as a list of dicts. diff --git a/synapse/storage/databases/main/metrics.py b/synapse/storage/databases/main/metrics.py index 1480a0f048..d03555a585 100644 --- a/synapse/storage/databases/main/metrics.py +++ b/synapse/storage/databases/main/metrics.py @@ -23,6 +23,7 @@ from synapse.storage.database import DatabasePool, LoggingDatabaseConnection from synapse.storage.databases.main.event_push_actions import ( EventPushActionsWorkerStore, ) +from synapse.storage.types import Cursor if TYPE_CHECKING: from synapse.server import HomeServer @@ -71,7 +72,7 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): self._last_user_visit_update = self._get_start_of_day() @wrap_as_background_process("read_forward_extremities") - async def _read_forward_extremities(self): + async def _read_forward_extremities(self) -> None: def fetch(txn): txn.execute( """ @@ -95,7 +96,7 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): (x[0] - 1) * x[1] for x in res if x[1] ) - async def count_daily_e2ee_messages(self): + async def count_daily_e2ee_messages(self) -> int: """ Returns an estimate of the number of messages sent in the last day. @@ -115,7 +116,7 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): return await self.db_pool.runInteraction("count_e2ee_messages", _count_messages) - async def count_daily_sent_e2ee_messages(self): + async def count_daily_sent_e2ee_messages(self) -> int: def _count_messages(txn): # This is good enough as if you have silly characters in your own # hostname then that's your own fault. @@ -136,7 +137,7 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): "count_daily_sent_e2ee_messages", _count_messages ) - async def count_daily_active_e2ee_rooms(self): + async def count_daily_active_e2ee_rooms(self) -> int: def _count(txn): sql = """ SELECT COUNT(DISTINCT room_id) FROM events @@ -151,7 +152,7 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): "count_daily_active_e2ee_rooms", _count ) - async def count_daily_messages(self): + async def count_daily_messages(self) -> int: """ Returns an estimate of the number of messages sent in the last day. @@ -171,7 +172,7 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): return await self.db_pool.runInteraction("count_messages", _count_messages) - async def count_daily_sent_messages(self): + async def count_daily_sent_messages(self) -> int: def _count_messages(txn): # This is good enough as if you have silly characters in your own # hostname then that's your own fault. @@ -192,7 +193,7 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): "count_daily_sent_messages", _count_messages ) - async def count_daily_active_rooms(self): + async def count_daily_active_rooms(self) -> int: def _count(txn): sql = """ SELECT COUNT(DISTINCT room_id) FROM events @@ -226,7 +227,7 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): "count_monthly_users", self._count_users, thirty_days_ago ) - def _count_users(self, txn, time_from): + def _count_users(self, txn: Cursor, time_from: int) -> int: """ Returns number of users seen in the past time_from period """ @@ -238,7 +239,10 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): ) u """ txn.execute(sql, (time_from,)) - (count,) = txn.fetchone() + # Mypy knows that fetchone() might return None if there are no rows. + # We know better: "SELECT COUNT(...) FROM ..." without any GROUP BY always + # returns exactly one row. + (count,) = txn.fetchone() # type: ignore[misc] return count async def count_r30_users(self) -> Dict[str, int]: @@ -453,7 +457,7 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): "count_r30v2_users", _count_r30v2_users ) - def _get_start_of_day(self): + def _get_start_of_day(self) -> int: """ Returns millisecond unixtime for start of UTC day. """ diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 4e1d9647b7..59bbca2e32 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -798,9 +798,11 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): self, txn: LoggingTransaction, event_id: str, - allow_none=False, - ) -> int: - return self.db_pool.simple_select_one_onecol_txn( + allow_none: bool = False, + ) -> Optional[int]: + # Type ignore: we pass keyvalues a Dict[str, str]; the function wants + # Dict[str, Any]. I think mypy is unhappy because Dict is invariant? + return self.db_pool.simple_select_one_onecol_txn( # type: ignore[call-overload] txn=txn, table="events", keyvalues={"event_id": event_id}, diff --git a/synapse/storage/persist_events.py b/synapse/storage/persist_events.py index a7f6338e05..0fc282866b 100644 --- a/synapse/storage/persist_events.py +++ b/synapse/storage/persist_events.py @@ -25,6 +25,7 @@ from typing import ( Collection, Deque, Dict, + Generator, Generic, Iterable, List, @@ -207,7 +208,7 @@ class _EventPeristenceQueue(Generic[_PersistResult]): return res - def _handle_queue(self, room_id): + def _handle_queue(self, room_id: str) -> None: """Attempts to handle the queue for a room if not already being handled. The queue's callback will be invoked with for each item in the queue, @@ -227,7 +228,7 @@ class _EventPeristenceQueue(Generic[_PersistResult]): self._currently_persisting_rooms.add(room_id) - async def handle_queue_loop(): + async def handle_queue_loop() -> None: try: queue = self._get_drainining_queue(room_id) for item in queue: @@ -250,15 +251,17 @@ class _EventPeristenceQueue(Generic[_PersistResult]): with PreserveLoggingContext(): item.deferred.callback(ret) finally: - queue = self._event_persist_queues.pop(room_id, None) - if queue: - self._event_persist_queues[room_id] = queue + remaining_queue = self._event_persist_queues.pop(room_id, None) + if remaining_queue: + self._event_persist_queues[room_id] = remaining_queue self._currently_persisting_rooms.discard(room_id) # set handle_queue_loop off in the background run_as_background_process("persist_events", handle_queue_loop) - def _get_drainining_queue(self, room_id): + def _get_drainining_queue( + self, room_id: str + ) -> Generator[_EventPersistQueueItem, None, None]: queue = self._event_persist_queues.setdefault(room_id, deque()) try: @@ -317,7 +320,9 @@ class EventsPersistenceStorage: for event, ctx in events_and_contexts: partitioned.setdefault(event.room_id, []).append((event, ctx)) - async def enqueue(item): + async def enqueue( + item: Tuple[str, List[Tuple[EventBase, EventContext]]] + ) -> Dict[str, str]: room_id, evs_ctxs = item return await self._event_persist_queue.add_to_queue( room_id, evs_ctxs, backfilled=backfilled @@ -1102,7 +1107,7 @@ class EventsPersistenceStorage: return False - async def _handle_potentially_left_users(self, user_ids: Set[str]): + async def _handle_potentially_left_users(self, user_ids: Set[str]) -> None: """Given a set of remote users check if the server still shares a room with them. If not then mark those users' device cache as stale. """ diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 546d6bae6e..c33df42084 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -85,7 +85,7 @@ def prepare_database( database_engine: BaseDatabaseEngine, config: Optional[HomeServerConfig], databases: Collection[str] = ("main", "state"), -): +) -> None: """Prepares a physical database for usage. Will either create all necessary tables or upgrade from an older schema version. diff --git a/synapse/storage/state.py b/synapse/storage/state.py index d1d5859214..d4a1bd4f9d 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -62,7 +62,7 @@ class StateFilter: types: "frozendict[str, Optional[FrozenSet[str]]]" include_others: bool = False - def __attrs_post_init__(self): + def __attrs_post_init__(self) -> None: # If `include_others` is set we canonicalise the filter by removing # wildcards from the types dictionary if self.include_others: @@ -138,7 +138,9 @@ class StateFilter: ) @staticmethod - def freeze(types: Mapping[str, Optional[Collection[str]]], include_others: bool): + def freeze( + types: Mapping[str, Optional[Collection[str]]], include_others: bool + ) -> "StateFilter": """ Returns a (frozen) StateFilter with the same contents as the parameters specified here, which can be made of mutable types. diff --git a/synapse/storage/types.py b/synapse/storage/types.py index d7d6f1d90e..40536c1830 100644 --- a/synapse/storage/types.py +++ b/synapse/storage/types.py @@ -11,7 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Iterator, List, Mapping, Optional, Sequence, Tuple, Union +from types import TracebackType +from typing import Any, Iterator, List, Mapping, Optional, Sequence, Tuple, Type, Union from typing_extensions import Protocol @@ -86,5 +87,10 @@ class Connection(Protocol): def __enter__(self) -> "Connection": ... - def __exit__(self, exc_type, exc_value, traceback) -> Optional[bool]: + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_value: Optional[BaseException], + traceback: Optional[TracebackType], + ) -> Optional[bool]: ... -- cgit 1.5.1 From 86a515ccbf359ecd65a42a3f409b8f97c8f22284 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 16 May 2022 08:42:45 -0400 Subject: Consolidate logic for parsing relations. (#12693) Parse the `m.relates_to` event content field (which describes relations) in a single place, this is used during: * Event persistence. * Validation of the Client-Server API. * Fetching bundled aggregations. * Processing of push rules. Each of these separately implement the logic and each made slightly different assumptions about what was valid. Some had minor / potential bugs. --- changelog.d/12693.misc | 1 + synapse/events/__init__.py | 45 +++++++++++++++++++++++++++++ synapse/handlers/message.py | 30 ++++++++----------- synapse/handlers/relations.py | 20 ++++++------- synapse/push/bulk_push_rule_evaluator.py | 6 ++-- synapse/storage/databases/main/events.py | 49 ++++++++++++++------------------ tests/rest/client/test_sync.py | 8 ++++-- 7 files changed, 98 insertions(+), 61 deletions(-) create mode 100644 changelog.d/12693.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/12693.misc b/changelog.d/12693.misc new file mode 100644 index 0000000000..8bd1e1cb0c --- /dev/null +++ b/changelog.d/12693.misc @@ -0,0 +1 @@ +Consolidate parsing of relation information from events. diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index c238376caf..39ad2793d9 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -15,6 +15,7 @@ # limitations under the License. import abc +import collections.abc import os from typing import ( TYPE_CHECKING, @@ -32,9 +33,11 @@ from typing import ( overload, ) +import attr from typing_extensions import Literal from unpaddedbase64 import encode_base64 +from synapse.api.constants import RelationTypes from synapse.api.room_versions import EventFormatVersions, RoomVersion, RoomVersions from synapse.types import JsonDict, RoomStreamToken from synapse.util.caches import intern_dict @@ -615,3 +618,45 @@ def make_event_from_dict( return event_type( event_dict, room_version, internal_metadata_dict or {}, rejected_reason ) + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class _EventRelation: + # The target event of the relation. + parent_id: str + # The relation type. + rel_type: str + # The aggregation key. Will be None if the rel_type is not m.annotation or is + # not a string. + aggregation_key: Optional[str] + + +def relation_from_event(event: EventBase) -> Optional[_EventRelation]: + """ + Attempt to parse relation information an event. + + Returns: + The event relation information, if it is valid. None, otherwise. + """ + relation = event.content.get("m.relates_to") + if not relation or not isinstance(relation, collections.abc.Mapping): + # No relation information. + return None + + # Relations must have a type and parent event ID. + rel_type = relation.get("rel_type") + if not isinstance(rel_type, str): + return None + + parent_id = relation.get("event_id") + if not isinstance(parent_id, str): + return None + + # Annotations have a key field. + aggregation_key = None + if rel_type == RelationTypes.ANNOTATION: + aggregation_key = relation.get("key") + if not isinstance(aggregation_key, str): + aggregation_key = None + + return _EventRelation(parent_id, rel_type, aggregation_key) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 4a4b535bae..0951b9c71f 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -44,7 +44,7 @@ from synapse.api.errors import ( from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersions from synapse.api.urls import ConsentURIBuilder from synapse.event_auth import validate_event_for_room_version -from synapse.events import EventBase +from synapse.events import EventBase, relation_from_event from synapse.events.builder import EventBuilder from synapse.events.snapshot import EventContext from synapse.events.validator import EventValidator @@ -1060,20 +1060,11 @@ class EventCreationHandler: SynapseError if the event is invalid. """ - relation = event.content.get("m.relates_to") + relation = relation_from_event(event) if not relation: return - relation_type = relation.get("rel_type") - if not relation_type: - return - - # Ensure the parent is real. - relates_to = relation.get("event_id") - if not relates_to: - return - - parent_event = await self.store.get_event(relates_to, allow_none=True) + parent_event = await self.store.get_event(relation.parent_id, allow_none=True) if parent_event: # And in the same room. if parent_event.room_id != event.room_id: @@ -1082,28 +1073,31 @@ class EventCreationHandler: else: # There must be some reason that the client knows the event exists, # see if there are existing relations. If so, assume everything is fine. - if not await self.store.event_is_target_of_relation(relates_to): + if not await self.store.event_is_target_of_relation(relation.parent_id): # Otherwise, the client can't know about the parent event! raise SynapseError(400, "Can't send relation to unknown event") # If this event is an annotation then we check that that the sender # can't annotate the same way twice (e.g. stops users from liking an # event multiple times). - if relation_type == RelationTypes.ANNOTATION: - aggregation_key = relation["key"] + if relation.rel_type == RelationTypes.ANNOTATION: + aggregation_key = relation.aggregation_key + + if aggregation_key is None: + raise SynapseError(400, "Missing aggregation key") if len(aggregation_key) > 500: raise SynapseError(400, "Aggregation key is too long") already_exists = await self.store.has_user_annotated_event( - relates_to, event.type, aggregation_key, event.sender + relation.parent_id, event.type, aggregation_key, event.sender ) if already_exists: raise SynapseError(400, "Can't send same reaction twice") # Don't attempt to start a thread if the parent event is a relation. - elif relation_type == RelationTypes.THREAD: - if await self.store.event_includes_relation(relates_to): + elif relation.rel_type == RelationTypes.THREAD: + if await self.store.event_includes_relation(relation.parent_id): raise SynapseError( 400, "Cannot start threads from an event with a relation" ) diff --git a/synapse/handlers/relations.py b/synapse/handlers/relations.py index c2754ec918..ab7e54857d 100644 --- a/synapse/handlers/relations.py +++ b/synapse/handlers/relations.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import collections.abc import logging from typing import ( TYPE_CHECKING, @@ -28,7 +27,7 @@ import attr from synapse.api.constants import RelationTypes from synapse.api.errors import SynapseError -from synapse.events import EventBase +from synapse.events import EventBase, relation_from_event from synapse.storage.databases.main.relations import _RelatedEvent from synapse.types import JsonDict, Requester, StreamToken, UserID from synapse.visibility import filter_events_for_client @@ -373,20 +372,21 @@ class RelationsHandler: if event.is_state(): continue - relates_to = event.content.get("m.relates_to") - relation_type = None - if isinstance(relates_to, collections.abc.Mapping): - relation_type = relates_to.get("rel_type") + relates_to = relation_from_event(event) + if relates_to: # An event which is a replacement (ie edit) or annotation (ie, # reaction) may not have any other event related to it. - if relation_type in (RelationTypes.ANNOTATION, RelationTypes.REPLACE): + if relates_to.rel_type in ( + RelationTypes.ANNOTATION, + RelationTypes.REPLACE, + ): continue + # Track the event's relation information for later. + relations_by_id[event.event_id] = relates_to.rel_type + # The event should get bundled aggregations. events_by_id[event.event_id] = event - # Track the event's relation information for later. - if isinstance(relation_type, str): - relations_by_id[event.event_id] = relation_type # event ID -> bundled aggregation in non-serialized form. results: Dict[str, BundledAggregations] = {} diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 0ffafc882b..4ac2c546bf 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -21,7 +21,7 @@ from prometheus_client import Counter from synapse.api.constants import EventTypes, Membership, RelationTypes from synapse.event_auth import get_user_power_level -from synapse.events import EventBase +from synapse.events import EventBase, relation_from_event from synapse.events.snapshot import EventContext from synapse.state import POWER_KEY from synapse.storage.databases.main.roommember import EventIdMembership @@ -78,8 +78,8 @@ def _should_count_as_unread(event: EventBase, context: EventContext) -> bool: return False # Exclude edits. - relates_to = event.content.get("m.relates_to", {}) - if relates_to.get("rel_type") == RelationTypes.REPLACE: + relates_to = relation_from_event(event) + if relates_to and relates_to.rel_type == RelationTypes.REPLACE: return False # Mark events that have a non-empty string body as unread. diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index f544bcfff0..42d484dc98 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -36,8 +36,8 @@ from prometheus_client import Counter import synapse.metrics from synapse.api.constants import EventContentFields, EventTypes, RelationTypes from synapse.api.room_versions import RoomVersions -from synapse.events import EventBase # noqa: F401 -from synapse.events.snapshot import EventContext # noqa: F401 +from synapse.events import EventBase, relation_from_event +from synapse.events.snapshot import EventContext from synapse.storage._base import db_to_json, make_in_list_sql_clause from synapse.storage.database import ( DatabasePool, @@ -1807,52 +1807,45 @@ class PersistEventsStore: txn: The current database transaction. event: The event which might have relations. """ - relation = event.content.get("m.relates_to") + relation = relation_from_event(event) if not relation: - # No relations + # No relation, nothing to do. return - # Relations must have a type and parent event ID. - rel_type = relation.get("rel_type") - if not isinstance(rel_type, str): - return - - parent_id = relation.get("event_id") - if not isinstance(parent_id, str): - return - - # Annotations have a key field. - aggregation_key = None - if rel_type == RelationTypes.ANNOTATION: - aggregation_key = relation.get("key") - self.db_pool.simple_insert_txn( txn, table="event_relations", values={ "event_id": event.event_id, - "relates_to_id": parent_id, - "relation_type": rel_type, - "aggregation_key": aggregation_key, + "relates_to_id": relation.parent_id, + "relation_type": relation.rel_type, + "aggregation_key": relation.aggregation_key, }, ) - txn.call_after(self.store.get_relations_for_event.invalidate, (parent_id,)) txn.call_after( - self.store.get_aggregation_groups_for_event.invalidate, (parent_id,) + self.store.get_relations_for_event.invalidate, (relation.parent_id,) + ) + txn.call_after( + self.store.get_aggregation_groups_for_event.invalidate, + (relation.parent_id,), ) - if rel_type == RelationTypes.REPLACE: - txn.call_after(self.store.get_applicable_edit.invalidate, (parent_id,)) + if relation.rel_type == RelationTypes.REPLACE: + txn.call_after( + self.store.get_applicable_edit.invalidate, (relation.parent_id,) + ) - if rel_type == RelationTypes.THREAD: - txn.call_after(self.store.get_thread_summary.invalidate, (parent_id,)) + if relation.rel_type == RelationTypes.THREAD: + txn.call_after( + self.store.get_thread_summary.invalidate, (relation.parent_id,) + ) # It should be safe to only invalidate the cache if the user has not # previously participated in the thread, but that's difficult (and # potentially error-prone) so it is always invalidated. txn.call_after( self.store.get_thread_participated.invalidate, - (parent_id, event.sender), + (relation.parent_id, event.sender), ) def _handle_insertion_event( diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 2722bf26e7..74b6560cbc 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -656,12 +656,13 @@ class UnreadMessagesTestCase(unittest.HomeserverTestCase): self._check_unread_count(3) # Check that custom events with a body increase the unread counter. - self.helper.send_event( + result = self.helper.send_event( self.room_id, "org.matrix.custom_type", {"body": "hello"}, tok=self.tok2, ) + event_id = result["event_id"] self._check_unread_count(4) # Check that edits don't increase the unread counter. @@ -671,7 +672,10 @@ class UnreadMessagesTestCase(unittest.HomeserverTestCase): content={ "body": "hello", "msgtype": "m.text", - "m.relates_to": {"rel_type": RelationTypes.REPLACE}, + "m.relates_to": { + "rel_type": RelationTypes.REPLACE, + "event_id": event_id, + }, }, tok=self.tok2, ) -- cgit 1.5.1 From 83be72d76ca171ceb0fc381aa4548c1d9fea0dc7 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 16 May 2022 16:35:31 +0100 Subject: Add `StreamKeyType` class and replace string literals with constants (#12567) --- changelog.d/12567.misc | 1 + synapse/handlers/account_data.py | 10 +++--- synapse/handlers/appservice.py | 39 ++++++++++++------------ synapse/handlers/device.py | 7 +++-- synapse/handlers/devicemessage.py | 6 ++-- synapse/handlers/initial_sync.py | 13 +++++--- synapse/handlers/pagination.py | 6 ++-- synapse/handlers/presence.py | 6 ++-- synapse/handlers/receipts.py | 12 ++++++-- synapse/handlers/room.py | 9 +++--- synapse/handlers/search.py | 10 +++--- synapse/handlers/sync.py | 23 ++++++++------ synapse/handlers/typing.py | 4 +-- synapse/notifier.py | 5 +-- synapse/replication/tcp/client.py | 18 ++++++----- synapse/server_notices/server_notices_manager.py | 4 +-- synapse/storage/databases/main/e2e_room_keys.py | 4 +-- synapse/storage/databases/main/relations.py | 6 ++-- synapse/types.py | 22 +++++++++++-- 19 files changed, 125 insertions(+), 80 deletions(-) create mode 100644 changelog.d/12567.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/12567.misc b/changelog.d/12567.misc new file mode 100644 index 0000000000..35f08569ba --- /dev/null +++ b/changelog.d/12567.misc @@ -0,0 +1 @@ +Replace string literal instances of stream key types with typed constants. \ No newline at end of file diff --git a/synapse/handlers/account_data.py b/synapse/handlers/account_data.py index 4af9fbc5d1..0478448b47 100644 --- a/synapse/handlers/account_data.py +++ b/synapse/handlers/account_data.py @@ -23,7 +23,7 @@ from synapse.replication.http.account_data import ( ReplicationUserAccountDataRestServlet, ) from synapse.streams import EventSource -from synapse.types import JsonDict, UserID +from synapse.types import JsonDict, StreamKeyType, UserID if TYPE_CHECKING: from synapse.server import HomeServer @@ -105,7 +105,7 @@ class AccountDataHandler: ) self._notifier.on_new_event( - "account_data_key", max_stream_id, users=[user_id] + StreamKeyType.ACCOUNT_DATA, max_stream_id, users=[user_id] ) await self._notify_modules(user_id, room_id, account_data_type, content) @@ -141,7 +141,7 @@ class AccountDataHandler: ) self._notifier.on_new_event( - "account_data_key", max_stream_id, users=[user_id] + StreamKeyType.ACCOUNT_DATA, max_stream_id, users=[user_id] ) await self._notify_modules(user_id, None, account_data_type, content) @@ -176,7 +176,7 @@ class AccountDataHandler: ) self._notifier.on_new_event( - "account_data_key", max_stream_id, users=[user_id] + StreamKeyType.ACCOUNT_DATA, max_stream_id, users=[user_id] ) return max_stream_id else: @@ -201,7 +201,7 @@ class AccountDataHandler: ) self._notifier.on_new_event( - "account_data_key", max_stream_id, users=[user_id] + StreamKeyType.ACCOUNT_DATA, max_stream_id, users=[user_id] ) return max_stream_id else: diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index 85bd5e4768..1da7bcc85b 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -38,6 +38,7 @@ from synapse.types import ( JsonDict, RoomAlias, RoomStreamToken, + StreamKeyType, UserID, ) from synapse.util.async_helpers import Linearizer @@ -213,8 +214,8 @@ class ApplicationServicesHandler: Args: stream_key: The stream the event came from. - `stream_key` can be "typing_key", "receipt_key", "presence_key", - "to_device_key" or "device_list_key". Any other value for `stream_key` + `stream_key` can be StreamKeyType.TYPING, StreamKeyType.RECEIPT, StreamKeyType.PRESENCE, + StreamKeyType.TO_DEVICE or StreamKeyType.DEVICE_LIST. Any other value for `stream_key` will cause this function to return early. Ephemeral events will only be pushed to appservices that have opted into @@ -235,11 +236,11 @@ class ApplicationServicesHandler: # Only the following streams are currently supported. # FIXME: We should use constants for these values. if stream_key not in ( - "typing_key", - "receipt_key", - "presence_key", - "to_device_key", - "device_list_key", + StreamKeyType.TYPING, + StreamKeyType.RECEIPT, + StreamKeyType.PRESENCE, + StreamKeyType.TO_DEVICE, + StreamKeyType.DEVICE_LIST, ): return @@ -258,14 +259,14 @@ class ApplicationServicesHandler: # Ignore to-device messages if the feature flag is not enabled if ( - stream_key == "to_device_key" + stream_key == StreamKeyType.TO_DEVICE and not self._msc2409_to_device_messages_enabled ): return # Ignore device lists if the feature flag is not enabled if ( - stream_key == "device_list_key" + stream_key == StreamKeyType.DEVICE_LIST and not self._msc3202_transaction_extensions_enabled ): return @@ -283,15 +284,15 @@ class ApplicationServicesHandler: if ( stream_key in ( - "typing_key", - "receipt_key", - "presence_key", - "to_device_key", + StreamKeyType.TYPING, + StreamKeyType.RECEIPT, + StreamKeyType.PRESENCE, + StreamKeyType.TO_DEVICE, ) and service.supports_ephemeral ) or ( - stream_key == "device_list_key" + stream_key == StreamKeyType.DEVICE_LIST and service.msc3202_transaction_extensions ) ] @@ -317,7 +318,7 @@ class ApplicationServicesHandler: logger.debug("Checking interested services for %s", stream_key) with Measure(self.clock, "notify_interested_services_ephemeral"): for service in services: - if stream_key == "typing_key": + if stream_key == StreamKeyType.TYPING: # Note that we don't persist the token (via set_appservice_stream_type_pos) # for typing_key due to performance reasons and due to their highly # ephemeral nature. @@ -333,7 +334,7 @@ class ApplicationServicesHandler: async with self._ephemeral_events_linearizer.queue( (service.id, stream_key) ): - if stream_key == "receipt_key": + if stream_key == StreamKeyType.RECEIPT: events = await self._handle_receipts(service, new_token) self.scheduler.enqueue_for_appservice(service, ephemeral=events) @@ -342,7 +343,7 @@ class ApplicationServicesHandler: service, "read_receipt", new_token ) - elif stream_key == "presence_key": + elif stream_key == StreamKeyType.PRESENCE: events = await self._handle_presence(service, users, new_token) self.scheduler.enqueue_for_appservice(service, ephemeral=events) @@ -351,7 +352,7 @@ class ApplicationServicesHandler: service, "presence", new_token ) - elif stream_key == "to_device_key": + elif stream_key == StreamKeyType.TO_DEVICE: # Retrieve a list of to-device message events, as well as the # maximum stream token of the messages we were able to retrieve. to_device_messages = await self._get_to_device_messages( @@ -366,7 +367,7 @@ class ApplicationServicesHandler: service, "to_device", new_token ) - elif stream_key == "device_list_key": + elif stream_key == StreamKeyType.DEVICE_LIST: device_list_summary = await self._get_device_list_summary( service, new_token ) diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index a91b1ee4d5..1d6d1f8a92 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -43,6 +43,7 @@ from synapse.metrics.background_process_metrics import ( ) from synapse.types import ( JsonDict, + StreamKeyType, StreamToken, UserID, get_domain_from_id, @@ -502,7 +503,7 @@ class DeviceHandler(DeviceWorkerHandler): # specify the user ID too since the user should always get their own device list # updates, even if they aren't in any rooms. self.notifier.on_new_event( - "device_list_key", position, users={user_id}, rooms=room_ids + StreamKeyType.DEVICE_LIST, position, users={user_id}, rooms=room_ids ) # We may need to do some processing asynchronously for local user IDs. @@ -523,7 +524,9 @@ class DeviceHandler(DeviceWorkerHandler): from_user_id, user_ids ) - self.notifier.on_new_event("device_list_key", position, users=[from_user_id]) + self.notifier.on_new_event( + StreamKeyType.DEVICE_LIST, position, users=[from_user_id] + ) async def user_left_room(self, user: UserID, room_id: str) -> None: user_id = user.to_string() diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py index 4cb725d027..53668cce3b 100644 --- a/synapse/handlers/devicemessage.py +++ b/synapse/handlers/devicemessage.py @@ -26,7 +26,7 @@ from synapse.logging.opentracing import ( set_tag, ) from synapse.replication.http.devices import ReplicationUserDevicesResyncRestServlet -from synapse.types import JsonDict, Requester, UserID, get_domain_from_id +from synapse.types import JsonDict, Requester, StreamKeyType, UserID, get_domain_from_id from synapse.util import json_encoder from synapse.util.stringutils import random_string @@ -151,7 +151,7 @@ class DeviceMessageHandler: # Notify listeners that there are new to-device messages to process, # handing them the latest stream id. self.notifier.on_new_event( - "to_device_key", last_stream_id, users=local_messages.keys() + StreamKeyType.TO_DEVICE, last_stream_id, users=local_messages.keys() ) async def _check_for_unknown_devices( @@ -285,7 +285,7 @@ class DeviceMessageHandler: # Notify listeners that there are new to-device messages to process, # handing them the latest stream id. self.notifier.on_new_event( - "to_device_key", last_stream_id, users=local_messages.keys() + StreamKeyType.TO_DEVICE, last_stream_id, users=local_messages.keys() ) if self.federation_sender: diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py index de09aed3a3..d79248ad90 100644 --- a/synapse/handlers/initial_sync.py +++ b/synapse/handlers/initial_sync.py @@ -30,6 +30,7 @@ from synapse.types import ( Requester, RoomStreamToken, StateMap, + StreamKeyType, StreamToken, UserID, ) @@ -220,8 +221,10 @@ class InitialSyncHandler: self.storage, user_id, messages ) - start_token = now_token.copy_and_replace("room_key", token) - end_token = now_token.copy_and_replace("room_key", room_end_token) + start_token = now_token.copy_and_replace(StreamKeyType.ROOM, token) + end_token = now_token.copy_and_replace( + StreamKeyType.ROOM, room_end_token + ) time_now = self.clock.time_msec() d["messages"] = { @@ -369,8 +372,8 @@ class InitialSyncHandler: self.storage, user_id, messages, is_peeking=is_peeking ) - start_token = StreamToken.START.copy_and_replace("room_key", token) - end_token = StreamToken.START.copy_and_replace("room_key", stream_token) + start_token = StreamToken.START.copy_and_replace(StreamKeyType.ROOM, token) + end_token = StreamToken.START.copy_and_replace(StreamKeyType.ROOM, stream_token) time_now = self.clock.time_msec() @@ -474,7 +477,7 @@ class InitialSyncHandler: self.storage, user_id, messages, is_peeking=is_peeking ) - start_token = now_token.copy_and_replace("room_key", token) + start_token = now_token.copy_and_replace(StreamKeyType.ROOM, token) end_token = now_token time_now = self.clock.time_msec() diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 2e30180094..6ae88add95 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -27,7 +27,7 @@ from synapse.handlers.room import ShutdownRoomResponse from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.state import StateFilter from synapse.streams.config import PaginationConfig -from synapse.types import JsonDict, Requester +from synapse.types import JsonDict, Requester, StreamKeyType from synapse.util.async_helpers import ReadWriteLock from synapse.util.stringutils import random_string from synapse.visibility import filter_events_for_client @@ -491,7 +491,7 @@ class PaginationHandler: if leave_token.topological < curr_topo: from_token = from_token.copy_and_replace( - "room_key", leave_token + StreamKeyType.ROOM, leave_token ) await self.hs.get_federation_handler().maybe_backfill( @@ -513,7 +513,7 @@ class PaginationHandler: event_filter=event_filter, ) - next_token = from_token.copy_and_replace("room_key", next_key) + next_token = from_token.copy_and_replace(StreamKeyType.ROOM, next_key) if events: if event_filter: diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 268481ec19..dd84e6c88b 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -66,7 +66,7 @@ from synapse.replication.tcp.commands import ClearUserSyncsCommand from synapse.replication.tcp.streams import PresenceFederationStream, PresenceStream from synapse.storage.databases.main import DataStore from synapse.streams import EventSource -from synapse.types import JsonDict, UserID, get_domain_from_id +from synapse.types import JsonDict, StreamKeyType, UserID, get_domain_from_id from synapse.util.async_helpers import Linearizer from synapse.util.caches.descriptors import _CacheContext, cached from synapse.util.metrics import Measure @@ -522,7 +522,7 @@ class WorkerPresenceHandler(BasePresenceHandler): room_ids_to_states, users_to_states = parties self.notifier.on_new_event( - "presence_key", + StreamKeyType.PRESENCE, stream_id, rooms=room_ids_to_states.keys(), users=users_to_states.keys(), @@ -1145,7 +1145,7 @@ class PresenceHandler(BasePresenceHandler): room_ids_to_states, users_to_states = parties self.notifier.on_new_event( - "presence_key", + StreamKeyType.PRESENCE, stream_id, rooms=room_ids_to_states.keys(), users=[UserID.from_string(u) for u in users_to_states], diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 550d58b0e1..e6a35f1d09 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -17,7 +17,13 @@ from typing import TYPE_CHECKING, Iterable, List, Optional, Tuple from synapse.api.constants import ReceiptTypes from synapse.appservice import ApplicationService from synapse.streams import EventSource -from synapse.types import JsonDict, ReadReceipt, UserID, get_domain_from_id +from synapse.types import ( + JsonDict, + ReadReceipt, + StreamKeyType, + UserID, + get_domain_from_id, +) if TYPE_CHECKING: from synapse.server import HomeServer @@ -129,7 +135,9 @@ class ReceiptsHandler: affected_room_ids = list({r.room_id for r in receipts}) - self.notifier.on_new_event("receipt_key", max_batch_id, rooms=affected_room_ids) + self.notifier.on_new_event( + StreamKeyType.RECEIPT, max_batch_id, rooms=affected_room_ids + ) # Note that the min here shouldn't be relied upon to be accurate. await self.hs.get_pusherpool().on_new_receipts( min_batch_id, max_batch_id, affected_room_ids diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 23baa50d03..a2973109ad 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -73,6 +73,7 @@ from synapse.types import ( RoomID, RoomStreamToken, StateMap, + StreamKeyType, StreamToken, UserID, create_requester, @@ -1292,10 +1293,10 @@ class RoomContextHandler: events_after=events_after, state=await filter_evts(state_events), aggregations=aggregations, - start=await token.copy_and_replace("room_key", results.start).to_string( - self.store - ), - end=await token.copy_and_replace("room_key", results.end).to_string( + start=await token.copy_and_replace( + StreamKeyType.ROOM, results.start + ).to_string(self.store), + end=await token.copy_and_replace(StreamKeyType.ROOM, results.end).to_string( self.store ), ) diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 5619f8f50e..cd1c47dae8 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -24,7 +24,7 @@ from synapse.api.errors import NotFoundError, SynapseError from synapse.api.filtering import Filter from synapse.events import EventBase from synapse.storage.state import StateFilter -from synapse.types import JsonDict, UserID +from synapse.types import JsonDict, StreamKeyType, UserID from synapse.visibility import filter_events_for_client if TYPE_CHECKING: @@ -655,11 +655,11 @@ class SearchHandler: "events_before": events_before, "events_after": events_after, "start": await now_token.copy_and_replace( - "room_key", res.start + StreamKeyType.ROOM, res.start + ).to_string(self.store), + "end": await now_token.copy_and_replace( + StreamKeyType.ROOM, res.end ).to_string(self.store), - "end": await now_token.copy_and_replace("room_key", res.end).to_string( - self.store - ), } if include_profile: diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 2c555a66d0..4be08fe7cb 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -37,6 +37,7 @@ from synapse.types import ( Requester, RoomStreamToken, StateMap, + StreamKeyType, StreamToken, UserID, ) @@ -449,7 +450,7 @@ class SyncHandler: room_ids=room_ids, is_guest=sync_config.is_guest, ) - now_token = now_token.copy_and_replace("typing_key", typing_key) + now_token = now_token.copy_and_replace(StreamKeyType.TYPING, typing_key) ephemeral_by_room: JsonDict = {} @@ -471,7 +472,7 @@ class SyncHandler: room_ids=room_ids, is_guest=sync_config.is_guest, ) - now_token = now_token.copy_and_replace("receipt_key", receipt_key) + now_token = now_token.copy_and_replace(StreamKeyType.RECEIPT, receipt_key) for event in receipts: room_id = event["room_id"] @@ -537,7 +538,9 @@ class SyncHandler: prev_batch_token = now_token if recents: room_key = recents[0].internal_metadata.before - prev_batch_token = now_token.copy_and_replace("room_key", room_key) + prev_batch_token = now_token.copy_and_replace( + StreamKeyType.ROOM, room_key + ) return TimelineBatch( events=recents, prev_batch=prev_batch_token, limited=False @@ -611,7 +614,7 @@ class SyncHandler: recents = recents[-timeline_limit:] room_key = recents[0].internal_metadata.before - prev_batch_token = now_token.copy_and_replace("room_key", room_key) + prev_batch_token = now_token.copy_and_replace(StreamKeyType.ROOM, room_key) # Don't bother to bundle aggregations if the timeline is unlimited, # as clients will have all the necessary information. @@ -1398,7 +1401,7 @@ class SyncHandler: now_token.to_device_key, ) sync_result_builder.now_token = now_token.copy_and_replace( - "to_device_key", stream_id + StreamKeyType.TO_DEVICE, stream_id ) sync_result_builder.to_device = messages else: @@ -1503,7 +1506,7 @@ class SyncHandler: ) assert presence_key sync_result_builder.now_token = now_token.copy_and_replace( - "presence_key", presence_key + StreamKeyType.PRESENCE, presence_key ) extra_users_ids = set(newly_joined_or_invited_users) @@ -1826,7 +1829,7 @@ class SyncHandler: # stream token as it'll only be used in the context of this # room. (c.f. the docstring of `to_room_stream_token`). leave_token = since_token.copy_and_replace( - "room_key", leave_position.to_room_stream_token() + StreamKeyType.ROOM, leave_position.to_room_stream_token() ) # If this is an out of band message, like a remote invite @@ -1875,7 +1878,9 @@ class SyncHandler: if room_entry: events, start_key = room_entry - prev_batch_token = now_token.copy_and_replace("room_key", start_key) + prev_batch_token = now_token.copy_and_replace( + StreamKeyType.ROOM, start_key + ) entry = RoomSyncResultBuilder( room_id=room_id, @@ -1972,7 +1977,7 @@ class SyncHandler: continue leave_token = now_token.copy_and_replace( - "room_key", RoomStreamToken(None, event.stream_ordering) + StreamKeyType.ROOM, RoomStreamToken(None, event.stream_ordering) ) room_entries.append( RoomSyncResultBuilder( diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index 6854428b7c..bb00750bfd 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -25,7 +25,7 @@ from synapse.metrics.background_process_metrics import ( ) from synapse.replication.tcp.streams import TypingStream from synapse.streams import EventSource -from synapse.types import JsonDict, Requester, UserID, get_domain_from_id +from synapse.types import JsonDict, Requester, StreamKeyType, UserID, get_domain_from_id from synapse.util.caches.stream_change_cache import StreamChangeCache from synapse.util.metrics import Measure from synapse.util.wheel_timer import WheelTimer @@ -382,7 +382,7 @@ class TypingWriterHandler(FollowerTypingHandler): ) self.notifier.on_new_event( - "typing_key", self._latest_room_serial, rooms=[member.room_id] + StreamKeyType.TYPING, self._latest_room_serial, rooms=[member.room_id] ) async def get_all_typing_updates( diff --git a/synapse/notifier.py b/synapse/notifier.py index 01a50b9d62..ba23257f54 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -46,6 +46,7 @@ from synapse.types import ( JsonDict, PersistedEventPosition, RoomStreamToken, + StreamKeyType, StreamToken, UserID, ) @@ -370,7 +371,7 @@ class Notifier: if users or rooms: self.on_new_event( - "room_key", + StreamKeyType.ROOM, max_room_stream_token, users=users, rooms=rooms, @@ -440,7 +441,7 @@ class Notifier: for room in rooms: user_streams |= self.room_to_user_streams.get(room, set()) - if stream_key == "to_device_key": + if stream_key == StreamKeyType.TO_DEVICE: issue9533_logger.debug( "to-device messages stream id %s, awaking streams for %s", new_token, diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 350762f494..a52e25c1af 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -43,7 +43,7 @@ from synapse.replication.tcp.streams.events import ( EventsStreamEventRow, EventsStreamRow, ) -from synapse.types import PersistedEventPosition, ReadReceipt, UserID +from synapse.types import PersistedEventPosition, ReadReceipt, StreamKeyType, UserID from synapse.util.async_helpers import Linearizer, timeout_deferred from synapse.util.metrics import Measure @@ -153,19 +153,19 @@ class ReplicationDataHandler: if stream_name == TypingStream.NAME: self._typing_handler.process_replication_rows(token, rows) self.notifier.on_new_event( - "typing_key", token, rooms=[row.room_id for row in rows] + StreamKeyType.TYPING, token, rooms=[row.room_id for row in rows] ) elif stream_name == PushRulesStream.NAME: self.notifier.on_new_event( - "push_rules_key", token, users=[row.user_id for row in rows] + StreamKeyType.PUSH_RULES, token, users=[row.user_id for row in rows] ) elif stream_name in (AccountDataStream.NAME, TagAccountDataStream.NAME): self.notifier.on_new_event( - "account_data_key", token, users=[row.user_id for row in rows] + StreamKeyType.ACCOUNT_DATA, token, users=[row.user_id for row in rows] ) elif stream_name == ReceiptsStream.NAME: self.notifier.on_new_event( - "receipt_key", token, rooms=[row.room_id for row in rows] + StreamKeyType.RECEIPT, token, rooms=[row.room_id for row in rows] ) await self._pusher_pool.on_new_receipts( token, token, {row.room_id for row in rows} @@ -173,14 +173,18 @@ class ReplicationDataHandler: elif stream_name == ToDeviceStream.NAME: entities = [row.entity for row in rows if row.entity.startswith("@")] if entities: - self.notifier.on_new_event("to_device_key", token, users=entities) + self.notifier.on_new_event( + StreamKeyType.TO_DEVICE, token, users=entities + ) elif stream_name == DeviceListsStream.NAME: all_room_ids: Set[str] = set() for row in rows: if row.entity.startswith("@"): room_ids = await self.store.get_rooms_for_user(row.entity) all_room_ids.update(room_ids) - self.notifier.on_new_event("device_list_key", token, rooms=all_room_ids) + self.notifier.on_new_event( + StreamKeyType.DEVICE_LIST, token, rooms=all_room_ids + ) elif stream_name == GroupServerStream.NAME: self.notifier.on_new_event( "groups_key", token, users=[row.user_id for row in rows] diff --git a/synapse/server_notices/server_notices_manager.py b/synapse/server_notices/server_notices_manager.py index c2c37e1015..8ecab86ec7 100644 --- a/synapse/server_notices/server_notices_manager.py +++ b/synapse/server_notices/server_notices_manager.py @@ -16,7 +16,7 @@ from typing import TYPE_CHECKING, Optional from synapse.api.constants import EventTypes, Membership, RoomCreationPreset from synapse.events import EventBase -from synapse.types import Requester, UserID, create_requester +from synapse.types import Requester, StreamKeyType, UserID, create_requester from synapse.util.caches.descriptors import cached if TYPE_CHECKING: @@ -189,7 +189,7 @@ class ServerNoticesManager: max_id = await self._account_data_handler.add_tag_to_room( user_id, room_id, SERVER_NOTICE_ROOM_TAG, {} ) - self._notifier.on_new_event("account_data_key", max_id, users=[user_id]) + self._notifier.on_new_event(StreamKeyType.ACCOUNT_DATA, max_id, users=[user_id]) logger.info("Created server notices room %s for %s", room_id, user_id) return room_id diff --git a/synapse/storage/databases/main/e2e_room_keys.py b/synapse/storage/databases/main/e2e_room_keys.py index b789a588a5..af59be6b48 100644 --- a/synapse/storage/databases/main/e2e_room_keys.py +++ b/synapse/storage/databases/main/e2e_room_keys.py @@ -21,7 +21,7 @@ from synapse.api.errors import StoreError from synapse.logging.opentracing import log_kv, trace from synapse.storage._base import SQLBaseStore, db_to_json from synapse.storage.database import LoggingTransaction -from synapse.types import JsonDict, JsonSerializable +from synapse.types import JsonDict, JsonSerializable, StreamKeyType from synapse.util import json_encoder @@ -126,7 +126,7 @@ class EndToEndRoomKeyStore(SQLBaseStore): "message": "Set room key", "room_id": room_id, "session_id": session_id, - "room_key": room_key, + StreamKeyType.ROOM: room_key, } ) diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py index 484976ca6b..fe8fded88b 100644 --- a/synapse/storage/databases/main/relations.py +++ b/synapse/storage/databases/main/relations.py @@ -34,7 +34,7 @@ from synapse.storage._base import SQLBaseStore from synapse.storage.database import LoggingTransaction, make_in_list_sql_clause from synapse.storage.databases.main.stream import generate_pagination_where_clause from synapse.storage.engines import PostgresEngine -from synapse.types import JsonDict, RoomStreamToken, StreamToken +from synapse.types import JsonDict, RoomStreamToken, StreamKeyType, StreamToken from synapse.util.caches.descriptors import cached, cachedList logger = logging.getLogger(__name__) @@ -161,7 +161,9 @@ class RelationsWorkerStore(SQLBaseStore): if len(events) > limit and last_topo_id and last_stream_id: next_key = RoomStreamToken(last_topo_id, last_stream_id) if from_token: - next_token = from_token.copy_and_replace("room_key", next_key) + next_token = from_token.copy_and_replace( + StreamKeyType.ROOM, next_key + ) else: next_token = StreamToken( room_key=next_key, diff --git a/synapse/types.py b/synapse/types.py index 325332a6e0..bd8071d51d 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -37,7 +37,7 @@ import attr from frozendict import frozendict from signedjson.key import decode_verify_key_bytes from signedjson.types import VerifyKey -from typing_extensions import TypedDict +from typing_extensions import Final, TypedDict from unpaddedbase64 import decode_base64 from zope.interface import Interface @@ -630,6 +630,22 @@ class RoomStreamToken: return "s%d" % (self.stream,) +class StreamKeyType: + """Known stream types. + + A stream is a list of entities ordered by an incrementing "stream token". + """ + + ROOM: Final = "room_key" + PRESENCE: Final = "presence_key" + TYPING: Final = "typing_key" + RECEIPT: Final = "receipt_key" + ACCOUNT_DATA: Final = "account_data_key" + PUSH_RULES: Final = "push_rules_key" + TO_DEVICE: Final = "to_device_key" + DEVICE_LIST: Final = "device_list_key" + + @attr.s(slots=True, frozen=True, auto_attribs=True) class StreamToken: """A collection of keys joined together by underscores in the following @@ -743,9 +759,9 @@ class StreamToken: :raises TypeError: if `key` is not the one of the keys tracked by a StreamToken. """ - if key == "room_key": + if key == StreamKeyType.ROOM: new_token = self.copy_and_replace( - "room_key", self.room_key.copy_and_advance(new_value) + StreamKeyType.ROOM, self.room_key.copy_and_advance(new_value) ) return new_token -- cgit 1.5.1 From fcf951d5dc7ca8c4cb18aa9c1f5ccb005df3610a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 17 May 2022 10:34:27 +0100 Subject: Track in memory events using weakrefs (#10533) --- changelog.d/10533.misc | 1 + synapse/storage/databases/main/events_worker.py | 35 ++++++++++++++++++++-- tests/handlers/test_sync.py | 1 + tests/storage/databases/main/test_events_worker.py | 25 ++++++++++++++++ 4 files changed, 60 insertions(+), 2 deletions(-) create mode 100644 changelog.d/10533.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/10533.misc b/changelog.d/10533.misc new file mode 100644 index 0000000000..f70dc6496f --- /dev/null +++ b/changelog.d/10533.misc @@ -0,0 +1 @@ +Improve event caching mechanism to avoid having multiple copies of an event in memory at a time. diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index a4a604a499..5b22d6b452 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -14,6 +14,7 @@ import logging import threading +import weakref from enum import Enum, auto from typing import ( TYPE_CHECKING, @@ -23,6 +24,7 @@ from typing import ( Dict, Iterable, List, + MutableMapping, Optional, Set, Tuple, @@ -248,6 +250,12 @@ class EventsWorkerStore(SQLBaseStore): str, ObservableDeferred[Dict[str, EventCacheEntry]] ] = {} + # We keep track of the events we have currently loaded in memory so that + # we can reuse them even if they've been evicted from the cache. We only + # track events that don't need redacting in here (as then we don't need + # to track redaction status). + self._event_ref: MutableMapping[str, EventBase] = weakref.WeakValueDictionary() + self._event_fetch_lock = threading.Condition() self._event_fetch_list: List[ Tuple[Iterable[str], "defer.Deferred[Dict[str, _EventRow]]"] @@ -723,6 +731,8 @@ class EventsWorkerStore(SQLBaseStore): def _invalidate_get_event_cache(self, event_id: str) -> None: self._get_event_cache.invalidate((event_id,)) + self._event_ref.pop(event_id, None) + self._current_event_fetches.pop(event_id, None) def _get_events_from_cache( self, events: Iterable[str], update_metrics: bool = True @@ -738,13 +748,30 @@ class EventsWorkerStore(SQLBaseStore): event_map = {} for event_id in events: + # First check if it's in the event cache ret = self._get_event_cache.get( (event_id,), None, update_metrics=update_metrics ) - if not ret: + if ret: + event_map[event_id] = ret continue - event_map[event_id] = ret + # Otherwise check if we still have the event in memory. + event = self._event_ref.get(event_id) + if event: + # Reconstruct an event cache entry + + cache_entry = EventCacheEntry( + event=event, + # We don't cache weakrefs to redacted events, so we know + # this is None. + redacted_event=None, + ) + event_map[event_id] = cache_entry + + # We add the entry back into the cache as we want to keep + # recently queried events in the cache. + self._get_event_cache.set((event_id,), cache_entry) return event_map @@ -1124,6 +1151,10 @@ class EventsWorkerStore(SQLBaseStore): self._get_event_cache.set((event_id,), cache_entry) result_map[event_id] = cache_entry + if not redacted_event: + # We only cache references to unredacted events. + self._event_ref[event_id] = original_ev + return result_map async def _enqueue_events(self, events: Collection[str]) -> Dict[str, _EventRow]: diff --git a/tests/handlers/test_sync.py b/tests/handlers/test_sync.py index 865b8b7e47..db3302a4c7 100644 --- a/tests/handlers/test_sync.py +++ b/tests/handlers/test_sync.py @@ -160,6 +160,7 @@ class SyncTestCase(tests.unittest.HomeserverTestCase): # Blow away caches (supported room versions can only change due to a restart). self.store.get_rooms_for_user_with_stream_ordering.invalidate_all() self.store._get_event_cache.clear() + self.store._event_ref.clear() # The rooms should be excluded from the sync response. # Get a new request key. diff --git a/tests/storage/databases/main/test_events_worker.py b/tests/storage/databases/main/test_events_worker.py index c237a8c7e2..38963ce4a7 100644 --- a/tests/storage/databases/main/test_events_worker.py +++ b/tests/storage/databases/main/test_events_worker.py @@ -154,6 +154,31 @@ class EventCacheTestCase(unittest.HomeserverTestCase): # We should have fetched the event from the DB self.assertEqual(ctx.get_resource_usage().evt_db_fetch_count, 1) + def test_event_ref(self): + """Test that we reuse events that are still in memory but have fallen + out of the cache, rather than requesting them from the DB. + """ + + # Reset the event cache + self.store._get_event_cache.clear() + + with LoggingContext("test") as ctx: + # We keep hold of the event event though we never use it. + event = self.get_success(self.store.get_event(self.event_id)) # noqa: F841 + + # We should have fetched the event from the DB + self.assertEqual(ctx.get_resource_usage().evt_db_fetch_count, 1) + + # Reset the event cache + self.store._get_event_cache.clear() + + with LoggingContext("test") as ctx: + self.get_success(self.store.get_event(self.event_id)) + + # Since the event is still in memory we shouldn't have fetched it + # from the DB + self.assertEqual(ctx.get_resource_usage().evt_db_fetch_count, 0) + def test_dedupe(self): """Test that if we request the same event multiple times we only pull it out once. -- cgit 1.5.1 From 32ef24fbd74b8822c3e57c8ce74b979506aea7be Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 17 May 2022 10:34:59 +0100 Subject: Add index to cache invalidations (#12747) For workers that rarely write to the cache the `get_all_updated_caches` query can become expensive if the worker falls behind when reading the cache. --- changelog.d/12747.bugfix | 1 + synapse/storage/databases/main/cache.py | 8 ++++++++ .../main/delta/69/02cache_invalidation_index.sql | 18 ++++++++++++++++++ 3 files changed, 27 insertions(+) create mode 100644 changelog.d/12747.bugfix create mode 100644 synapse/storage/schema/main/delta/69/02cache_invalidation_index.sql (limited to 'synapse/storage/databases') diff --git a/changelog.d/12747.bugfix b/changelog.d/12747.bugfix new file mode 100644 index 0000000000..0fb0059237 --- /dev/null +++ b/changelog.d/12747.bugfix @@ -0,0 +1 @@ +Fix poor database performance when reading the cache invalidation stream for large servers with lots of workers. diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py index dd4e83a2ad..1653a6a9b6 100644 --- a/synapse/storage/databases/main/cache.py +++ b/synapse/storage/databases/main/cache.py @@ -57,6 +57,14 @@ class CacheInvalidationWorkerStore(SQLBaseStore): self._instance_name = hs.get_instance_name() + self.db_pool.updates.register_background_index_update( + update_name="cache_invalidation_index_by_instance", + index_name="cache_invalidation_stream_by_instance_instance_index", + table="cache_invalidation_stream_by_instance", + columns=("instance_name", "stream_id"), + psql_only=True, # The table is only on postgres DBs. + ) + async def get_all_updated_caches( self, instance_name: str, last_id: int, current_id: int, limit: int ) -> Tuple[List[Tuple[int, tuple]], int, bool]: diff --git a/synapse/storage/schema/main/delta/69/02cache_invalidation_index.sql b/synapse/storage/schema/main/delta/69/02cache_invalidation_index.sql new file mode 100644 index 0000000000..22ae3b8c00 --- /dev/null +++ b/synapse/storage/schema/main/delta/69/02cache_invalidation_index.sql @@ -0,0 +1,18 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Background update to clear the inboxes of hidden and deleted devices. +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (6902, 'cache_invalidation_index_by_instance', '{}'); -- cgit 1.5.1 From 24b590de32154eb3965220bd62715e52b37b4074 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 17 May 2022 12:07:18 +0200 Subject: Remove code which updates `application_services_state.last_txn` (#12680) This column is unused as of #12209, so let's stop writing to it. --- changelog.d/12680.misc | 1 + synapse/storage/databases/main/appservice.py | 47 ++++++++++++++-------------- synapse/storage/schema/__init__.py | 5 ++- tests/handlers/test_appservice.py | 10 ------ tests/storage/test_appservice.py | 27 ++++------------ 5 files changed, 35 insertions(+), 55 deletions(-) create mode 100644 changelog.d/12680.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/12680.misc b/changelog.d/12680.misc new file mode 100644 index 0000000000..dfd1f0a6c6 --- /dev/null +++ b/changelog.d/12680.misc @@ -0,0 +1 @@ +Remove code which updates unused database column `application_services_state.last_txn`. diff --git a/synapse/storage/databases/main/appservice.py b/synapse/storage/databases/main/appservice.py index 945707b0ec..e284454b66 100644 --- a/synapse/storage/databases/main/appservice.py +++ b/synapse/storage/databases/main/appservice.py @@ -203,19 +203,29 @@ class ApplicationServiceTransactionWorkerStore( """Get the application service state. Args: - service: The service whose state to set. + service: The service whose state to get. Returns: - An ApplicationServiceState or none. + An ApplicationServiceState, or None if we have yet to attempt any + transactions to the AS. """ - result = await self.db_pool.simple_select_one( + # if we have created transactions for this AS but not yet attempted to send + # them, we will have a row in the table with state=NULL (recording the stream + # positions we have processed up to). + # + # On the other hand, if we have yet to create any transactions for this AS at + # all, then there will be no row for the AS. + # + # In either case, we return None to indicate "we don't yet know the state of + # this AS". + result = await self.db_pool.simple_select_one_onecol( "application_services_state", {"as_id": service.id}, - ["state"], + retcol="state", allow_none=True, desc="get_appservice_state", ) if result: - return ApplicationServiceState(result.get("state")) + return ApplicationServiceState(result) return None async def set_appservice_state( @@ -296,14 +306,6 @@ class ApplicationServiceTransactionWorkerStore( """ def _complete_appservice_txn(txn: LoggingTransaction) -> None: - # Set current txn_id for AS to 'txn_id' - self.db_pool.simple_upsert_txn( - txn, - "application_services_state", - {"as_id": service.id}, - {"last_txn": txn_id}, - ) - # Delete txn self.db_pool.simple_delete_txn( txn, @@ -452,16 +454,15 @@ class ApplicationServiceTransactionWorkerStore( % (stream_type,) ) - def set_appservice_stream_type_pos_txn(txn: LoggingTransaction) -> None: - stream_id_type = "%s_stream_id" % stream_type - txn.execute( - "UPDATE application_services_state SET %s = ? WHERE as_id=?" - % stream_id_type, - (pos, service.id), - ) - - await self.db_pool.runInteraction( - "set_appservice_stream_type_pos", set_appservice_stream_type_pos_txn + # this may be the first time that we're recording any state for this AS, so + # we don't yet know if a row for it exists; hence we have to upsert here. + await self.db_pool.simple_upsert( + table="application_services_state", + keyvalues={"as_id": service.id}, + values={f"{stream_type}_stream_id": pos}, + # no need to lock when emulating upsert: as_id is a unique key + lock=False, + desc="set_appservice_stream_type_pos", ) diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index 20c344faea..da98f05e03 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -61,7 +61,9 @@ Changes in SCHEMA_VERSION = 68: Changes in SCHEMA_VERSION = 69: - We now write to `device_lists_changes_in_room` table. - - Use sequence to generate future `application_services_txns.txn_id`s + - We now use a PostgreSQL sequence to generate future txn_ids for + `application_services_txns`. `application_services_state.last_txn` is no longer + updated. Changes in SCHEMA_VERSION = 70: - event_reference_hashes is no longer written to. @@ -71,6 +73,7 @@ Changes in SCHEMA_VERSION = 70: SCHEMA_COMPAT_VERSION = ( # We now assume that `device_lists_changes_in_room` has been filled out for # recent device_list_updates. + # ... and that `application_services_state.last_txn` is not used. 69 ) """Limit on how far the synapse codebase can be rolled back without breaking db compat diff --git a/tests/handlers/test_appservice.py b/tests/handlers/test_appservice.py index 5b0cd1ab86..53e7a5d81b 100644 --- a/tests/handlers/test_appservice.py +++ b/tests/handlers/test_appservice.py @@ -434,16 +434,6 @@ class ApplicationServicesHandlerSendEventsTestCase(unittest.HomeserverTestCase): }, ) - # "Complete" a transaction. - # All this really does for us is make an entry in the application_services_state - # database table, which tracks the current stream_token per stream ID per AS. - self.get_success( - self.hs.get_datastores().main.complete_appservice_txn( - 0, - interested_appservice, - ) - ) - # Now, pretend that we receive a large burst of read receipts (300 total) that # all come in at once. for i in range(300): diff --git a/tests/storage/test_appservice.py b/tests/storage/test_appservice.py index 1bf93e79a7..1047ed09c8 100644 --- a/tests/storage/test_appservice.py +++ b/tests/storage/test_appservice.py @@ -14,7 +14,7 @@ import json import os import tempfile -from typing import List, Optional, cast +from typing import List, cast from unittest.mock import Mock import yaml @@ -149,15 +149,12 @@ class ApplicationServiceTransactionStoreTestCase(unittest.HomeserverTestCase): outfile.write(yaml.dump(as_yaml)) self.as_yaml_files.append(as_token) - def _set_state( - self, id: str, state: ApplicationServiceState, txn: Optional[int] = None - ): + def _set_state(self, id: str, state: ApplicationServiceState): return self.db_pool.runOperation( self.engine.convert_param_style( - "INSERT INTO application_services_state(as_id, state, last_txn) " - "VALUES(?,?,?)" + "INSERT INTO application_services_state(as_id, state) VALUES(?,?)" ), - (id, state.value, txn), + (id, state.value), ) def _insert_txn(self, as_id, txn_id, events): @@ -280,17 +277,6 @@ class ApplicationServiceTransactionStoreTestCase(unittest.HomeserverTestCase): self.store.complete_appservice_txn(txn_id=txn_id, service=service) ) - res = self.get_success( - self.db_pool.runQuery( - self.engine.convert_param_style( - "SELECT last_txn FROM application_services_state WHERE as_id=?" - ), - (service.id,), - ) - ) - self.assertEqual(1, len(res)) - self.assertEqual(txn_id, res[0][0]) - res = self.get_success( self.db_pool.runQuery( self.engine.convert_param_style( @@ -316,14 +302,13 @@ class ApplicationServiceTransactionStoreTestCase(unittest.HomeserverTestCase): res = self.get_success( self.db_pool.runQuery( self.engine.convert_param_style( - "SELECT last_txn, state FROM application_services_state WHERE as_id=?" + "SELECT state FROM application_services_state WHERE as_id=?" ), (service.id,), ) ) self.assertEqual(1, len(res)) - self.assertEqual(txn_id, res[0][0]) - self.assertEqual(ApplicationServiceState.UP.value, res[0][1]) + self.assertEqual(ApplicationServiceState.UP.value, res[0][0]) res = self.get_success( self.db_pool.runQuery( -- cgit 1.5.1 From 6edefef60289cc54e17fd6af838eb66c4973f5f5 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Tue, 17 May 2022 16:29:06 +0200 Subject: Add some type hints to datastore (#12717) --- changelog.d/12717.misc | 1 + mypy.ini | 2 - synapse/federation/sender/__init__.py | 24 +++- synapse/handlers/sync.py | 6 +- synapse/rest/client/push_rule.py | 4 +- synapse/state/__init__.py | 4 +- synapse/storage/databases/main/__init__.py | 8 +- synapse/storage/databases/main/metrics.py | 56 ++++---- synapse/storage/databases/main/push_rule.py | 184 +++++++++++++++++---------- synapse/storage/databases/main/roommember.py | 126 +++++++++++------- 10 files changed, 254 insertions(+), 161 deletions(-) create mode 100644 changelog.d/12717.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/12717.misc b/changelog.d/12717.misc new file mode 100644 index 0000000000..e793d08e5e --- /dev/null +++ b/changelog.d/12717.misc @@ -0,0 +1 @@ +Add some type hints to datastore. \ No newline at end of file diff --git a/mypy.ini b/mypy.ini index b5b907973f..45668974b3 100644 --- a/mypy.ini +++ b/mypy.ini @@ -28,8 +28,6 @@ exclude = (?x) |synapse/storage/databases/main/cache.py |synapse/storage/databases/main/devices.py |synapse/storage/databases/main/event_federation.py - |synapse/storage/databases/main/push_rule.py - |synapse/storage/databases/main/roommember.py |synapse/storage/schema/ |tests/api/test_auth.py diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index 6d2f46318b..dbe303ed9b 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -15,7 +15,17 @@ import abc import logging from collections import OrderedDict -from typing import TYPE_CHECKING, Dict, Hashable, Iterable, List, Optional, Set, Tuple +from typing import ( + TYPE_CHECKING, + Collection, + Dict, + Hashable, + Iterable, + List, + Optional, + Set, + Tuple, +) import attr from prometheus_client import Counter @@ -409,7 +419,7 @@ class FederationSender(AbstractFederationSender): ) return - destinations: Optional[Set[str]] = None + destinations: Optional[Collection[str]] = None if not event.prev_event_ids(): # If there are no prev event IDs then the state is empty # and so no remote servers in the room @@ -444,7 +454,7 @@ class FederationSender(AbstractFederationSender): ) return - destinations = { + sharded_destinations = { d for d in destinations if self._federation_shard_config.should_handle( @@ -456,12 +466,12 @@ class FederationSender(AbstractFederationSender): # If we are sending the event on behalf of another server # then it already has the event and there is no reason to # send the event to it. - destinations.discard(send_on_behalf_of) + sharded_destinations.discard(send_on_behalf_of) - logger.debug("Sending %s to %r", event, destinations) + logger.debug("Sending %s to %r", event, sharded_destinations) - if destinations: - await self._send_pdu(event, destinations) + if sharded_destinations: + await self._send_pdu(event, sharded_destinations) now = self.clock.time_msec() ts = await self.store.get_received_ts(event.event_id) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 4be08fe7cb..59b5d497be 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -411,10 +411,10 @@ class SyncHandler: set_tag(SynapseTags.SYNC_RESULT, bool(sync_result)) return sync_result - async def push_rules_for_user(self, user: UserID) -> JsonDict: + async def push_rules_for_user(self, user: UserID) -> Dict[str, Dict[str, list]]: user_id = user.to_string() - rules = await self.store.get_push_rules_for_user(user_id) - rules = format_push_rules_for_user(user, rules) + rules_raw = await self.store.get_push_rules_for_user(user_id) + rules = format_push_rules_for_user(user, rules_raw) return rules async def ephemeral_by_room( diff --git a/synapse/rest/client/push_rule.py b/synapse/rest/client/push_rule.py index b98640b14a..8191b4e32c 100644 --- a/synapse/rest/client/push_rule.py +++ b/synapse/rest/client/push_rule.py @@ -148,9 +148,9 @@ class PushRuleRestServlet(RestServlet): # we build up the full structure and then decide which bits of it # to send which means doing unnecessary work sometimes but is # is probably not going to make a whole lot of difference - rules = await self.store.get_push_rules_for_user(user_id) + rules_raw = await self.store.get_push_rules_for_user(user_id) - rules = format_push_rules_for_user(requester.user, rules) + rules = format_push_rules_for_user(requester.user, rules_raw) path_parts = path.split("/")[1:] diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 54e41d5375..0219091c4e 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -239,13 +239,13 @@ class StateHandler: entry = await self.resolve_state_groups_for_events(room_id, latest_event_ids) return await self.store.get_joined_users_from_state(room_id, entry) - async def get_current_hosts_in_room(self, room_id: str) -> Set[str]: + async def get_current_hosts_in_room(self, room_id: str) -> FrozenSet[str]: event_ids = await self.store.get_latest_event_ids_in_room(room_id) return await self.get_hosts_in_room_at_events(room_id, event_ids) async def get_hosts_in_room_at_events( self, room_id: str, event_ids: Collection[str] - ) -> Set[str]: + ) -> FrozenSet[str]: """Get the hosts that were in a room at the given event ids Args: diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py index 5895b89202..d545a1c002 100644 --- a/synapse/storage/databases/main/__init__.py +++ b/synapse/storage/databases/main/__init__.py @@ -26,11 +26,7 @@ from synapse.storage.database import ( from synapse.storage.databases.main.stats import UserSortOrder from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine from synapse.storage.types import Cursor -from synapse.storage.util.id_generators import ( - IdGenerator, - MultiWriterIdGenerator, - StreamIdGenerator, -) +from synapse.storage.util.id_generators import MultiWriterIdGenerator, StreamIdGenerator from synapse.types import JsonDict, get_domain_from_id from synapse.util.caches.stream_change_cache import StreamChangeCache @@ -155,8 +151,6 @@ class DataStore( ], ) - self._push_rule_id_gen = IdGenerator(db_conn, "push_rules", "id") - self._push_rules_enable_id_gen = IdGenerator(db_conn, "push_rules_enable", "id") self._group_updates_id_gen = StreamIdGenerator( db_conn, "local_group_updates", "stream_id" ) diff --git a/synapse/storage/databases/main/metrics.py b/synapse/storage/databases/main/metrics.py index d03555a585..14294a0bb8 100644 --- a/synapse/storage/databases/main/metrics.py +++ b/synapse/storage/databases/main/metrics.py @@ -14,16 +14,19 @@ import calendar import logging import time -from typing import TYPE_CHECKING, Dict +from typing import TYPE_CHECKING, Dict, List, Tuple, cast from synapse.metrics import GaugeBucketCollector from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.storage._base import SQLBaseStore -from synapse.storage.database import DatabasePool, LoggingDatabaseConnection +from synapse.storage.database import ( + DatabasePool, + LoggingDatabaseConnection, + LoggingTransaction, +) from synapse.storage.databases.main.event_push_actions import ( EventPushActionsWorkerStore, ) -from synapse.storage.types import Cursor if TYPE_CHECKING: from synapse.server import HomeServer @@ -73,7 +76,7 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): @wrap_as_background_process("read_forward_extremities") async def _read_forward_extremities(self) -> None: - def fetch(txn): + def fetch(txn: LoggingTransaction) -> List[Tuple[int, int]]: txn.execute( """ SELECT t1.c, t2.c @@ -86,7 +89,7 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): ) t2 ON t1.room_id = t2.room_id """ ) - return txn.fetchall() + return cast(List[Tuple[int, int]], txn.fetchall()) res = await self.db_pool.runInteraction("read_forward_extremities", fetch) @@ -104,20 +107,20 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): call to this function, it will return None. """ - def _count_messages(txn): + def _count_messages(txn: LoggingTransaction) -> int: sql = """ SELECT COUNT(*) FROM events WHERE type = 'm.room.encrypted' AND stream_ordering > ? """ txn.execute(sql, (self.stream_ordering_day_ago,)) - (count,) = txn.fetchone() + (count,) = cast(Tuple[int], txn.fetchone()) return count return await self.db_pool.runInteraction("count_e2ee_messages", _count_messages) async def count_daily_sent_e2ee_messages(self) -> int: - def _count_messages(txn): + def _count_messages(txn: LoggingTransaction) -> int: # This is good enough as if you have silly characters in your own # hostname then that's your own fault. like_clause = "%:" + self.hs.hostname @@ -130,7 +133,7 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): """ txn.execute(sql, (like_clause, self.stream_ordering_day_ago)) - (count,) = txn.fetchone() + (count,) = cast(Tuple[int], txn.fetchone()) return count return await self.db_pool.runInteraction( @@ -138,14 +141,14 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): ) async def count_daily_active_e2ee_rooms(self) -> int: - def _count(txn): + def _count(txn: LoggingTransaction) -> int: sql = """ SELECT COUNT(DISTINCT room_id) FROM events WHERE type = 'm.room.encrypted' AND stream_ordering > ? """ txn.execute(sql, (self.stream_ordering_day_ago,)) - (count,) = txn.fetchone() + (count,) = cast(Tuple[int], txn.fetchone()) return count return await self.db_pool.runInteraction( @@ -160,20 +163,20 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): call to this function, it will return None. """ - def _count_messages(txn): + def _count_messages(txn: LoggingTransaction) -> int: sql = """ SELECT COUNT(*) FROM events WHERE type = 'm.room.message' AND stream_ordering > ? """ txn.execute(sql, (self.stream_ordering_day_ago,)) - (count,) = txn.fetchone() + (count,) = cast(Tuple[int], txn.fetchone()) return count return await self.db_pool.runInteraction("count_messages", _count_messages) async def count_daily_sent_messages(self) -> int: - def _count_messages(txn): + def _count_messages(txn: LoggingTransaction) -> int: # This is good enough as if you have silly characters in your own # hostname then that's your own fault. like_clause = "%:" + self.hs.hostname @@ -186,7 +189,7 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): """ txn.execute(sql, (like_clause, self.stream_ordering_day_ago)) - (count,) = txn.fetchone() + (count,) = cast(Tuple[int], txn.fetchone()) return count return await self.db_pool.runInteraction( @@ -194,14 +197,14 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): ) async def count_daily_active_rooms(self) -> int: - def _count(txn): + def _count(txn: LoggingTransaction) -> int: sql = """ SELECT COUNT(DISTINCT room_id) FROM events WHERE type = 'm.room.message' AND stream_ordering > ? """ txn.execute(sql, (self.stream_ordering_day_ago,)) - (count,) = txn.fetchone() + (count,) = cast(Tuple[int], txn.fetchone()) return count return await self.db_pool.runInteraction("count_daily_active_rooms", _count) @@ -227,7 +230,7 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): "count_monthly_users", self._count_users, thirty_days_ago ) - def _count_users(self, txn: Cursor, time_from: int) -> int: + def _count_users(self, txn: LoggingTransaction, time_from: int) -> int: """ Returns number of users seen in the past time_from period """ @@ -242,7 +245,7 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): # Mypy knows that fetchone() might return None if there are no rows. # We know better: "SELECT COUNT(...) FROM ..." without any GROUP BY always # returns exactly one row. - (count,) = txn.fetchone() # type: ignore[misc] + (count,) = cast(Tuple[int], txn.fetchone()) return count async def count_r30_users(self) -> Dict[str, int]: @@ -256,7 +259,7 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): A mapping of counts globally as well as broken out by platform. """ - def _count_r30_users(txn): + def _count_r30_users(txn: LoggingTransaction) -> Dict[str, int]: thirty_days_in_secs = 86400 * 30 now = int(self._clock.time()) thirty_days_ago_in_secs = now - thirty_days_in_secs @@ -321,7 +324,7 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): txn.execute(sql, (thirty_days_ago_in_secs, thirty_days_ago_in_secs)) - (count,) = txn.fetchone() + (count,) = cast(Tuple[int], txn.fetchone()) results["all"] = count return results @@ -348,7 +351,7 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): - "web" (any web application -- it's not possible to distinguish Element Web here) """ - def _count_r30v2_users(txn): + def _count_r30v2_users(txn: LoggingTransaction) -> Dict[str, int]: thirty_days_in_secs = 86400 * 30 now = int(self._clock.time()) sixty_days_ago_in_secs = now - 2 * thirty_days_in_secs @@ -445,11 +448,8 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): thirty_days_in_secs * 1000, ), ) - row = txn.fetchone() - if row is None: - results["all"] = 0 - else: - results["all"] = row[0] + (count,) = cast(Tuple[int], txn.fetchone()) + results["all"] = count return results @@ -471,7 +471,7 @@ class ServerMetricsStore(EventPushActionsWorkerStore, SQLBaseStore): Generates daily visit data for use in cohort/ retention analysis """ - def _generate_user_daily_visits(txn): + def _generate_user_daily_visits(txn: LoggingTransaction) -> None: logger.info("Calling _generate_user_daily_visits") today_start = self._get_start_of_day() a_day_in_milliseconds = 24 * 60 * 60 * 1000 diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py index 4ed913e248..0e2855fb44 100644 --- a/synapse/storage/databases/main/push_rule.py +++ b/synapse/storage/databases/main/push_rule.py @@ -14,14 +14,18 @@ # limitations under the License. import abc import logging -from typing import TYPE_CHECKING, Dict, List, Tuple, Union +from typing import TYPE_CHECKING, Collection, Dict, List, Optional, Tuple, Union, cast from synapse.api.errors import StoreError from synapse.config.homeserver import ExperimentalConfig from synapse.push.baserules import list_with_base_rules from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker from synapse.storage._base import SQLBaseStore, db_to_json -from synapse.storage.database import DatabasePool, LoggingDatabaseConnection +from synapse.storage.database import ( + DatabasePool, + LoggingDatabaseConnection, + LoggingTransaction, +) from synapse.storage.databases.main.appservice import ApplicationServiceWorkerStore from synapse.storage.databases.main.events_worker import EventsWorkerStore from synapse.storage.databases.main.pusher import PusherWorkerStore @@ -30,9 +34,12 @@ from synapse.storage.databases.main.roommember import RoomMemberWorkerStore from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.storage.push_rule import InconsistentRuleException, RuleNotFoundException from synapse.storage.util.id_generators import ( + AbstractStreamIdGenerator, AbstractStreamIdTracker, + IdGenerator, StreamIdGenerator, ) +from synapse.types import JsonDict from synapse.util import json_encoder from synapse.util.caches.descriptors import cached, cachedList from synapse.util.caches.stream_change_cache import StreamChangeCache @@ -57,7 +64,11 @@ def _is_experimental_rule_enabled( return True -def _load_rules(rawrules, enabled_map, experimental_config: ExperimentalConfig): +def _load_rules( + rawrules: List[JsonDict], + enabled_map: Dict[str, bool], + experimental_config: ExperimentalConfig, +) -> List[JsonDict]: ruleslist = [] for rawrule in rawrules: rule = dict(rawrule) @@ -137,7 +148,7 @@ class PushRulesWorkerStore( ) @abc.abstractmethod - def get_max_push_rules_stream_id(self): + def get_max_push_rules_stream_id(self) -> int: """Get the position of the push rules stream. Returns: @@ -146,7 +157,7 @@ class PushRulesWorkerStore( raise NotImplementedError() @cached(max_entries=5000) - async def get_push_rules_for_user(self, user_id): + async def get_push_rules_for_user(self, user_id: str) -> List[JsonDict]: rows = await self.db_pool.simple_select_list( table="push_rules", keyvalues={"user_name": user_id}, @@ -168,7 +179,7 @@ class PushRulesWorkerStore( return _load_rules(rows, enabled_map, self.hs.config.experimental) @cached(max_entries=5000) - async def get_push_rules_enabled_for_user(self, user_id) -> Dict[str, bool]: + async def get_push_rules_enabled_for_user(self, user_id: str) -> Dict[str, bool]: results = await self.db_pool.simple_select_list( table="push_rules_enable", keyvalues={"user_name": user_id}, @@ -184,13 +195,13 @@ class PushRulesWorkerStore( return False else: - def have_push_rules_changed_txn(txn): + def have_push_rules_changed_txn(txn: LoggingTransaction) -> bool: sql = ( "SELECT COUNT(stream_id) FROM push_rules_stream" " WHERE user_id = ? AND ? < stream_id" ) txn.execute(sql, (user_id, last_id)) - (count,) = txn.fetchone() + (count,) = cast(Tuple[int], txn.fetchone()) return bool(count) return await self.db_pool.runInteraction( @@ -202,11 +213,13 @@ class PushRulesWorkerStore( list_name="user_ids", num_args=1, ) - async def bulk_get_push_rules(self, user_ids): + async def bulk_get_push_rules( + self, user_ids: Collection[str] + ) -> Dict[str, List[JsonDict]]: if not user_ids: return {} - results = {user_id: [] for user_id in user_ids} + results: Dict[str, List[JsonDict]] = {user_id: [] for user_id in user_ids} rows = await self.db_pool.simple_select_many_batch( table="push_rules", @@ -250,7 +263,7 @@ class PushRulesWorkerStore( condition["pattern"] = new_room_id # Add the rule for the new room - await self.add_push_rule( + await self.add_push_rule( # type: ignore[attr-defined] user_id=user_id, rule_id=new_rule_id, priority_class=rule["priority_class"], @@ -286,11 +299,13 @@ class PushRulesWorkerStore( list_name="user_ids", num_args=1, ) - async def bulk_get_push_rules_enabled(self, user_ids): + async def bulk_get_push_rules_enabled( + self, user_ids: Collection[str] + ) -> Dict[str, Dict[str, bool]]: if not user_ids: return {} - results = {user_id: {} for user_id in user_ids} + results: Dict[str, Dict[str, bool]] = {user_id: {} for user_id in user_ids} rows = await self.db_pool.simple_select_many_batch( table="push_rules_enable", @@ -306,7 +321,7 @@ class PushRulesWorkerStore( async def get_all_push_rule_updates( self, instance_name: str, last_id: int, current_id: int, limit: int - ) -> Tuple[List[Tuple[int, tuple]], int, bool]: + ) -> Tuple[List[Tuple[int, Tuple[str]]], int, bool]: """Get updates for push_rules replication stream. Args: @@ -331,7 +346,9 @@ class PushRulesWorkerStore( if last_id == current_id: return [], current_id, False - def get_all_push_rule_updates_txn(txn): + def get_all_push_rule_updates_txn( + txn: LoggingTransaction, + ) -> Tuple[List[Tuple[int, Tuple[str]]], int, bool]: sql = """ SELECT stream_id, user_id FROM push_rules_stream @@ -340,7 +357,10 @@ class PushRulesWorkerStore( LIMIT ? """ txn.execute(sql, (last_id, current_id, limit)) - updates = [(stream_id, (user_id,)) for stream_id, user_id in txn] + updates = cast( + List[Tuple[int, Tuple[str]]], + [(stream_id, (user_id,)) for stream_id, user_id in txn], + ) limited = False upper_bound = current_id @@ -356,15 +376,30 @@ class PushRulesWorkerStore( class PushRuleStore(PushRulesWorkerStore): + # Because we have write access, this will be a StreamIdGenerator + # (see PushRulesWorkerStore.__init__) + _push_rules_stream_id_gen: AbstractStreamIdGenerator + + def __init__( + self, + database: DatabasePool, + db_conn: LoggingDatabaseConnection, + hs: "HomeServer", + ): + super().__init__(database, db_conn, hs) + + self._push_rule_id_gen = IdGenerator(db_conn, "push_rules", "id") + self._push_rules_enable_id_gen = IdGenerator(db_conn, "push_rules_enable", "id") + async def add_push_rule( self, - user_id, - rule_id, - priority_class, - conditions, - actions, - before=None, - after=None, + user_id: str, + rule_id: str, + priority_class: int, + conditions: List[Dict[str, str]], + actions: List[Union[JsonDict, str]], + before: Optional[str] = None, + after: Optional[str] = None, ) -> None: conditions_json = json_encoder.encode(conditions) actions_json = json_encoder.encode(actions) @@ -400,17 +435,17 @@ class PushRuleStore(PushRulesWorkerStore): def _add_push_rule_relative_txn( self, - txn, - stream_id, - event_stream_ordering, - user_id, - rule_id, - priority_class, - conditions_json, - actions_json, - before, - after, - ): + txn: LoggingTransaction, + stream_id: int, + event_stream_ordering: int, + user_id: str, + rule_id: str, + priority_class: int, + conditions_json: str, + actions_json: str, + before: str, + after: str, + ) -> None: # Lock the table since otherwise we'll have annoying races between the # SELECT here and the UPSERT below. self.database_engine.lock_table(txn, "push_rules") @@ -470,15 +505,15 @@ class PushRuleStore(PushRulesWorkerStore): def _add_push_rule_highest_priority_txn( self, - txn, - stream_id, - event_stream_ordering, - user_id, - rule_id, - priority_class, - conditions_json, - actions_json, - ): + txn: LoggingTransaction, + stream_id: int, + event_stream_ordering: int, + user_id: str, + rule_id: str, + priority_class: int, + conditions_json: str, + actions_json: str, + ) -> None: # Lock the table since otherwise we'll have annoying races between the # SELECT here and the UPSERT below. self.database_engine.lock_table(txn, "push_rules") @@ -510,17 +545,17 @@ class PushRuleStore(PushRulesWorkerStore): def _upsert_push_rule_txn( self, - txn, - stream_id, - event_stream_ordering, - user_id, - rule_id, - priority_class, - priority, - conditions_json, - actions_json, - update_stream=True, - ): + txn: LoggingTransaction, + stream_id: int, + event_stream_ordering: int, + user_id: str, + rule_id: str, + priority_class: int, + priority: int, + conditions_json: str, + actions_json: str, + update_stream: bool = True, + ) -> None: """Specialised version of simple_upsert_txn that picks a push_rule_id using the _push_rule_id_gen if it needs to insert the rule. It assumes that the "push_rules" table is locked""" @@ -600,7 +635,11 @@ class PushRuleStore(PushRulesWorkerStore): rule_id: The rule_id of the rule to be deleted """ - def delete_push_rule_txn(txn, stream_id, event_stream_ordering): + def delete_push_rule_txn( + txn: LoggingTransaction, + stream_id: int, + event_stream_ordering: int, + ) -> None: # we don't use simple_delete_one_txn because that would fail if the # user did not have a push_rule_enable row. self.db_pool.simple_delete_txn( @@ -661,14 +700,14 @@ class PushRuleStore(PushRulesWorkerStore): def _set_push_rule_enabled_txn( self, - txn, - stream_id, - event_stream_ordering, - user_id, - rule_id, - enabled, - is_default_rule, - ): + txn: LoggingTransaction, + stream_id: int, + event_stream_ordering: int, + user_id: str, + rule_id: str, + enabled: bool, + is_default_rule: bool, + ) -> None: new_id = self._push_rules_enable_id_gen.get_next() if not is_default_rule: @@ -740,7 +779,11 @@ class PushRuleStore(PushRulesWorkerStore): """ actions_json = json_encoder.encode(actions) - def set_push_rule_actions_txn(txn, stream_id, event_stream_ordering): + def set_push_rule_actions_txn( + txn: LoggingTransaction, + stream_id: int, + event_stream_ordering: int, + ) -> None: if is_default_rule: # Add a dummy rule to the rules table with the user specified # actions. @@ -794,8 +837,15 @@ class PushRuleStore(PushRulesWorkerStore): ) def _insert_push_rules_update_txn( - self, txn, stream_id, event_stream_ordering, user_id, rule_id, op, data=None - ): + self, + txn: LoggingTransaction, + stream_id: int, + event_stream_ordering: int, + user_id: str, + rule_id: str, + op: str, + data: Optional[JsonDict] = None, + ) -> None: values = { "stream_id": stream_id, "event_stream_ordering": event_stream_ordering, @@ -814,5 +864,5 @@ class PushRuleStore(PushRulesWorkerStore): self.push_rules_stream_cache.entity_has_changed, user_id, stream_id ) - def get_max_push_rules_stream_id(self): + def get_max_push_rules_stream_id(self) -> int: return self._push_rules_stream_id_gen.get_current_token() diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 48e83592e7..608d40dfa1 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -37,7 +37,12 @@ from synapse.metrics.background_process_metrics import ( wrap_as_background_process, ) from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause -from synapse.storage.database import DatabasePool, LoggingDatabaseConnection +from synapse.storage.database import ( + DatabasePool, + LoggingDatabaseConnection, + LoggingTransaction, +) +from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore from synapse.storage.databases.main.events_worker import EventsWorkerStore from synapse.storage.engines import Sqlite3Engine from synapse.storage.roommember import ( @@ -46,7 +51,7 @@ from synapse.storage.roommember import ( ProfileInfo, RoomsForUser, ) -from synapse.types import PersistedEventPosition, get_domain_from_id +from synapse.types import JsonDict, PersistedEventPosition, StateMap, get_domain_from_id from synapse.util.async_helpers import Linearizer from synapse.util.caches import intern_string from synapse.util.caches.descriptors import _CacheContext, cached, cachedList @@ -115,7 +120,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): ) @wrap_as_background_process("_count_known_servers") - async def _count_known_servers(self): + async def _count_known_servers(self) -> int: """ Count the servers that this server knows about. @@ -123,7 +128,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): `synapse_federation_known_servers` LaterGauge to collect. """ - def _transact(txn): + def _transact(txn: LoggingTransaction) -> int: if isinstance(self.database_engine, Sqlite3Engine): query = """ SELECT COUNT(DISTINCT substr(out.user_id, pos+1)) @@ -150,7 +155,9 @@ class RoomMemberWorkerStore(EventsWorkerStore): self._known_servers_count = max([count, 1]) return self._known_servers_count - def _check_safe_current_state_events_membership_updated_txn(self, txn): + def _check_safe_current_state_events_membership_updated_txn( + self, txn: LoggingTransaction + ) -> None: """Checks if it is safe to assume the new current_state_events membership column is up to date """ @@ -182,7 +189,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): "get_users_in_room", self.get_users_in_room_txn, room_id ) - def get_users_in_room_txn(self, txn, room_id: str) -> List[str]: + def get_users_in_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[str]: # If we can assume current_state_events.membership is up to date # then we can avoid a join, which is a Very Good Thing given how # frequently this function gets called. @@ -222,7 +229,9 @@ class RoomMemberWorkerStore(EventsWorkerStore): A mapping from user ID to ProfileInfo. """ - def _get_users_in_room_with_profiles(txn) -> Dict[str, ProfileInfo]: + def _get_users_in_room_with_profiles( + txn: LoggingTransaction, + ) -> Dict[str, ProfileInfo]: sql = """ SELECT state_key, display_name, avatar_url FROM room_memberships as m INNER JOIN current_state_events as c @@ -250,7 +259,9 @@ class RoomMemberWorkerStore(EventsWorkerStore): dict of membership states, pointing to a MemberSummary named tuple. """ - def _get_room_summary_txn(txn): + def _get_room_summary_txn( + txn: LoggingTransaction, + ) -> Dict[str, MemberSummary]: # first get counts. # We do this all in one transaction to keep the cache small. # FIXME: get rid of this when we have room_stats @@ -279,7 +290,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): """ txn.execute(sql, (room_id,)) - res = {} + res: Dict[str, MemberSummary] = {} for count, membership in txn: res.setdefault(membership, MemberSummary([], count)) @@ -400,7 +411,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): def _get_rooms_for_local_user_where_membership_is_txn( self, - txn, + txn: LoggingTransaction, user_id: str, membership_list: List[str], ) -> List[RoomsForUser]: @@ -488,7 +499,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): ) def _get_rooms_for_user_with_stream_ordering_txn( - self, txn, user_id: str + self, txn: LoggingTransaction, user_id: str ) -> FrozenSet[GetRoomsForUserWithStreamOrdering]: # We use `current_state_events` here and not `local_current_membership` # as a) this gets called with remote users and b) this only gets called @@ -542,7 +553,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): ) def _get_rooms_for_users_with_stream_ordering_txn( - self, txn, user_ids: Collection[str] + self, txn: LoggingTransaction, user_ids: Collection[str] ) -> Dict[str, FrozenSet[GetRoomsForUserWithStreamOrdering]]: clause, args = make_in_list_sql_clause( @@ -575,7 +586,9 @@ class RoomMemberWorkerStore(EventsWorkerStore): txn.execute(sql, [Membership.JOIN] + args) - result = {user_id: set() for user_id in user_ids} + result: Dict[str, Set[GetRoomsForUserWithStreamOrdering]] = { + user_id: set() for user_id in user_ids + } for user_id, room_id, instance, stream_id in txn: result[user_id].add( GetRoomsForUserWithStreamOrdering( @@ -595,7 +608,9 @@ class RoomMemberWorkerStore(EventsWorkerStore): if not user_ids: return set() - def _get_users_server_still_shares_room_with_txn(txn): + def _get_users_server_still_shares_room_with_txn( + txn: LoggingTransaction, + ) -> Set[str]: sql = """ SELECT state_key FROM current_state_events WHERE @@ -657,7 +672,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): async def get_joined_users_from_context( self, event: EventBase, context: EventContext ) -> Dict[str, ProfileInfo]: - state_group = context.state_group + state_group: Union[object, int] = context.state_group if not state_group: # If state_group is None it means it has yet to be assigned a # state group, i.e. we need to make sure that calls with a state_group @@ -666,14 +681,16 @@ class RoomMemberWorkerStore(EventsWorkerStore): state_group = object() current_state_ids = await context.get_current_state_ids() + assert current_state_ids is not None + assert state_group is not None return await self._get_joined_users_from_context( event.room_id, state_group, current_state_ids, event=event, context=context ) async def get_joined_users_from_state( - self, room_id, state_entry + self, room_id: str, state_entry: "_StateCacheEntry" ) -> Dict[str, ProfileInfo]: - state_group = state_entry.state_group + state_group: Union[object, int] = state_entry.state_group if not state_group: # If state_group is None it means it has yet to be assigned a # state group, i.e. we need to make sure that calls with a state_group @@ -681,6 +698,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): # To do this we set the state_group to a new object as object() != object() state_group = object() + assert state_group is not None with Measure(self._clock, "get_joined_users_from_state"): return await self._get_joined_users_from_context( room_id, state_group, state_entry.state, context=state_entry @@ -689,12 +707,12 @@ class RoomMemberWorkerStore(EventsWorkerStore): @cached(num_args=2, cache_context=True, iterable=True, max_entries=100000) async def _get_joined_users_from_context( self, - room_id, - state_group, - current_state_ids, - cache_context, - event=None, - context=None, + room_id: str, + state_group: Union[object, int], + current_state_ids: StateMap[str], + cache_context: _CacheContext, + event: Optional[EventBase] = None, + context: Optional[Union[EventContext, "_StateCacheEntry"]] = None, ) -> Dict[str, ProfileInfo]: # We don't use `state_group`, it's there so that we can cache based # on it. However, it's important that it's never None, since two current_states @@ -765,14 +783,18 @@ class RoomMemberWorkerStore(EventsWorkerStore): return users_in_room @cached(max_entries=10000) - def _get_joined_profile_from_event_id(self, event_id): + def _get_joined_profile_from_event_id( + self, event_id: str + ) -> Optional[Tuple[str, ProfileInfo]]: raise NotImplementedError() @cachedList( cached_method_name="_get_joined_profile_from_event_id", list_name="event_ids", ) - async def _get_joined_profiles_from_event_ids(self, event_ids: Iterable[str]): + async def _get_joined_profiles_from_event_ids( + self, event_ids: Iterable[str] + ) -> Dict[str, Optional[Tuple[str, ProfileInfo]]]: """For given set of member event_ids check if they point to a join event and if so return the associated user and profile info. @@ -780,8 +802,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): event_ids: The member event IDs to lookup Returns: - dict[str, Tuple[str, ProfileInfo]|None]: Map from event ID - to `user_id` and ProfileInfo (or None if not join event). + Map from event ID to `user_id` and ProfileInfo (or None if not join event). """ rows = await self.db_pool.simple_select_many_batch( @@ -847,8 +868,10 @@ class RoomMemberWorkerStore(EventsWorkerStore): return True - async def get_joined_hosts(self, room_id: str, state_entry): - state_group = state_entry.state_group + async def get_joined_hosts( + self, room_id: str, state_entry: "_StateCacheEntry" + ) -> FrozenSet[str]: + state_group: Union[object, int] = state_entry.state_group if not state_group: # If state_group is None it means it has yet to be assigned a # state group, i.e. we need to make sure that calls with a state_group @@ -856,6 +879,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): # To do this we set the state_group to a new object as object() != object() state_group = object() + assert state_group is not None with Measure(self._clock, "get_joined_hosts"): return await self._get_joined_hosts( room_id, state_group, state_entry=state_entry @@ -863,7 +887,10 @@ class RoomMemberWorkerStore(EventsWorkerStore): @cached(num_args=2, max_entries=10000, iterable=True) async def _get_joined_hosts( - self, room_id: str, state_group: int, state_entry: "_StateCacheEntry" + self, + room_id: str, + state_group: Union[object, int], + state_entry: "_StateCacheEntry", ) -> FrozenSet[str]: # We don't use `state_group`, it's there so that we can cache based on # it. However, its important that its never None, since two @@ -881,7 +908,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): # `get_joined_hosts` is called with the "current" state group for the # room, and so consecutive calls will be for consecutive state groups # which point to the previous state group. - cache = await self._get_joined_hosts_cache(room_id) + cache = await self._get_joined_hosts_cache(room_id) # type: ignore[misc] # If the state group in the cache matches, we already have the data we need. if state_entry.state_group == cache.state_group: @@ -897,6 +924,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): elif state_entry.prev_group == cache.state_group: # The cached work is for the previous state group, so we work out # the delta. + assert state_entry.delta_ids is not None for (typ, state_key), event_id in state_entry.delta_ids.items(): if typ != EventTypes.Member: continue @@ -942,7 +970,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): Returns False if they have since re-joined.""" - def f(txn): + def f(txn: LoggingTransaction) -> int: sql = ( "SELECT" " COUNT(*)" @@ -973,7 +1001,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): The forgotten rooms. """ - def _get_forgotten_rooms_for_user_txn(txn): + def _get_forgotten_rooms_for_user_txn(txn: LoggingTransaction) -> Set[str]: # This is a slightly convoluted query that first looks up all rooms # that the user has forgotten in the past, then rechecks that list # to see if any have subsequently been updated. This is done so that @@ -1076,7 +1104,9 @@ class RoomMemberWorkerStore(EventsWorkerStore): clause, ) - def _is_local_host_in_room_ignoring_users_txn(txn): + def _is_local_host_in_room_ignoring_users_txn( + txn: LoggingTransaction, + ) -> bool: txn.execute(sql, (room_id, Membership.JOIN, *args)) return bool(txn.fetchone()) @@ -1110,15 +1140,17 @@ class RoomMemberBackgroundUpdateStore(SQLBaseStore): where_clause="forgotten = 1", ) - async def _background_add_membership_profile(self, progress, batch_size): + async def _background_add_membership_profile( + self, progress: JsonDict, batch_size: int + ) -> int: target_min_stream_id = progress.get( - "target_min_stream_id_inclusive", self._min_stream_order_on_start + "target_min_stream_id_inclusive", self._min_stream_order_on_start # type: ignore[attr-defined] ) max_stream_id = progress.get( - "max_stream_id_exclusive", self._stream_order_on_start + 1 + "max_stream_id_exclusive", self._stream_order_on_start + 1 # type: ignore[attr-defined] ) - def add_membership_profile_txn(txn): + def add_membership_profile_txn(txn: LoggingTransaction) -> int: sql = """ SELECT stream_ordering, event_id, events.room_id, event_json.json FROM events @@ -1182,13 +1214,17 @@ class RoomMemberBackgroundUpdateStore(SQLBaseStore): return result - async def _background_current_state_membership(self, progress, batch_size): + async def _background_current_state_membership( + self, progress: JsonDict, batch_size: int + ) -> int: """Update the new membership column on current_state_events. This works by iterating over all rooms in alphebetical order. """ - def _background_current_state_membership_txn(txn, last_processed_room): + def _background_current_state_membership_txn( + txn: LoggingTransaction, last_processed_room: str + ) -> Tuple[int, bool]: processed = 0 while processed < batch_size: txn.execute( @@ -1242,7 +1278,11 @@ class RoomMemberBackgroundUpdateStore(SQLBaseStore): return row_count -class RoomMemberStore(RoomMemberWorkerStore, RoomMemberBackgroundUpdateStore): +class RoomMemberStore( + RoomMemberWorkerStore, + RoomMemberBackgroundUpdateStore, + CacheInvalidationWorkerStore, +): def __init__( self, database: DatabasePool, @@ -1254,7 +1294,7 @@ class RoomMemberStore(RoomMemberWorkerStore, RoomMemberBackgroundUpdateStore): async def forget(self, user_id: str, room_id: str) -> None: """Indicate that user_id wishes to discard history for room_id.""" - def f(txn): + def f(txn: LoggingTransaction) -> None: sql = ( "UPDATE" " room_memberships" @@ -1288,5 +1328,5 @@ class _JoinedHostsCache: # equal to anything else). state_group: Union[object, int] = attr.Factory(object) - def __len__(self): + def __len__(self) -> int: return sum(len(v) for v in self.hosts_to_joined_users.values()) -- cgit 1.5.1 From 5331fb5b478789a3ffaaeddb58f8d1cefd42a9eb Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 17 May 2022 17:06:45 +0100 Subject: allow `on_invalidate=None` in `@cached` methods (#12769) --- changelog.d/12769.misc | 1 + scripts-dev/mypy_synapse_plugin.py | 25 ++++++++++++++++--------- synapse/storage/databases/main/roommember.py | 3 ++- 3 files changed, 19 insertions(+), 10 deletions(-) create mode 100644 changelog.d/12769.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/12769.misc b/changelog.d/12769.misc new file mode 100644 index 0000000000..27bd53abe3 --- /dev/null +++ b/changelog.d/12769.misc @@ -0,0 +1 @@ +Tweak the mypy plugin so that `@cached` can accept `on_invalidate=None`. diff --git a/scripts-dev/mypy_synapse_plugin.py b/scripts-dev/mypy_synapse_plugin.py index c775865212..d08517a953 100644 --- a/scripts-dev/mypy_synapse_plugin.py +++ b/scripts-dev/mypy_synapse_plugin.py @@ -21,7 +21,7 @@ from typing import Callable, Optional, Type from mypy.nodes import ARG_NAMED_OPT from mypy.plugin import MethodSigContext, Plugin from mypy.typeops import bind_self -from mypy.types import CallableType, NoneType +from mypy.types import CallableType, NoneType, UnionType class SynapsePlugin(Plugin): @@ -72,13 +72,20 @@ def cached_function_method_signature(ctx: MethodSigContext) -> CallableType: # Third, we add an optional "on_invalidate" argument. # - # This is a callable which accepts no input and returns nothing. - calltyp = CallableType( - arg_types=[], - arg_kinds=[], - arg_names=[], - ret_type=NoneType(), - fallback=ctx.api.named_generic_type("builtins.function", []), + # This is a either + # - a callable which accepts no input and returns nothing, or + # - None. + calltyp = UnionType( + [ + NoneType(), + CallableType( + arg_types=[], + arg_kinds=[], + arg_names=[], + ret_type=NoneType(), + fallback=ctx.api.named_generic_type("builtins.function", []), + ), + ] ) arg_types.append(calltyp) @@ -95,7 +102,7 @@ def cached_function_method_signature(ctx: MethodSigContext) -> CallableType: def plugin(version: str) -> Type[SynapsePlugin]: - # This is the entry point of the plugin, and let's us deal with the fact + # This is the entry point of the plugin, and lets us deal with the fact # that the mypy plugin interface is *not* stable by looking at the version # string. # diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 608d40dfa1..cc528fcf2d 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -15,6 +15,7 @@ import logging from typing import ( TYPE_CHECKING, + Callable, Collection, Dict, FrozenSet, @@ -634,7 +635,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): ) async def get_rooms_for_user( - self, user_id: str, on_invalidate=None + self, user_id: str, on_invalidate: Optional[Callable[[], None]] = None ) -> FrozenSet[str]: """Returns a set of room_ids the user is currently joined to. -- cgit 1.5.1 From 182ca78a12c4ae0f37726d43d5e592d669d99ee1 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Tue, 17 May 2022 19:01:06 +0200 Subject: Delete events from federation_inbound_events_staging table on purge (#12770) --- changelog.d/12770.bugfix | 1 + synapse/storage/databases/main/purge_events.py | 1 + tests/rest/admin/test_room.py | 1 + 3 files changed, 3 insertions(+) create mode 100644 changelog.d/12770.bugfix (limited to 'synapse/storage/databases') diff --git a/changelog.d/12770.bugfix b/changelog.d/12770.bugfix new file mode 100644 index 0000000000..a958f9a16b --- /dev/null +++ b/changelog.d/12770.bugfix @@ -0,0 +1 @@ +Delete events from the `federation_inbound_events_staging` table when a room is purged through the admin API. diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py index 38ba91af4c..c94d5f9f81 100644 --- a/synapse/storage/databases/main/purge_events.py +++ b/synapse/storage/databases/main/purge_events.py @@ -417,6 +417,7 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore): "room_account_data", "room_tags", "local_current_membership", + "federation_inbound_events_staging", ): logger.info("[purge] removing %s from %s", room_id, table) txn.execute("DELETE FROM %s WHERE room_id=?" % (table,), (room_id,)) diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py index 95282f078e..608d3f2dc3 100644 --- a/tests/rest/admin/test_room.py +++ b/tests/rest/admin/test_room.py @@ -2489,4 +2489,5 @@ PURGE_TABLES = [ "room_tags", # "state_groups", # Current impl leaves orphaned state groups around. "state_groups_state", + "federation_inbound_events_staging", ] -- cgit 1.5.1 From 37935b5183ab3cbee2f80359d80b1ff2176428f0 Mon Sep 17 00:00:00 2001 From: Adam <65660516+ajr0d@users.noreply.github.com> Date: Wed, 18 May 2022 10:37:48 +0100 Subject: Move methods that call add_push_rule to PushRuleStore (#12772) Signed-off-by: Adam Roddick --- changelog.d/12772.misc | 1 + synapse/storage/databases/main/push_rule.py | 102 ++++++++++++++-------------- 2 files changed, 52 insertions(+), 51 deletions(-) create mode 100644 changelog.d/12772.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/12772.misc b/changelog.d/12772.misc new file mode 100644 index 0000000000..da66f376fe --- /dev/null +++ b/changelog.d/12772.misc @@ -0,0 +1 @@ +Move methods that call `add_push_rule` to the `PushRuleStore` class. diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py index 0e2855fb44..ad67901cc1 100644 --- a/synapse/storage/databases/main/push_rule.py +++ b/synapse/storage/databases/main/push_rule.py @@ -243,57 +243,6 @@ class PushRulesWorkerStore( return results - async def copy_push_rule_from_room_to_room( - self, new_room_id: str, user_id: str, rule: dict - ) -> None: - """Copy a single push rule from one room to another for a specific user. - - Args: - new_room_id: ID of the new room. - user_id : ID of user the push rule belongs to. - rule: A push rule. - """ - # Create new rule id - rule_id_scope = "/".join(rule["rule_id"].split("/")[:-1]) - new_rule_id = rule_id_scope + "/" + new_room_id - - # Change room id in each condition - for condition in rule.get("conditions", []): - if condition.get("key") == "room_id": - condition["pattern"] = new_room_id - - # Add the rule for the new room - await self.add_push_rule( # type: ignore[attr-defined] - user_id=user_id, - rule_id=new_rule_id, - priority_class=rule["priority_class"], - conditions=rule["conditions"], - actions=rule["actions"], - ) - - async def copy_push_rules_from_room_to_room_for_user( - self, old_room_id: str, new_room_id: str, user_id: str - ) -> None: - """Copy all of the push rules from one room to another for a specific - user. - - Args: - old_room_id: ID of the old room. - new_room_id: ID of the new room. - user_id: ID of user to copy push rules for. - """ - # Retrieve push rules for this user - user_push_rules = await self.get_push_rules_for_user(user_id) - - # Get rules relating to the old room and copy them to the new room - for rule in user_push_rules: - conditions = rule.get("conditions", []) - if any( - (c.get("key") == "room_id" and c.get("pattern") == old_room_id) - for c in conditions - ): - await self.copy_push_rule_from_room_to_room(new_room_id, user_id, rule) - @cachedList( cached_method_name="get_push_rules_enabled_for_user", list_name="user_ids", @@ -866,3 +815,54 @@ class PushRuleStore(PushRulesWorkerStore): def get_max_push_rules_stream_id(self) -> int: return self._push_rules_stream_id_gen.get_current_token() + + async def copy_push_rule_from_room_to_room( + self, new_room_id: str, user_id: str, rule: dict + ) -> None: + """Copy a single push rule from one room to another for a specific user. + + Args: + new_room_id: ID of the new room. + user_id : ID of user the push rule belongs to. + rule: A push rule. + """ + # Create new rule id + rule_id_scope = "/".join(rule["rule_id"].split("/")[:-1]) + new_rule_id = rule_id_scope + "/" + new_room_id + + # Change room id in each condition + for condition in rule.get("conditions", []): + if condition.get("key") == "room_id": + condition["pattern"] = new_room_id + + # Add the rule for the new room + await self.add_push_rule( + user_id=user_id, + rule_id=new_rule_id, + priority_class=rule["priority_class"], + conditions=rule["conditions"], + actions=rule["actions"], + ) + + async def copy_push_rules_from_room_to_room_for_user( + self, old_room_id: str, new_room_id: str, user_id: str + ) -> None: + """Copy all of the push rules from one room to another for a specific + user. + + Args: + old_room_id: ID of the old room. + new_room_id: ID of the new room. + user_id: ID of user to copy push rules for. + """ + # Retrieve push rules for this user + user_push_rules = await self.get_push_rules_for_user(user_id) + + # Get rules relating to the old room and copy them to the new room + for rule in user_push_rules: + conditions = rule.get("conditions", []) + if any( + (c.get("key") == "room_id" and c.get("pattern") == old_room_id) + for c in conditions + ): + await self.copy_push_rule_from_room_to_room(new_room_id, user_id, rule) -- cgit 1.5.1 From d4713d3e335b21d12284ddd8ebd00e38abcfd521 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 18 May 2022 11:28:14 +0100 Subject: Discard null-containing strings before updating the user directory (#12762) --- changelog.d/12762.misc | 1 + synapse/rest/client/room.py | 4 ++-- synapse/storage/databases/main/events.py | 4 +--- synapse/storage/databases/main/user_directory.py | 9 ++++---- synapse/util/stringutils.py | 10 ++++++++- tests/handlers/test_user_directory.py | 28 ++++++++++++++++++++++++ 6 files changed, 45 insertions(+), 11 deletions(-) create mode 100644 changelog.d/12762.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/12762.misc b/changelog.d/12762.misc new file mode 100644 index 0000000000..990fb6fe74 --- /dev/null +++ b/changelog.d/12762.misc @@ -0,0 +1 @@ +Fix a long-standing bug where the user directory background process would fail to make forward progress if a user included a null codepoint in their display name or avatar. diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 4b8bfbffcb..5a2361a2e6 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -109,10 +109,10 @@ class RoomStateEventRestServlet(TransactionRestServlet): self.auth = hs.get_auth() def register(self, http_server: HttpServer) -> None: - # /room/$roomid/state/$eventtype + # /rooms/$roomid/state/$eventtype no_state_key = "/rooms/(?P[^/]*)/state/(?P[^/]*)$" - # /room/$roomid/state/$eventtype/$statekey + # /rooms/$roomid/state/$eventtype/$statekey state_key = ( "/rooms/(?P[^/]*)/state/" "(?P[^/]*)/(?P[^/]*)$" diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 42d484dc98..0df8ff5395 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -52,6 +52,7 @@ from synapse.storage.util.sequence import SequenceGenerator from synapse.types import JsonDict, StateMap, get_domain_from_id from synapse.util import json_encoder from synapse.util.iterutils import batch_iter, sorted_topologically +from synapse.util.stringutils import non_null_str_or_none if TYPE_CHECKING: from synapse.server import HomeServer @@ -1728,9 +1729,6 @@ class PersistEventsStore: not affect the current local state. """ - def non_null_str_or_none(val: Any) -> Optional[str]: - return val if isinstance(val, str) and "\u0000" not in val else None - self.db_pool.simple_insert_many_txn( txn, table="room_memberships", diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index df772d4721..028db69af3 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -29,6 +29,7 @@ from typing import ( from typing_extensions import TypedDict from synapse.api.errors import StoreError +from synapse.util.stringutils import non_null_str_or_none if TYPE_CHECKING: from synapse.server import HomeServer @@ -469,11 +470,9 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore): """ Update or add a user's profile in the user directory. """ - # If the display name or avatar URL are unexpected types, overwrite them. - if not isinstance(display_name, str): - display_name = None - if not isinstance(avatar_url, str): - avatar_url = None + # If the display name or avatar URL are unexpected types, replace with None. + display_name = non_null_str_or_none(display_name) + avatar_url = non_null_str_or_none(avatar_url) def _update_profile_in_user_dir_txn(txn: LoggingTransaction) -> None: self.db_pool.simple_upsert_txn( diff --git a/synapse/util/stringutils.py b/synapse/util/stringutils.py index b26546aecd..27a363d7e5 100644 --- a/synapse/util/stringutils.py +++ b/synapse/util/stringutils.py @@ -16,7 +16,7 @@ import itertools import re import secrets import string -from typing import Iterable, Optional, Tuple +from typing import Any, Iterable, Optional, Tuple from netaddr import valid_ipv6 @@ -247,3 +247,11 @@ def base62_encode(num: int, minwidth: int = 1) -> str: # pad to minimum width pad = "0" * (minwidth - len(res)) return pad + res + + +def non_null_str_or_none(val: Any) -> Optional[str]: + """Check that the arg is a string containing no null (U+0000) codepoints. + + If so, returns the given string unmodified; otherwise, returns None. + """ + return val if isinstance(val, str) and "\u0000" not in val else None diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index 96e2e3039b..4d658d29ca 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -1007,6 +1007,34 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.assertEqual(in_public, {(bob, room1), (bob, room2)}) self.assertEqual(in_private, set()) + def test_ignore_display_names_with_null_codepoints(self) -> None: + MXC_DUMMY = "mxc://dummy" + + # Alice creates a public room. + alice = self.register_user("alice", "pass") + + # Alice has a user directory entry to start with. + self.assertIn( + alice, + self.get_success(self.user_dir_helper.get_profiles_in_user_directory()), + ) + + # Alice changes her name to include a null codepoint. + self.get_success( + self.hs.get_user_directory_handler().handle_local_profile_change( + alice, + ProfileInfo( + display_name="abcd\u0000efgh", + avatar_url=MXC_DUMMY, + ), + ) + ) + # Alice's profile should be updated with the new avatar, but no display name. + self.assertEqual( + self.get_success(self.user_dir_helper.get_profiles_in_user_directory()), + {alice: ProfileInfo(display_name=None, avatar_url=MXC_DUMMY)}, + ) + class TestUserDirSearchDisabled(unittest.HomeserverTestCase): servlets = [ -- cgit 1.5.1 From 50ae4eafe1f8ba31f1977e5dc11c85f15722f1ee Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Wed, 18 May 2022 17:02:10 +0200 Subject: Add some type hints to `event_federation` datastore (#12753) Co-authored-by: David Robertson --- changelog.d/12753.misc | 1 + mypy.ini | 1 - synapse/handlers/room_batch.py | 2 + synapse/storage/databases/main/event_federation.py | 187 ++++++++++++++------- tests/handlers/test_federation.py | 1 + 5 files changed, 127 insertions(+), 65 deletions(-) create mode 100644 changelog.d/12753.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/12753.misc b/changelog.d/12753.misc new file mode 100644 index 0000000000..e793d08e5e --- /dev/null +++ b/changelog.d/12753.misc @@ -0,0 +1 @@ +Add some type hints to datastore. \ No newline at end of file diff --git a/mypy.ini b/mypy.ini index 45668974b3..4fa020b876 100644 --- a/mypy.ini +++ b/mypy.ini @@ -27,7 +27,6 @@ exclude = (?x) |synapse/storage/databases/__init__.py |synapse/storage/databases/main/cache.py |synapse/storage/databases/main/devices.py - |synapse/storage/databases/main/event_federation.py |synapse/storage/schema/ |tests/api/test_auth.py diff --git a/synapse/handlers/room_batch.py b/synapse/handlers/room_batch.py index 29de7e5bed..fbfd748406 100644 --- a/synapse/handlers/room_batch.py +++ b/synapse/handlers/room_batch.py @@ -53,6 +53,7 @@ class RoomBatchHandler: # We want to use the successor event depth so they appear after `prev_event` because # it has a larger `depth` but before the successor event because the `stream_ordering` # is negative before the successor event. + assert most_recent_prev_event_id is not None successor_event_ids = await self.store.get_successor_events( most_recent_prev_event_id ) @@ -139,6 +140,7 @@ class RoomBatchHandler: _, ) = await self.store.get_max_depth_of(event_ids) # mapping from (type, state_key) -> state_event_id + assert most_recent_event_id is not None prev_state_map = await self.state_store.get_state_ids_for_event( most_recent_event_id ) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 4710224708..dcfe8caf47 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -14,7 +14,17 @@ import itertools import logging from queue import Empty, PriorityQueue -from typing import TYPE_CHECKING, Collection, Dict, Iterable, List, Optional, Set, Tuple +from typing import ( + TYPE_CHECKING, + Collection, + Dict, + Iterable, + List, + Optional, + Set, + Tuple, + cast, +) import attr from prometheus_client import Counter, Gauge @@ -33,7 +43,7 @@ from synapse.storage.database import ( from synapse.storage.databases.main.events_worker import EventsWorkerStore from synapse.storage.databases.main.signatures import SignatureWorkerStore from synapse.storage.engines import PostgresEngine -from synapse.storage.types import Cursor +from synapse.types import JsonDict from synapse.util import json_encoder from synapse.util.caches.descriptors import cached from synapse.util.caches.lrucache import LruCache @@ -135,7 +145,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas # Check if we have indexed the room so we can use the chain cover # algorithm. - room = await self.get_room(room_id) + room = await self.get_room(room_id) # type: ignore[attr-defined] if room["has_auth_chain_index"]: try: return await self.db_pool.runInteraction( @@ -158,7 +168,11 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas ) def _get_auth_chain_ids_using_cover_index_txn( - self, txn: Cursor, room_id: str, event_ids: Collection[str], include_given: bool + self, + txn: LoggingTransaction, + room_id: str, + event_ids: Collection[str], + include_given: bool, ) -> Set[str]: """Calculates the auth chain IDs using the chain index.""" @@ -215,9 +229,9 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas chains: Dict[int, int] = {} # Add all linked chains reachable from initial set of chains. - for batch in batch_iter(event_chains, 1000): + for batch2 in batch_iter(event_chains, 1000): clause, args = make_in_list_sql_clause( - txn.database_engine, "origin_chain_id", batch + txn.database_engine, "origin_chain_id", batch2 ) txn.execute(sql % (clause,), args) @@ -297,7 +311,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas front = set(event_ids) while front: - new_front = set() + new_front: Set[str] = set() for chunk in batch_iter(front, 100): # Pull the auth events either from the cache or DB. to_fetch: List[str] = [] # Event IDs to fetch from DB @@ -316,7 +330,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas # Note we need to batch up the results by event ID before # adding to the cache. - to_cache = {} + to_cache: Dict[str, List[Tuple[str, int]]] = {} for event_id, auth_event_id, auth_event_depth in txn: to_cache.setdefault(event_id, []).append( (auth_event_id, auth_event_depth) @@ -349,7 +363,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas # Check if we have indexed the room so we can use the chain cover # algorithm. - room = await self.get_room(room_id) + room = await self.get_room(room_id) # type: ignore[attr-defined] if room["has_auth_chain_index"]: try: return await self.db_pool.runInteraction( @@ -370,7 +384,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas ) def _get_auth_chain_difference_using_cover_index_txn( - self, txn: Cursor, room_id: str, state_sets: List[Set[str]] + self, txn: LoggingTransaction, room_id: str, state_sets: List[Set[str]] ) -> Set[str]: """Calculates the auth chain difference using the chain index. @@ -444,9 +458,9 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas # (We need to take a copy of `seen_chains` as we want to mutate it in # the loop) - for batch in batch_iter(set(seen_chains), 1000): + for batch2 in batch_iter(set(seen_chains), 1000): clause, args = make_in_list_sql_clause( - txn.database_engine, "origin_chain_id", batch + txn.database_engine, "origin_chain_id", batch2 ) txn.execute(sql % (clause,), args) @@ -529,7 +543,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas return result def _get_auth_chain_difference_txn( - self, txn, state_sets: List[Set[str]] + self, txn: LoggingTransaction, state_sets: List[Set[str]] ) -> Set[str]: """Calculates the auth chain difference using a breadth first search. @@ -602,7 +616,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas # I think building a temporary list with fetchall is more efficient than # just `search.extend(txn)`, but this is unconfirmed - search.extend(txn.fetchall()) + search.extend(cast(List[Tuple[int, str]], txn.fetchall())) # sort by depth search.sort() @@ -645,7 +659,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas # We parse the results and add the to the `found` set and the # cache (note we need to batch up the results by event ID before # adding to the cache). - to_cache = {} + to_cache: Dict[str, List[Tuple[str, int]]] = {} for event_id, auth_event_id, auth_event_depth in txn: to_cache.setdefault(event_id, []).append( (auth_event_id, auth_event_depth) @@ -696,7 +710,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas return {eid for eid, n in event_to_missing_sets.items() if n} async def get_oldest_event_ids_with_depth_in_room( - self, room_id + self, room_id: str ) -> List[Tuple[str, int]]: """Gets the oldest events(backwards extremities) in the room along with the aproximate depth. @@ -713,7 +727,9 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas List of (event_id, depth) tuples """ - def get_oldest_event_ids_with_depth_in_room_txn(txn, room_id): + def get_oldest_event_ids_with_depth_in_room_txn( + txn: LoggingTransaction, room_id: str + ) -> List[Tuple[str, int]]: # Assemble a dictionary with event_id -> depth for the oldest events # we know of in the room. Backwards extremeties are the oldest # events we know of in the room but we only know of them because @@ -743,7 +759,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas txn.execute(sql, (room_id, False)) - return txn.fetchall() + return cast(List[Tuple[str, int]], txn.fetchall()) return await self.db_pool.runInteraction( "get_oldest_event_ids_with_depth_in_room", @@ -752,7 +768,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas ) async def get_insertion_event_backward_extremities_in_room( - self, room_id + self, room_id: str ) -> List[Tuple[str, int]]: """Get the insertion events we know about that we haven't backfilled yet. @@ -768,7 +784,9 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas List of (event_id, depth) tuples """ - def get_insertion_event_backward_extremities_in_room_txn(txn, room_id): + def get_insertion_event_backward_extremities_in_room_txn( + txn: LoggingTransaction, room_id: str + ) -> List[Tuple[str, int]]: sql = """ SELECT b.event_id, MAX(e.depth) FROM insertion_events as i /* We only want insertion events that are also marked as backwards extremities */ @@ -780,7 +798,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas """ txn.execute(sql, (room_id,)) - return txn.fetchall() + return cast(List[Tuple[str, int]], txn.fetchall()) return await self.db_pool.runInteraction( "get_insertion_event_backward_extremities_in_room", @@ -788,7 +806,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas room_id, ) - async def get_max_depth_of(self, event_ids: List[str]) -> Tuple[str, int]: + async def get_max_depth_of(self, event_ids: List[str]) -> Tuple[Optional[str], int]: """Returns the event ID and depth for the event that has the max depth from a set of event IDs Args: @@ -817,7 +835,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas return max_depth_event_id, current_max_depth - async def get_min_depth_of(self, event_ids: List[str]) -> Tuple[str, int]: + async def get_min_depth_of(self, event_ids: List[str]) -> Tuple[Optional[str], int]: """Returns the event ID and depth for the event that has the min depth from a set of event IDs Args: @@ -865,7 +883,9 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas "get_prev_events_for_room", self._get_prev_events_for_room_txn, room_id ) - def _get_prev_events_for_room_txn(self, txn, room_id: str): + def _get_prev_events_for_room_txn( + self, txn: LoggingTransaction, room_id: str + ) -> List[str]: # we just use the 10 newest events. Older events will become # prev_events of future events. @@ -896,7 +916,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas sorted by extremity count. """ - def _get_rooms_with_many_extremities_txn(txn): + def _get_rooms_with_many_extremities_txn(txn: LoggingTransaction) -> List[str]: where_clause = "1=1" if room_id_filter: where_clause = "room_id NOT IN (%s)" % ( @@ -937,7 +957,9 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas "get_min_depth", self._get_min_depth_interaction, room_id ) - def _get_min_depth_interaction(self, txn, room_id): + def _get_min_depth_interaction( + self, txn: LoggingTransaction, room_id: str + ) -> Optional[int]: min_depth = self.db_pool.simple_select_one_onecol_txn( txn, table="room_depth", @@ -966,22 +988,24 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas """ # We want to make the cache more effective, so we clamp to the last # change before the given ordering. - last_change = self._events_stream_cache.get_max_pos_of_last_change(room_id) + last_change = self._events_stream_cache.get_max_pos_of_last_change(room_id) # type: ignore[attr-defined] # We don't always have a full stream_to_exterm_id table, e.g. after # the upgrade that introduced it, so we make sure we never ask for a # stream_ordering from before a restart - last_change = max(self._stream_order_on_start, last_change) + last_change = max(self._stream_order_on_start, last_change) # type: ignore[attr-defined] # provided the last_change is recent enough, we now clamp the requested # stream_ordering to it. - if last_change > self.stream_ordering_month_ago: + if last_change > self.stream_ordering_month_ago: # type: ignore[attr-defined] stream_ordering = min(last_change, stream_ordering) return await self._get_forward_extremeties_for_room(room_id, stream_ordering) @cached(max_entries=5000, num_args=2) - async def _get_forward_extremeties_for_room(self, room_id, stream_ordering): + async def _get_forward_extremeties_for_room( + self, room_id: str, stream_ordering: int + ) -> List[str]: """For a given room_id and stream_ordering, return the forward extremeties of the room at that point in "time". @@ -989,7 +1013,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas stream_orderings from that point. """ - if stream_ordering <= self.stream_ordering_month_ago: + if stream_ordering <= self.stream_ordering_month_ago: # type: ignore[attr-defined] raise StoreError(400, "stream_ordering too old %s" % (stream_ordering,)) sql = """ @@ -1002,7 +1026,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas WHERE room_id = ? """ - def get_forward_extremeties_for_room_txn(txn): + def get_forward_extremeties_for_room_txn(txn: LoggingTransaction) -> List[str]: txn.execute(sql, (stream_ordering, room_id)) return [event_id for event_id, in txn] @@ -1104,8 +1128,8 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas ] async def get_backfill_events( - self, room_id: str, seed_event_id_list: list, limit: int - ): + self, room_id: str, seed_event_id_list: List[str], limit: int + ) -> List[EventBase]: """Get a list of Events for a given topic that occurred before (and including) the events in seed_event_id_list. Return a list of max size `limit` @@ -1123,10 +1147,19 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas ) events = await self.get_events_as_list(event_ids) return sorted( - events, key=lambda e: (-e.depth, -e.internal_metadata.stream_ordering) + # type-ignore: mypy doesn't like negating the Optional[int] stream_ordering. + # But it's never None, because these events were previously persisted to the DB. + events, + key=lambda e: (-e.depth, -e.internal_metadata.stream_ordering), # type: ignore[operator] ) - def _get_backfill_events(self, txn, room_id, seed_event_id_list, limit): + def _get_backfill_events( + self, + txn: LoggingTransaction, + room_id: str, + seed_event_id_list: List[str], + limit: int, + ) -> Set[str]: """ We want to make sure that we do a breadth-first, "depth" ordered search. We also handle navigating historical branches of history connected by @@ -1139,7 +1172,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas limit, ) - event_id_results = set() + event_id_results: Set[str] = set() # In a PriorityQueue, the lowest valued entries are retrieved first. # We're using depth as the priority in the queue and tie-break based on @@ -1147,7 +1180,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas # highest and newest-in-time message. We add events to the queue with a # negative depth so that we process the newest-in-time messages first # going backwards in time. stream_ordering follows the same pattern. - queue = PriorityQueue() + queue: "PriorityQueue[Tuple[int, int, str, str]]" = PriorityQueue() for seed_event_id in seed_event_id_list: event_lookup_result = self.db_pool.simple_select_one_txn( @@ -1253,7 +1286,13 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas return event_id_results - async def get_missing_events(self, room_id, earliest_events, latest_events, limit): + async def get_missing_events( + self, + room_id: str, + earliest_events: List[str], + latest_events: List[str], + limit: int, + ) -> List[EventBase]: ids = await self.db_pool.runInteraction( "get_missing_events", self._get_missing_events, @@ -1264,11 +1303,18 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas ) return await self.get_events_as_list(ids) - def _get_missing_events(self, txn, room_id, earliest_events, latest_events, limit): + def _get_missing_events( + self, + txn: LoggingTransaction, + room_id: str, + earliest_events: List[str], + latest_events: List[str], + limit: int, + ) -> List[str]: seen_events = set(earliest_events) front = set(latest_events) - seen_events - event_results = [] + event_results: List[str] = [] query = ( "SELECT prev_event_id FROM event_edges " @@ -1311,7 +1357,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas @wrap_as_background_process("delete_old_forward_extrem_cache") async def _delete_old_forward_extrem_cache(self) -> None: - def _delete_old_forward_extrem_cache_txn(txn): + def _delete_old_forward_extrem_cache_txn(txn: LoggingTransaction) -> None: # Delete entries older than a month, while making sure we don't delete # the only entries for a room. sql = """ @@ -1324,7 +1370,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas ) AND stream_ordering < ? """ txn.execute( - sql, (self.stream_ordering_month_ago, self.stream_ordering_month_ago) + sql, (self.stream_ordering_month_ago, self.stream_ordering_month_ago) # type: ignore[attr-defined] ) await self.db_pool.runInteraction( @@ -1382,7 +1428,9 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas """ if self.db_pool.engine.supports_returning: - def _remove_received_event_from_staging_txn(txn): + def _remove_received_event_from_staging_txn( + txn: LoggingTransaction, + ) -> Optional[int]: sql = """ DELETE FROM federation_inbound_events_staging WHERE origin = ? AND event_id = ? @@ -1390,21 +1438,24 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas """ txn.execute(sql, (origin, event_id)) - return txn.fetchone() + row = cast(Optional[Tuple[int]], txn.fetchone()) - row = await self.db_pool.runInteraction( + if row is None: + return None + + return row[0] + + return await self.db_pool.runInteraction( "remove_received_event_from_staging", _remove_received_event_from_staging_txn, db_autocommit=True, ) - if row is None: - return None - - return row[0] else: - def _remove_received_event_from_staging_txn(txn): + def _remove_received_event_from_staging_txn( + txn: LoggingTransaction, + ) -> Optional[int]: received_ts = self.db_pool.simple_select_one_onecol_txn( txn, table="federation_inbound_events_staging", @@ -1437,7 +1488,9 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas ) -> Optional[Tuple[str, str]]: """Get the next event ID in the staging area for the given room.""" - def _get_next_staged_event_id_for_room_txn(txn): + def _get_next_staged_event_id_for_room_txn( + txn: LoggingTransaction, + ) -> Optional[Tuple[str, str]]: sql = """ SELECT origin, event_id FROM federation_inbound_events_staging @@ -1448,7 +1501,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas txn.execute(sql, (room_id,)) - return txn.fetchone() + return cast(Optional[Tuple[str, str]], txn.fetchone()) return await self.db_pool.runInteraction( "get_next_staged_event_id_for_room", _get_next_staged_event_id_for_room_txn @@ -1461,7 +1514,9 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas ) -> Optional[Tuple[str, EventBase]]: """Get the next event in the staging area for the given room.""" - def _get_next_staged_event_for_room_txn(txn): + def _get_next_staged_event_for_room_txn( + txn: LoggingTransaction, + ) -> Optional[Tuple[str, str, str]]: sql = """ SELECT event_json, internal_metadata, origin FROM federation_inbound_events_staging @@ -1471,7 +1526,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas """ txn.execute(sql, (room_id,)) - return txn.fetchone() + return cast(Optional[Tuple[str, str, str]], txn.fetchone()) row = await self.db_pool.runInteraction( "get_next_staged_event_for_room", _get_next_staged_event_for_room_txn @@ -1599,18 +1654,20 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas ) @wrap_as_background_process("_get_stats_for_federation_staging") - async def _get_stats_for_federation_staging(self): + async def _get_stats_for_federation_staging(self) -> None: """Update the prometheus metrics for the inbound federation staging area.""" - def _get_stats_for_federation_staging_txn(txn): + def _get_stats_for_federation_staging_txn( + txn: LoggingTransaction, + ) -> Tuple[int, int]: txn.execute("SELECT count(*) FROM federation_inbound_events_staging") - (count,) = txn.fetchone() + (count,) = cast(Tuple[int], txn.fetchone()) txn.execute( "SELECT min(received_ts) FROM federation_inbound_events_staging" ) - (received_ts,) = txn.fetchone() + (received_ts,) = cast(Tuple[Optional[int]], txn.fetchone()) # If there is nothing in the staging area default it to 0. age = 0 @@ -1651,19 +1708,21 @@ class EventFederationStore(EventFederationWorkerStore): self.EVENT_AUTH_STATE_ONLY, self._background_delete_non_state_event_auth ) - async def clean_room_for_join(self, room_id): - return await self.db_pool.runInteraction( + async def clean_room_for_join(self, room_id: str) -> None: + await self.db_pool.runInteraction( "clean_room_for_join", self._clean_room_for_join_txn, room_id ) - def _clean_room_for_join_txn(self, txn, room_id): + def _clean_room_for_join_txn(self, txn: LoggingTransaction, room_id: str) -> None: query = "DELETE FROM event_forward_extremities WHERE room_id = ?" txn.execute(query, (room_id,)) txn.call_after(self.get_latest_event_ids_in_room.invalidate, (room_id,)) - async def _background_delete_non_state_event_auth(self, progress, batch_size): - def delete_event_auth(txn): + async def _background_delete_non_state_event_auth( + self, progress: JsonDict, batch_size: int + ) -> int: + def delete_event_auth(txn: LoggingTransaction) -> bool: target_min_stream_id = progress.get("target_min_stream_id_inclusive") max_stream_id = progress.get("max_stream_id_exclusive") diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py index 060ba5f517..e95dfdce20 100644 --- a/tests/handlers/test_federation.py +++ b/tests/handlers/test_federation.py @@ -332,6 +332,7 @@ class FederationTestCase(unittest.FederatingHomeserverTestCase): most_recent_prev_event_depth, ) = self.get_success(self.store.get_max_depth_of(prev_event_ids)) # mapping from (type, state_key) -> state_event_id + assert most_recent_prev_event_id is not None prev_state_map = self.get_success( self.state_store.get_state_ids_for_event(most_recent_prev_event_id) ) -- cgit 1.5.1 From 19d79b6ebe3070ad7352f24549fbafb9dee44b75 Mon Sep 17 00:00:00 2001 From: Shay Date: Wed, 18 May 2022 10:15:52 -0700 Subject: Refactor `resolve_state_groups_for_events` to not pull out full state when no state resolution happens. (#12775) --- changelog.d/12775.misc | 1 + synapse/state/__init__.py | 35 +++++++++++++++++--------------- synapse/storage/databases/state/store.py | 2 +- synapse/storage/state.py | 12 +++++------ tests/test_state.py | 13 ++++++++++++ 5 files changed, 40 insertions(+), 23 deletions(-) create mode 100644 changelog.d/12775.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/12775.misc b/changelog.d/12775.misc new file mode 100644 index 0000000000..eac326cde3 --- /dev/null +++ b/changelog.d/12775.misc @@ -0,0 +1 @@ +Refactor `resolve_state_groups_for_events` to not pull out full state when no state resolution happens. \ No newline at end of file diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 0219091c4e..4b4ed42cff 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -288,7 +288,6 @@ class StateHandler: # # first of all, figure out the state before the event # - if old_state: # if we're given the state before the event, then we use that state_ids_before_event: StateMap[str] = { @@ -419,33 +418,37 @@ class StateHandler: """ logger.debug("resolve_state_groups event_ids %s", event_ids) - # map from state group id to the state in that state group (where - # 'state' is a map from state key to event id) - # dict[int, dict[(str, str), str]] - state_groups_ids = await self.state_store.get_state_groups_ids( - room_id, event_ids - ) - - if len(state_groups_ids) == 0: - return _StateCacheEntry(state={}, state_group=None) - elif len(state_groups_ids) == 1: - name, state_list = list(state_groups_ids.items()).pop() + state_groups = await self.state_store.get_state_group_for_events(event_ids) - prev_group, delta_ids = await self.state_store.get_state_group_delta(name) + state_group_ids = state_groups.values() + # check if each event has same state group id, if so there's no state to resolve + state_group_ids_set = set(state_group_ids) + if len(state_group_ids_set) == 1: + (state_group_id,) = state_group_ids_set + state = await self.state_store.get_state_for_groups(state_group_ids_set) + prev_group, delta_ids = await self.state_store.get_state_group_delta( + state_group_id + ) return _StateCacheEntry( - state=state_list, - state_group=name, + state=state[state_group_id], + state_group=state_group_id, prev_group=prev_group, delta_ids=delta_ids, ) + elif len(state_group_ids_set) == 0: + return _StateCacheEntry(state={}, state_group=None) room_version = await self.store.get_room_version_id(room_id) + state_to_resolve = await self.state_store.get_state_for_groups( + state_group_ids_set + ) + result = await self._state_resolution_handler.resolve_state_groups( room_id, room_version, - state_groups_ids, + state_to_resolve, None, state_res_store=StateResolutionStore(self.store), ) diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index 7614d76ac6..609a2b88bf 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -189,7 +189,7 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): group: int, state_filter: StateFilter, ) -> Tuple[MutableStateMap[str], bool]: - """Checks if group is in cache. See `_get_state_for_groups` + """Checks if group is in cache. See `get_state_for_groups` Args: cache: the state group cache to use diff --git a/synapse/storage/state.py b/synapse/storage/state.py index d4a1bd4f9d..a6c60de504 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -586,7 +586,7 @@ class StateGroupStorage: if not event_ids: return {} - event_to_groups = await self._get_state_group_for_events(event_ids) + event_to_groups = await self.get_state_group_for_events(event_ids) groups = set(event_to_groups.values()) group_to_state = await self.stores.state._get_state_for_groups(groups) @@ -602,7 +602,7 @@ class StateGroupStorage: Returns: Resolves to a map of (type, state_key) -> event_id """ - group_to_state = await self._get_state_for_groups((state_group,)) + group_to_state = await self.get_state_for_groups((state_group,)) return group_to_state[state_group] @@ -675,7 +675,7 @@ class StateGroupStorage: RuntimeError if we don't have a state group for one or more of the events (ie they are outliers or unknown) """ - event_to_groups = await self._get_state_group_for_events(event_ids) + event_to_groups = await self.get_state_group_for_events(event_ids) groups = set(event_to_groups.values()) group_to_state = await self.stores.state._get_state_for_groups( @@ -716,7 +716,7 @@ class StateGroupStorage: RuntimeError if we don't have a state group for one or more of the events (ie they are outliers or unknown) """ - event_to_groups = await self._get_state_group_for_events(event_ids) + event_to_groups = await self.get_state_group_for_events(event_ids) groups = set(event_to_groups.values()) group_to_state = await self.stores.state._get_state_for_groups( @@ -774,7 +774,7 @@ class StateGroupStorage: ) return state_map[event_id] - def _get_state_for_groups( + def get_state_for_groups( self, groups: Iterable[int], state_filter: Optional[StateFilter] = None ) -> Awaitable[Dict[int, MutableStateMap[str]]]: """Gets the state at each of a list of state groups, optionally @@ -792,7 +792,7 @@ class StateGroupStorage: groups, state_filter or StateFilter.all() ) - async def _get_state_group_for_events( + async def get_state_group_for_events( self, event_ids: Collection[str], await_full_state: bool = True, diff --git a/tests/test_state.py b/tests/test_state.py index 651ec1c7d4..74a8ce6096 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -129,6 +129,19 @@ class _DummyStore: async def get_room_version_id(self, room_id): return RoomVersions.V1.identifier + async def get_state_group_for_events(self, event_ids): + res = {} + for event in event_ids: + res[event] = self._event_to_state_group[event] + return res + + async def get_state_for_groups(self, groups): + res = {} + for group in groups: + state = self._group_to_state[group] + res[group] = state + return res + class DictObj(dict): def __init__(self, **kwargs): -- cgit 1.5.1 From 66a5f6c40018018cccffd79aded0850d13efe513 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Thu, 19 May 2022 14:16:49 +0100 Subject: Add a unique index to `state_group_edges` to prevent duplicates being accidentally introduced and the consequential impact to performance. (#12687) --- changelog.d/12687.bugfix | 1 + docs/upgrade.md | 90 ++++++++++++++++++++++ synapse/storage/background_updates.py | 15 ++++ synapse/storage/databases/state/bg_updates.py | 16 ++++ .../state/delta/70/08_state_group_edges_unique.sql | 17 ++++ 5 files changed, 139 insertions(+) create mode 100644 changelog.d/12687.bugfix create mode 100644 synapse/storage/schema/state/delta/70/08_state_group_edges_unique.sql (limited to 'synapse/storage/databases') diff --git a/changelog.d/12687.bugfix b/changelog.d/12687.bugfix new file mode 100644 index 0000000000..196d976670 --- /dev/null +++ b/changelog.d/12687.bugfix @@ -0,0 +1 @@ +Add a unique index to `state_group_edges` to prevent duplicates being accidentally introduced and the consequential impact to performance. \ No newline at end of file diff --git a/docs/upgrade.md b/docs/upgrade.md index fa4b3ef590..92ca31b2f8 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -89,6 +89,96 @@ process, for example: dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb ``` +# Upgrading to v1.60.0 + +## Adding a new unique index to `state_group_edges` could fail if your database is corrupted + +This release of Synapse will add a unique index to the `state_group_edges` table, in order +to prevent accidentally introducing duplicate information (for example, because a database +backup was restored multiple times). + +Duplicate rows being present in this table could cause drastic performance problems; see +[issue 11779](https://github.com/matrix-org/synapse/issues/11779) for more details. + +If your Synapse database already has had duplicate rows introduced into this table, +this could fail, with either of these errors: + + +**On Postgres:** +``` +synapse.storage.background_updates - 623 - INFO - background_updates-0 - Adding index state_group_edges_unique_idx to state_group_edges +synapse.storage.background_updates - 282 - ERROR - background_updates-0 - Error doing update +... +psycopg2.errors.UniqueViolation: could not create unique index "state_group_edges_unique_idx" +DETAIL: Key (state_group, prev_state_group)=(2, 1) is duplicated. +``` +(The numbers may be different.) + +**On SQLite:** +``` +synapse.storage.background_updates - 623 - INFO - background_updates-0 - Adding index state_group_edges_unique_idx to state_group_edges +synapse.storage.background_updates - 282 - ERROR - background_updates-0 - Error doing update +... +sqlite3.IntegrityError: UNIQUE constraint failed: state_group_edges.state_group, state_group_edges.prev_state_group +``` + + +
+Expand this section for steps to resolve this problem + +### On Postgres + +Connect to your database with `psql`. + +```sql +BEGIN; +DELETE FROM state_group_edges WHERE (ctid, state_group, prev_state_group) IN ( + SELECT row_id, state_group, prev_state_group + FROM ( + SELECT + ctid AS row_id, + MIN(ctid) OVER (PARTITION BY state_group, prev_state_group) AS min_row_id, + state_group, + prev_state_group + FROM state_group_edges + ) AS t1 + WHERE row_id <> min_row_id +); +COMMIT; +``` + + +### On SQLite + +At the command-line, use `sqlite3 path/to/your-homeserver-database.db`: + +```sql +BEGIN; +DELETE FROM state_group_edges WHERE (rowid, state_group, prev_state_group) IN ( + SELECT row_id, state_group, prev_state_group + FROM ( + SELECT + rowid AS row_id, + MIN(rowid) OVER (PARTITION BY state_group, prev_state_group) AS min_row_id, + state_group, + prev_state_group + FROM state_group_edges + ) + WHERE row_id <> min_row_id +); +COMMIT; +``` + + +### For more details + +[This comment on issue 11779](https://github.com/matrix-org/synapse/issues/11779#issuecomment-1131545970) +has queries that can be used to check a database for this problem in advance. + +
+ + + # Upgrading to v1.59.0 ## Device name lookup over federation has been disabled by default diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 37f2d6c644..b1e5208c76 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -535,6 +535,7 @@ class BackgroundUpdater: where_clause: Optional[str] = None, unique: bool = False, psql_only: bool = False, + replaces_index: Optional[str] = None, ) -> None: """Helper for store classes to do a background index addition @@ -554,6 +555,8 @@ class BackgroundUpdater: unique: true to make a UNIQUE index psql_only: true to only create this index on psql databases (useful for virtual sqlite tables) + replaces_index: The name of an index that this index replaces. + The named index will be dropped upon completion of the new index. """ def create_index_psql(conn: Connection) -> None: @@ -585,6 +588,12 @@ class BackgroundUpdater: } logger.debug("[SQL] %s", sql) c.execute(sql) + + if replaces_index is not None: + # We drop the old index as the new index has now been created. + sql = f"DROP INDEX IF EXISTS {replaces_index}" + logger.debug("[SQL] %s", sql) + c.execute(sql) finally: conn.set_session(autocommit=False) # type: ignore @@ -613,6 +622,12 @@ class BackgroundUpdater: logger.debug("[SQL] %s", sql) c.execute(sql) + if replaces_index is not None: + # We drop the old index as the new index has now been created. + sql = f"DROP INDEX IF EXISTS {replaces_index}" + logger.debug("[SQL] %s", sql) + c.execute(sql) + if isinstance(self.db_pool.engine, engines.PostgresEngine): runner: Optional[Callable[[Connection], None]] = create_index_psql elif psql_only: diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py index 5de70f31d2..fa9eadaca7 100644 --- a/synapse/storage/databases/state/bg_updates.py +++ b/synapse/storage/databases/state/bg_updates.py @@ -195,6 +195,7 @@ class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore): STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication" STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index" STATE_GROUPS_ROOM_INDEX_UPDATE_NAME = "state_groups_room_id_idx" + STATE_GROUP_EDGES_UNIQUE_INDEX_UPDATE_NAME = "state_group_edges_unique_idx" def __init__( self, @@ -217,6 +218,21 @@ class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore): columns=["room_id"], ) + # `state_group_edges` can cause severe performance issues if duplicate + # rows are introduced, which can accidentally be done by well-meaning + # server admins when trying to restore a database dump, etc. + # See https://github.com/matrix-org/synapse/issues/11779. + # Introduce a unique index to guard against that. + self.db_pool.updates.register_background_index_update( + self.STATE_GROUP_EDGES_UNIQUE_INDEX_UPDATE_NAME, + index_name="state_group_edges_unique_idx", + table="state_group_edges", + columns=["state_group", "prev_state_group"], + unique=True, + # The old index was on (state_group) and was not unique. + replaces_index="state_group_edges_idx", + ) + async def _background_deduplicate_state( self, progress: dict, batch_size: int ) -> int: diff --git a/synapse/storage/schema/state/delta/70/08_state_group_edges_unique.sql b/synapse/storage/schema/state/delta/70/08_state_group_edges_unique.sql new file mode 100644 index 0000000000..b8c0ee0fa0 --- /dev/null +++ b/synapse/storage/schema/state/delta/70/08_state_group_edges_unique.sql @@ -0,0 +1,17 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (7008, 'state_group_edges_unique_idx', '{}'); -- cgit 1.5.1 From 4cc4229cd7a55d2556c798fecbb1c9660dc821c8 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 23 May 2022 19:18:23 +0200 Subject: Prevent expired events from being filtered out when retention is disabled (#12611) Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Co-authored-by: Patrick Cloke --- changelog.d/12611.bugfix | 1 + synapse/handlers/pagination.py | 2 +- synapse/storage/databases/main/room.py | 45 +++++++++++++++++++--------------- synapse/types.py | 6 +++++ synapse/visibility.py | 6 ++--- tests/rest/client/test_relations.py | 8 +++--- tests/rest/client/test_retention.py | 35 +++++++++++++++++++++++--- 7 files changed, 71 insertions(+), 32 deletions(-) create mode 100644 changelog.d/12611.bugfix (limited to 'synapse/storage/databases') diff --git a/changelog.d/12611.bugfix b/changelog.d/12611.bugfix new file mode 100644 index 0000000000..093c45a20b --- /dev/null +++ b/changelog.d/12611.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse 1.7.0 that would prevent events from being sent to clients if there's a retention policy in the room when the support for retention policies is disabled. diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 6ae88add95..19a4407050 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -239,7 +239,7 @@ class PaginationHandler: # defined in the server's configuration, we can safely assume that's the # case and use it for this room. max_lifetime = ( - retention_policy["max_lifetime"] or self._retention_default_max_lifetime + retention_policy.max_lifetime or self._retention_default_max_lifetime ) # Cap the effective max_lifetime to be within the range allowed in the diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 87e9482c60..ded15b92ef 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -45,7 +45,7 @@ from synapse.storage.database import ( from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore from synapse.storage.types import Cursor from synapse.storage.util.id_generators import IdGenerator -from synapse.types import JsonDict, ThirdPartyInstanceID +from synapse.types import JsonDict, RetentionPolicy, ThirdPartyInstanceID from synapse.util import json_encoder from synapse.util.caches.descriptors import cached from synapse.util.stringutils import MXC_REGEX @@ -699,7 +699,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): await self.db_pool.runInteraction("delete_ratelimit", delete_ratelimit_txn) @cached() - async def get_retention_policy_for_room(self, room_id: str) -> Dict[str, int]: + async def get_retention_policy_for_room(self, room_id: str) -> RetentionPolicy: """Get the retention policy for a given room. If no retention policy has been found for this room, returns a policy defined @@ -707,12 +707,20 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): the 'max_lifetime' if no default policy has been defined in the server's configuration). + If support for retention policies is disabled, a policy with a 'min_lifetime' and + 'max_lifetime' of None is returned. + Args: room_id: The ID of the room to get the retention policy of. Returns: A dict containing "min_lifetime" and "max_lifetime" for this room. """ + # If the room retention feature is disabled, return a policy with no minimum nor + # maximum. This prevents incorrectly filtering out events when sending to + # the client. + if not self.config.retention.retention_enabled: + return RetentionPolicy() def get_retention_policy_for_room_txn( txn: LoggingTransaction, @@ -736,10 +744,10 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): # If we don't know this room ID, ret will be None, in this case return the default # policy. if not ret: - return { - "min_lifetime": self.config.retention.retention_default_min_lifetime, - "max_lifetime": self.config.retention.retention_default_max_lifetime, - } + return RetentionPolicy( + min_lifetime=self.config.retention.retention_default_min_lifetime, + max_lifetime=self.config.retention.retention_default_max_lifetime, + ) min_lifetime = ret[0]["min_lifetime"] max_lifetime = ret[0]["max_lifetime"] @@ -754,10 +762,10 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): if max_lifetime is None: max_lifetime = self.config.retention.retention_default_max_lifetime - return { - "min_lifetime": min_lifetime, - "max_lifetime": max_lifetime, - } + return RetentionPolicy( + min_lifetime=min_lifetime, + max_lifetime=max_lifetime, + ) async def get_media_mxcs_in_room(self, room_id: str) -> Tuple[List[str], List[str]]: """Retrieves all the local and remote media MXC URIs in a given room @@ -994,7 +1002,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): async def get_rooms_for_retention_period_in_range( self, min_ms: Optional[int], max_ms: Optional[int], include_null: bool = False - ) -> Dict[str, Dict[str, Optional[int]]]: + ) -> Dict[str, RetentionPolicy]: """Retrieves all of the rooms within the given retention range. Optionally includes the rooms which don't have a retention policy. @@ -1016,7 +1024,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): def get_rooms_for_retention_period_in_range_txn( txn: LoggingTransaction, - ) -> Dict[str, Dict[str, Optional[int]]]: + ) -> Dict[str, RetentionPolicy]: range_conditions = [] args = [] @@ -1047,10 +1055,10 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): rooms_dict = {} for row in rows: - rooms_dict[row["room_id"]] = { - "min_lifetime": row["min_lifetime"], - "max_lifetime": row["max_lifetime"], - } + rooms_dict[row["room_id"]] = RetentionPolicy( + min_lifetime=row["min_lifetime"], + max_lifetime=row["max_lifetime"], + ) if include_null: # If required, do a second query that retrieves all of the rooms we know @@ -1065,10 +1073,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): # policy in its state), add it with a null policy. for row in rows: if row["room_id"] not in rooms_dict: - rooms_dict[row["room_id"]] = { - "min_lifetime": None, - "max_lifetime": None, - } + rooms_dict[row["room_id"]] = RetentionPolicy() return rooms_dict diff --git a/synapse/types.py b/synapse/types.py index bd8071d51d..6f7128ddd6 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -932,3 +932,9 @@ class UserProfile(TypedDict): user_id: str display_name: Optional[str] avatar_url: Optional[str] + + +@attr.s(auto_attribs=True, frozen=True, slots=True) +class RetentionPolicy: + min_lifetime: Optional[int] = None + max_lifetime: Optional[int] = None diff --git a/synapse/visibility.py b/synapse/visibility.py index de6d2ffc52..da4af02796 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -22,7 +22,7 @@ from synapse.events import EventBase from synapse.events.utils import prune_event from synapse.storage import Storage from synapse.storage.state import StateFilter -from synapse.types import StateMap, get_domain_from_id +from synapse.types import RetentionPolicy, StateMap, get_domain_from_id logger = logging.getLogger(__name__) @@ -94,7 +94,7 @@ async def filter_events_for_client( if filter_send_to_client: room_ids = {e.room_id for e in events} - retention_policies = {} + retention_policies: Dict[str, RetentionPolicy] = {} for room_id in room_ids: retention_policies[ @@ -137,7 +137,7 @@ async def filter_events_for_client( # events. if not event.is_state(): retention_policy = retention_policies[event.room_id] - max_lifetime = retention_policy.get("max_lifetime") + max_lifetime = retention_policy.max_lifetime if max_lifetime is not None: oldest_allowed_ts = storage.main.clock.time_msec() - max_lifetime diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py index 27dee8f697..bc9cc51b92 100644 --- a/tests/rest/client/test_relations.py +++ b/tests/rest/client/test_relations.py @@ -995,7 +995,7 @@ class BundledAggregationsTestCase(BaseRelationsTestCase): bundled_aggregations, ) - self._test_bundled_aggregations(RelationTypes.ANNOTATION, assert_annotations, 7) + self._test_bundled_aggregations(RelationTypes.ANNOTATION, assert_annotations, 6) def test_annotation_to_annotation(self) -> None: """Any relation to an annotation should be ignored.""" @@ -1031,7 +1031,7 @@ class BundledAggregationsTestCase(BaseRelationsTestCase): bundled_aggregations, ) - self._test_bundled_aggregations(RelationTypes.REFERENCE, assert_annotations, 7) + self._test_bundled_aggregations(RelationTypes.REFERENCE, assert_annotations, 6) def test_thread(self) -> None: """ @@ -1060,7 +1060,7 @@ class BundledAggregationsTestCase(BaseRelationsTestCase): bundled_aggregations.get("latest_event"), ) - self._test_bundled_aggregations(RelationTypes.THREAD, assert_thread, 10) + self._test_bundled_aggregations(RelationTypes.THREAD, assert_thread, 9) def test_thread_with_bundled_aggregations_for_latest(self) -> None: """ @@ -1106,7 +1106,7 @@ class BundledAggregationsTestCase(BaseRelationsTestCase): bundled_aggregations["latest_event"].get("unsigned"), ) - self._test_bundled_aggregations(RelationTypes.THREAD, assert_thread, 10) + self._test_bundled_aggregations(RelationTypes.THREAD, assert_thread, 9) def test_nested_thread(self) -> None: """ diff --git a/tests/rest/client/test_retention.py b/tests/rest/client/test_retention.py index 7b8fe6d025..2cd7a9e6c5 100644 --- a/tests/rest/client/test_retention.py +++ b/tests/rest/client/test_retention.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from typing import Any, Dict from unittest.mock import Mock from twisted.test.proto_helpers import MemoryReactor @@ -252,16 +253,24 @@ class RetentionNoDefaultPolicyTestCase(unittest.HomeserverTestCase): room.register_servlets, ] - def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: - config = self.default_config() - config["retention"] = { + def default_config(self) -> Dict[str, Any]: + config = super().default_config() + + retention_config = { "enabled": True, } + # Update this config with what's in the default config so that + # override_config works as expected. + retention_config.update(config.get("retention", {})) + config["retention"] = retention_config + + return config + + def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: mock_federation_client = Mock(spec=["backfill"]) self.hs = self.setup_test_homeserver( - config=config, federation_client=mock_federation_client, ) return self.hs @@ -295,6 +304,24 @@ class RetentionNoDefaultPolicyTestCase(unittest.HomeserverTestCase): self._test_retention(room_id, expected_code_for_first_event=404) + @unittest.override_config({"retention": {"enabled": False}}) + def test_visibility_when_disabled(self) -> None: + """Retention policies should be ignored when the retention feature is disabled.""" + room_id = self.helper.create_room_as(self.user_id, tok=self.token) + + self.helper.send_state( + room_id=room_id, + event_type=EventTypes.Retention, + body={"max_lifetime": one_day_ms}, + tok=self.token, + ) + + resp = self.helper.send(room_id=room_id, body="test", tok=self.token) + + self.reactor.advance(one_day_ms * 2 / 1000) + + self.get_event(room_id, resp["event_id"]) + def _test_retention( self, room_id: str, expected_code_for_first_event: int = 200 ) -> None: -- cgit 1.5.1