From 145c006ef76ab3955fb8294203cb8e6e61372cd1 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 15 Sep 2021 03:34:30 -0500 Subject: Verify `?chunk_id` actually corresponds to an insertion event that exists (MSC2716) (#10776) --- synapse/storage/databases/main/__init__.py | 2 ++ synapse/storage/databases/main/room_batch.py | 36 ++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 synapse/storage/databases/main/room_batch.py (limited to 'synapse/storage/databases') diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py index 1dc347f0c9..5c21402dea 100644 --- a/synapse/storage/databases/main/__init__.py +++ b/synapse/storage/databases/main/__init__.py @@ -61,6 +61,7 @@ from .registration import RegistrationStore from .rejections import RejectionsStore from .relations import RelationsStore from .room import RoomStore +from .room_batch import RoomBatchStore from .roommember import RoomMemberStore from .search import SearchStore from .session import SessionStore @@ -81,6 +82,7 @@ class DataStore( EventsBackgroundUpdatesStore, RoomMemberStore, RoomStore, + RoomBatchStore, RegistrationStore, StreamStore, ProfileStore, diff --git a/synapse/storage/databases/main/room_batch.py b/synapse/storage/databases/main/room_batch.py new file mode 100644 index 0000000000..54fa361d3e --- /dev/null +++ b/synapse/storage/databases/main/room_batch.py @@ -0,0 +1,36 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional + +from synapse.storage._base import SQLBaseStore + + +class RoomBatchStore(SQLBaseStore): + async def get_insertion_event_by_chunk_id(self, chunk_id: str) -> Optional[str]: + """Retrieve a insertion event ID. + + Args: + chunk_id: The chunk ID of the insertion event to retrieve. + + Returns: + The event_id of an insertion event, or None if there is no known + insertion event for the given insertion event. + """ + return await self.db_pool.simple_select_one_onecol( + table="insertion_events", + keyvalues={"next_chunk_id": chunk_id}, + retcol="event_id", + allow_none=True, + ) -- cgit 1.5.1 From 3eba047d388fd0d798229a0779f343dbda8a2887 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 15 Sep 2021 09:54:13 -0400 Subject: Add type hints to state database module. (#10823) --- changelog.d/10823.misc | 1 + mypy.ini | 1 + synapse/storage/databases/state/bg_updates.py | 60 ++++++++---- synapse/storage/databases/state/store.py | 136 ++++++++++++++++---------- synapse/storage/state.py | 3 +- synapse/util/caches/dictionary_cache.py | 4 +- 6 files changed, 133 insertions(+), 72 deletions(-) create mode 100644 changelog.d/10823.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/10823.misc b/changelog.d/10823.misc new file mode 100644 index 0000000000..0532969900 --- /dev/null +++ b/changelog.d/10823.misc @@ -0,0 +1 @@ +Add type hints to the state database. diff --git a/mypy.ini b/mypy.ini index e9052fa01b..b21e1555ab 100644 --- a/mypy.ini +++ b/mypy.ini @@ -60,6 +60,7 @@ files = synapse/storage/databases/main/session.py, synapse/storage/databases/main/stream.py, synapse/storage/databases/main/ui_auth.py, + synapse/storage/databases/state, synapse/storage/database.py, synapse/storage/engines, synapse/storage/keys.py, diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py index c2891cb07f..eb1118d2cb 100644 --- a/synapse/storage/databases/state/bg_updates.py +++ b/synapse/storage/databases/state/bg_updates.py @@ -13,12 +13,20 @@ # limitations under the License. import logging -from typing import Optional +from typing import TYPE_CHECKING, Dict, List, Mapping, Optional, Tuple, Union from synapse.storage._base import SQLBaseStore -from synapse.storage.database import DatabasePool +from synapse.storage.database import ( + DatabasePool, + LoggingDatabaseConnection, + LoggingTransaction, +) from synapse.storage.engines import PostgresEngine from synapse.storage.state import StateFilter +from synapse.types import MutableStateMap, StateMap + +if TYPE_CHECKING: + from synapse.server import HomeServer logger = logging.getLogger(__name__) @@ -31,7 +39,9 @@ class StateGroupBackgroundUpdateStore(SQLBaseStore): updates. """ - def _count_state_group_hops_txn(self, txn, state_group): + def _count_state_group_hops_txn( + self, txn: LoggingTransaction, state_group: int + ) -> int: """Given a state group, count how many hops there are in the tree. This is used to ensure the delta chains don't get too long. @@ -56,7 +66,7 @@ class StateGroupBackgroundUpdateStore(SQLBaseStore): else: # We don't use WITH RECURSIVE on sqlite3 as there are distributions # that ship with an sqlite3 version that doesn't support it (e.g. wheezy) - next_group = state_group + next_group: Optional[int] = state_group count = 0 while next_group: @@ -73,11 +83,14 @@ class StateGroupBackgroundUpdateStore(SQLBaseStore): return count def _get_state_groups_from_groups_txn( - self, txn, groups, state_filter: Optional[StateFilter] = None - ): + self, + txn: LoggingTransaction, + groups: List[int], + state_filter: Optional[StateFilter] = None, + ) -> Mapping[int, StateMap[str]]: state_filter = state_filter or StateFilter.all() - results = {group: {} for group in groups} + results: Dict[int, MutableStateMap[str]] = {group: {} for group in groups} where_clause, where_args = state_filter.make_sql_filter_clause() @@ -117,7 +130,7 @@ class StateGroupBackgroundUpdateStore(SQLBaseStore): """ for group in groups: - args = [group] + args: List[Union[int, str]] = [group] args.extend(where_args) txn.execute(sql % (where_clause,), args) @@ -131,7 +144,7 @@ class StateGroupBackgroundUpdateStore(SQLBaseStore): # We don't use WITH RECURSIVE on sqlite3 as there are distributions # that ship with an sqlite3 version that doesn't support it (e.g. wheezy) for group in groups: - next_group = group + next_group: Optional[int] = group while next_group: # We did this before by getting the list of group ids, and @@ -173,6 +186,7 @@ class StateGroupBackgroundUpdateStore(SQLBaseStore): allow_none=True, ) + # The results shouldn't be considered mutable. return results @@ -182,7 +196,12 @@ class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore): STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index" STATE_GROUPS_ROOM_INDEX_UPDATE_NAME = "state_groups_room_id_idx" - def __init__(self, database: DatabasePool, db_conn, hs): + def __init__( + self, + database: DatabasePool, + db_conn: LoggingDatabaseConnection, + hs: "HomeServer", + ): super().__init__(database, db_conn, hs) self.db_pool.updates.register_background_update_handler( self.STATE_GROUP_DEDUPLICATION_UPDATE_NAME, @@ -198,7 +217,9 @@ class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore): columns=["room_id"], ) - async def _background_deduplicate_state(self, progress, batch_size): + async def _background_deduplicate_state( + self, progress: dict, batch_size: int + ) -> int: """This background update will slowly deduplicate state by reencoding them as deltas. """ @@ -218,7 +239,7 @@ class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore): ) max_group = rows[0][0] - def reindex_txn(txn): + def reindex_txn(txn: LoggingTransaction) -> Tuple[bool, int]: new_last_state_group = last_state_group for count in range(batch_size): txn.execute( @@ -251,7 +272,8 @@ class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore): " WHERE id < ? AND room_id = ?", (state_group, room_id), ) - (prev_group,) = txn.fetchone() + # There will be a result due to the coalesce. + (prev_group,) = txn.fetchone() # type: ignore new_last_state_group = state_group if prev_group: @@ -261,15 +283,15 @@ class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore): # otherwise read performance degrades. continue - prev_state = self._get_state_groups_from_groups_txn( + prev_state_by_group = self._get_state_groups_from_groups_txn( txn, [prev_group] ) - prev_state = prev_state[prev_group] + prev_state = prev_state_by_group[prev_group] - curr_state = self._get_state_groups_from_groups_txn( + curr_state_by_group = self._get_state_groups_from_groups_txn( txn, [state_group] ) - curr_state = curr_state[state_group] + curr_state = curr_state_by_group[state_group] if not set(prev_state.keys()) - set(curr_state.keys()): # We can only do a delta if the current has a strict super set @@ -340,8 +362,8 @@ class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore): return result * BATCH_SIZE_SCALE_FACTOR - async def _background_index_state(self, progress, batch_size): - def reindex_txn(conn): + async def _background_index_state(self, progress: dict, batch_size: int) -> int: + def reindex_txn(conn: LoggingDatabaseConnection) -> None: conn.rollback() if isinstance(self.database_engine, PostgresEngine): # postgres insists on autocommit for the index diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index f839c0c24f..f1e3a27e63 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -13,43 +13,56 @@ # limitations under the License. import logging -from collections import namedtuple -from typing import Dict, Iterable, List, Optional, Set, Tuple +from typing import TYPE_CHECKING, Collection, Dict, Iterable, List, Optional, Set, Tuple + +import attr from synapse.api.constants import EventTypes from synapse.storage._base import SQLBaseStore -from synapse.storage.database import DatabasePool +from synapse.storage.database import ( + DatabasePool, + LoggingDatabaseConnection, + LoggingTransaction, +) from synapse.storage.databases.state.bg_updates import StateBackgroundUpdateStore from synapse.storage.state import StateFilter from synapse.storage.types import Cursor from synapse.storage.util.sequence import build_sequence_generator -from synapse.types import MutableStateMap, StateMap +from synapse.types import MutableStateMap, StateKey, StateMap from synapse.util.caches.descriptors import cached from synapse.util.caches.dictionary_cache import DictionaryCache +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) MAX_STATE_DELTA_HOPS = 100 -class _GetStateGroupDelta( - namedtuple("_GetStateGroupDelta", ("prev_group", "delta_ids")) -): +@attr.s(slots=True, frozen=True, auto_attribs=True) +class _GetStateGroupDelta: """Return type of get_state_group_delta that implements __len__, which lets - us use the itrable flag when caching + us use the iterable flag when caching """ - __slots__ = [] + prev_group: Optional[int] + delta_ids: Optional[StateMap[str]] - def __len__(self): + def __len__(self) -> int: return len(self.delta_ids) if self.delta_ids else 0 class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): """A data store for fetching/storing state groups.""" - def __init__(self, database: DatabasePool, db_conn, hs): + def __init__( + self, + database: DatabasePool, + db_conn: LoggingDatabaseConnection, + hs: "HomeServer", + ): super().__init__(database, db_conn, hs) # Originally the state store used a single DictionaryCache to cache the @@ -81,19 +94,21 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): # We size the non-members cache to be smaller than the members cache as the # vast majority of state in Matrix (today) is member events. - self._state_group_cache = DictionaryCache( + self._state_group_cache: DictionaryCache[int, StateKey, str] = DictionaryCache( "*stateGroupCache*", # TODO: this hasn't been tuned yet 50000, ) - self._state_group_members_cache = DictionaryCache( + self._state_group_members_cache: DictionaryCache[ + int, StateKey, str + ] = DictionaryCache( "*stateGroupMembersCache*", 500000, ) - def get_max_state_group_txn(txn: Cursor): + def get_max_state_group_txn(txn: Cursor) -> int: txn.execute("SELECT COALESCE(max(id), 0) FROM state_groups") - return txn.fetchone()[0] + return txn.fetchone()[0] # type: ignore self._state_group_seq_gen = build_sequence_generator( db_conn, @@ -105,15 +120,15 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): ) @cached(max_entries=10000, iterable=True) - async def get_state_group_delta(self, state_group): + async def get_state_group_delta(self, state_group: int) -> _GetStateGroupDelta: """Given a state group try to return a previous group and a delta between the old and the new. Returns: - (prev_group, delta_ids), where both may be None. + _GetStateGroupDelta containing prev_group and delta_ids, where both may be None. """ - def _get_state_group_delta_txn(txn): + def _get_state_group_delta_txn(txn: LoggingTransaction) -> _GetStateGroupDelta: prev_group = self.db_pool.simple_select_one_onecol_txn( txn, table="state_group_edges", @@ -154,7 +169,7 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): Returns: Dict of state group to state map. """ - results = {} + results: Dict[int, StateMap[str]] = {} chunks = [groups[i : i + 100] for i in range(0, len(groups), 100)] for chunk in chunks: @@ -168,19 +183,24 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): return results - def _get_state_for_group_using_cache(self, cache, group, state_filter): + def _get_state_for_group_using_cache( + self, + cache: DictionaryCache[int, StateKey, str], + group: int, + state_filter: StateFilter, + ) -> Tuple[MutableStateMap[str], bool]: """Checks if group is in cache. See `_get_state_for_groups` Args: - cache(DictionaryCache): the state group cache to use - group(int): The state group to lookup - state_filter (StateFilter): The state filter used to fetch state - from the database. + cache: the state group cache to use + group: The state group to lookup + state_filter: The state filter used to fetch state from the database. - Returns 2-tuple (`state_dict`, `got_all`). - `got_all` is a bool indicating if we successfully retrieved all - requests state from the cache, if False we need to query the DB for the - missing state. + Returns: + 2-tuple (`state_dict`, `got_all`). + `got_all` is a bool indicating if we successfully retrieved all + requests state from the cache, if False we need to query the DB for the + missing state. """ cache_entry = cache.get(group) state_dict_ids = cache_entry.value @@ -277,8 +297,11 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): return state def _get_state_for_groups_using_cache( - self, groups: Iterable[int], cache: DictionaryCache, state_filter: StateFilter - ) -> Tuple[Dict[int, StateMap[str]], Set[int]]: + self, + groups: Iterable[int], + cache: DictionaryCache[int, StateKey, str], + state_filter: StateFilter, + ) -> Tuple[Dict[int, MutableStateMap[str]], Set[int]]: """Gets the state at each of a list of state groups, optionally filtering by type/state_key, querying from a specific cache. @@ -310,21 +333,21 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): def _insert_into_cache( self, - group_to_state_dict, - state_filter, - cache_seq_num_members, - cache_seq_num_non_members, - ): + group_to_state_dict: Dict[int, StateMap[str]], + state_filter: StateFilter, + cache_seq_num_members: int, + cache_seq_num_non_members: int, + ) -> None: """Inserts results from querying the database into the relevant cache. Args: - group_to_state_dict (dict): The new entries pulled from database. + group_to_state_dict: The new entries pulled from database. Map from state group to state dict - state_filter (StateFilter): The state filter used to fetch state + state_filter: The state filter used to fetch state from the database. - cache_seq_num_members (int): Sequence number of member cache since + cache_seq_num_members: Sequence number of member cache since last lookup in cache - cache_seq_num_non_members (int): Sequence number of member cache since + cache_seq_num_non_members: Sequence number of member cache since last lookup in cache """ @@ -395,7 +418,7 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): The state group ID """ - def _store_state_group_txn(txn): + def _store_state_group_txn(txn: LoggingTransaction) -> int: if current_state_ids is None: # AFAIK, this can never happen raise Exception("current_state_ids cannot be None") @@ -426,6 +449,8 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): potential_hops = self._count_state_group_hops_txn(txn, prev_group) if prev_group and potential_hops < MAX_STATE_DELTA_HOPS: + assert delta_ids is not None + self.db_pool.simple_insert_txn( txn, table="state_group_edges", @@ -498,7 +523,7 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): ) async def purge_unreferenced_state_groups( - self, room_id: str, state_groups_to_delete + self, room_id: str, state_groups_to_delete: Collection[int] ) -> None: """Deletes no longer referenced state groups and de-deltas any state groups that reference them. @@ -506,8 +531,7 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): Args: room_id: The room the state groups belong to (must all be in the same room). - state_groups_to_delete (Collection[int]): Set of all state groups - to delete. + state_groups_to_delete: Set of all state groups to delete. """ await self.db_pool.runInteraction( @@ -517,7 +541,12 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): state_groups_to_delete, ) - def _purge_unreferenced_state_groups(self, txn, room_id, state_groups_to_delete): + def _purge_unreferenced_state_groups( + self, + txn: LoggingTransaction, + room_id: str, + state_groups_to_delete: Collection[int], + ) -> None: logger.info( "[purge] found %i state groups to delete", len(state_groups_to_delete) ) @@ -546,8 +575,8 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): # groups to non delta versions. for sg in remaining_state_groups: logger.info("[purge] de-delta-ing remaining state group %s", sg) - curr_state = self._get_state_groups_from_groups_txn(txn, [sg]) - curr_state = curr_state[sg] + curr_state_by_group = self._get_state_groups_from_groups_txn(txn, [sg]) + curr_state = curr_state_by_group[sg] self.db_pool.simple_delete_txn( txn, table="state_groups_state", keyvalues={"state_group": sg} @@ -605,12 +634,14 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): return {row["state_group"]: row["prev_state_group"] for row in rows} - async def purge_room_state(self, room_id, state_groups_to_delete): + async def purge_room_state( + self, room_id: str, state_groups_to_delete: Collection[int] + ) -> None: """Deletes all record of a room from state tables Args: - room_id (str): - state_groups_to_delete (list[int]): State groups to delete + room_id: + state_groups_to_delete: State groups to delete """ await self.db_pool.runInteraction( @@ -620,7 +651,12 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): state_groups_to_delete, ) - def _purge_room_state_txn(self, txn, room_id, state_groups_to_delete): + def _purge_room_state_txn( + self, + txn: LoggingTransaction, + room_id: str, + state_groups_to_delete: Collection[int], + ) -> None: # first we have to delete the state groups states logger.info("[purge] removing %s from state_groups_state", room_id) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index c76529cb57..5e86befde4 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -377,7 +377,8 @@ class StateGroupStorage: make up the delta between the old and new state groups. """ - return await self.stores.state.get_state_group_delta(state_group) + state_group_delta = await self.stores.state.get_state_group_delta(state_group) + return state_group_delta.prev_group, state_group_delta.delta_ids async def get_state_groups_ids( self, _room_id: str, event_ids: Iterable[str] diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index ade088aae2..485ddb1893 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -130,7 +130,7 @@ class DictionaryCache(Generic[KT, DKT, DV]): sequence: int, key: KT, value: Dict[DKT, DV], - fetched_keys: Optional[Set[DKT]] = None, + fetched_keys: Optional[Iterable[DKT]] = None, ) -> None: """Updates the entry in the cache @@ -155,7 +155,7 @@ class DictionaryCache(Generic[KT, DKT, DV]): self._update_or_insert(key, value, fetched_keys) def _update_or_insert( - self, key: KT, value: Dict[DKT, DV], known_absent: Set[DKT] + self, key: KT, value: Dict[DKT, DV], known_absent: Iterable[DKT] ) -> None: # We pop and reinsert as we need to tell the cache the size may have # changed -- cgit 1.5.1 From 437961744c6c8761e6483bb215e5e779123ffd97 Mon Sep 17 00:00:00 2001 From: reivilibre <38398653+reivilibre@users.noreply.github.com> Date: Mon, 20 Sep 2021 10:26:13 +0100 Subject: Fix remove_stale_pushers job on SQLite. (#10843) --- changelog.d/10843.bugfix | 1 + synapse/storage/database.py | 21 ++++++++++++--------- synapse/storage/databases/main/account_data.py | 2 +- synapse/storage/databases/main/events.py | 2 +- synapse/storage/databases/main/events_bg_updates.py | 4 ++-- synapse/storage/databases/main/pusher.py | 4 ++-- synapse/storage/databases/main/state.py | 4 ++-- synapse/storage/databases/main/ui_auth.py | 6 +++--- synapse/storage/databases/state/store.py | 6 +++--- 9 files changed, 27 insertions(+), 23 deletions(-) create mode 100644 changelog.d/10843.bugfix (limited to 'synapse/storage/databases') diff --git a/changelog.d/10843.bugfix b/changelog.d/10843.bugfix new file mode 100644 index 0000000000..5027a1dbef --- /dev/null +++ b/changelog.d/10843.bugfix @@ -0,0 +1 @@ +Fix a bug causing the `remove_stale_pushers` background job to repeatedly fail and log errors. This bug affected Synapse servers that had been upgraded from version 1.28 or older and are using SQLite. diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 0084d9f96c..f5a8f90a0f 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -1632,7 +1632,7 @@ class DatabasePool: txn: LoggingTransaction, table: str, column: str, - iterable: Iterable[Any], + iterable: Collection[Any], keyvalues: Dict[str, Any], retcols: Iterable[str], ) -> List[Dict[str, Any]]: @@ -1891,29 +1891,32 @@ class DatabasePool: txn: LoggingTransaction, table: str, column: str, - iterable: Iterable[Any], + values: Collection[Any], keyvalues: Dict[str, Any], ) -> int: """Executes a DELETE query on the named table. - Filters rows by if value of `column` is in `iterable`. + Deletes the rows: + - whose value of `column` is in `values`; AND + - that match extra column-value pairs specified in `keyvalues`. Args: txn: Transaction object table: string giving the table name - column: column name to test for inclusion against `iterable` - iterable: list - keyvalues: dict of column names and values to select the rows with + column: column name to test for inclusion against `values` + values: values of `column` which choose rows to delete + keyvalues: dict of extra column names and values to select the rows + with. They will be ANDed together with the main predicate. Returns: Number rows deleted """ - if not iterable: + if not values: return 0 sql = "DELETE FROM %s" % table - clause, values = make_in_list_sql_clause(txn.database_engine, column, iterable) + clause, values = make_in_list_sql_clause(txn.database_engine, column, values) clauses = [clause] for key, value in keyvalues.items(): @@ -2098,7 +2101,7 @@ class DatabasePool: def make_in_list_sql_clause( - database_engine: BaseDatabaseEngine, column: str, iterable: Iterable + database_engine: BaseDatabaseEngine, column: str, iterable: Collection[Any] ) -> Tuple[str, list]: """Returns an SQL clause that checks the given column is in the iterable. diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py index 1d02795f43..d0cf3460da 100644 --- a/synapse/storage/databases/main/account_data.py +++ b/synapse/storage/databases/main/account_data.py @@ -494,7 +494,7 @@ class AccountDataWorkerStore(SQLBaseStore): txn, table="ignored_users", column="ignored_user_id", - iterable=previously_ignored_users - currently_ignored_users, + values=previously_ignored_users - currently_ignored_users, keyvalues={"ignorer_user_id": user_id}, ) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 8e691678e5..dec7e8594e 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -667,7 +667,7 @@ class PersistEventsStore: table="event_auth_chain_to_calculate", keyvalues={}, column="event_id", - iterable=new_chain_tuples, + values=new_chain_tuples, ) # Now we need to calculate any new links between chains caused by diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 6fcb2b8353..1afc59fafb 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -490,7 +490,7 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): txn=txn, table="event_forward_extremities", column="event_id", - iterable=to_delete, + values=to_delete, keyvalues={}, ) @@ -520,7 +520,7 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): txn=txn, table="_extremities_to_check", column="event_id", - iterable=original_set, + values=original_set, keyvalues={}, ) diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py index 63ac09c61d..a93caae8d0 100644 --- a/synapse/storage/databases/main/pusher.py +++ b/synapse/storage/databases/main/pusher.py @@ -324,7 +324,7 @@ class PusherWorkerStore(SQLBaseStore): txn, table="pushers", column="user_name", - iterable=users, + values=users, keyvalues={}, ) @@ -373,7 +373,7 @@ class PusherWorkerStore(SQLBaseStore): txn, table="pushers", column="id", - iterable=(pusher_id for pusher_id, token in pushers if token is None), + values=[pusher_id for pusher_id, token in pushers if token is None], keyvalues={}, ) diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index 8e22da99ae..a8e8dd4577 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -473,7 +473,7 @@ class MainStateBackgroundUpdateStore(RoomMemberWorkerStore): txn, table="current_state_events", column="room_id", - iterable=to_delete, + values=to_delete, keyvalues={}, ) @@ -481,7 +481,7 @@ class MainStateBackgroundUpdateStore(RoomMemberWorkerStore): txn, table="event_forward_extremities", column="room_id", - iterable=to_delete, + values=to_delete, keyvalues={}, ) diff --git a/synapse/storage/databases/main/ui_auth.py b/synapse/storage/databases/main/ui_auth.py index 4d6bbc94c7..340ca9e47d 100644 --- a/synapse/storage/databases/main/ui_auth.py +++ b/synapse/storage/databases/main/ui_auth.py @@ -326,7 +326,7 @@ class UIAuthWorkerStore(SQLBaseStore): txn, table="ui_auth_sessions_ips", column="session_id", - iterable=session_ids, + values=session_ids, keyvalues={}, ) @@ -377,7 +377,7 @@ class UIAuthWorkerStore(SQLBaseStore): txn, table="ui_auth_sessions_credentials", column="session_id", - iterable=session_ids, + values=session_ids, keyvalues={}, ) @@ -386,7 +386,7 @@ class UIAuthWorkerStore(SQLBaseStore): txn, table="ui_auth_sessions", column="session_id", - iterable=session_ids, + values=session_ids, keyvalues={}, ) diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index f1e3a27e63..c4c8c0021b 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -664,7 +664,7 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): txn, table="state_groups_state", column="state_group", - iterable=state_groups_to_delete, + values=state_groups_to_delete, keyvalues={}, ) @@ -675,7 +675,7 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): txn, table="state_group_edges", column="state_group", - iterable=state_groups_to_delete, + values=state_groups_to_delete, keyvalues={}, ) @@ -686,6 +686,6 @@ class StateGroupDataStore(StateBackgroundUpdateStore, SQLBaseStore): txn, table="state_groups", column="id", - iterable=state_groups_to_delete, + values=state_groups_to_delete, keyvalues={}, ) -- cgit 1.5.1 From 60453315bdbbbd364f13ca386de965e015f1062f Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 21 Sep 2021 13:02:34 +0100 Subject: Always add local users to the user directory (#10796) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's a simplification, but one that'll help make the user directory logic easier to follow with the other changes upcoming. It's not strictly required for those changes, but this will help simplify the resulting logic that listens for `m.room.member` events and generally make the logic easier to follow. This means the config option `search_all_users` ends up controlling the search query only, and not the data we store. The cost of doing so is an extra row in the `user_directory` and `user_directory_search` tables for each local user which - belongs to no public rooms - belongs to no private rooms of size ≥ 2 I think the cost of this will be marginal (since they'll already have entries in `users` and `profiles` anyway). As a small upside, a homeserver whose directory was built with this change can toggle `search_all_users` without having to rebuild their directory. Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- changelog.d/10796.misc | 1 + docs/sample_config.yaml | 14 +++++++----- synapse/config/user_directory.py | 14 +++++++----- synapse/handlers/deactivate_account.py | 7 ++---- synapse/handlers/profile.py | 18 +++++++--------- synapse/handlers/register.py | 9 ++++---- synapse/storage/databases/main/user_directory.py | 27 +++++++++--------------- tests/handlers/test_profile.py | 7 ++++-- tests/rest/client/test_rooms.py | 12 +++++------ 9 files changed, 54 insertions(+), 55 deletions(-) create mode 100644 changelog.d/10796.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/10796.misc b/changelog.d/10796.misc new file mode 100644 index 0000000000..1873b2386a --- /dev/null +++ b/changelog.d/10796.misc @@ -0,0 +1 @@ +Simplify the internal logic which maintains the user directory database tables. \ No newline at end of file diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 95cca16552..166cec38d3 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -2362,12 +2362,16 @@ user_directory: #enabled: false # Defines whether to search all users visible to your HS when searching - # the user directory, rather than limiting to users visible in public - # rooms. Defaults to false. + # the user directory. If false, search results will only contain users + # visible in public rooms and users sharing a room with the requester. + # Defaults to false. # - # If you set it true, you'll have to rebuild the user_directory search - # indexes, see: - # https://matrix-org.github.io/synapse/latest/user_directory.html + # NB. If you set this to true, and the last time the user_directory search + # indexes were (re)built was before Synapse 1.44, you'll have to + # rebuild the indexes in order to search through all known users. + # These indexes are built the first time Synapse starts; admins can + # manually trigger a rebuild following the instructions at + # https://matrix-org.github.io/synapse/latest/user_directory.html # # Uncomment to return search results containing all known users, even if that # user does not share a room with the requester. diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py index b10df8a232..2552f688d0 100644 --- a/synapse/config/user_directory.py +++ b/synapse/config/user_directory.py @@ -45,12 +45,16 @@ class UserDirectoryConfig(Config): #enabled: false # Defines whether to search all users visible to your HS when searching - # the user directory, rather than limiting to users visible in public - # rooms. Defaults to false. + # the user directory. If false, search results will only contain users + # visible in public rooms and users sharing a room with the requester. + # Defaults to false. # - # If you set it true, you'll have to rebuild the user_directory search - # indexes, see: - # https://matrix-org.github.io/synapse/latest/user_directory.html + # NB. If you set this to true, and the last time the user_directory search + # indexes were (re)built was before Synapse 1.44, you'll have to + # rebuild the indexes in order to search through all known users. + # These indexes are built the first time Synapse starts; admins can + # manually trigger a rebuild following the instructions at + # https://matrix-org.github.io/synapse/latest/user_directory.html # # Uncomment to return search results containing all known users, even if that # user does not share a room with the requester. diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index dcd320c555..a03ff9842b 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -257,11 +257,8 @@ class DeactivateAccountHandler(BaseHandler): """ # Add the user to the directory, if necessary. user = UserID.from_string(user_id) - if self.hs.config.user_directory_search_all_users: - profile = await self.store.get_profileinfo(user.localpart) - await self.user_directory_handler.handle_local_profile_change( - user_id, profile - ) + profile = await self.store.get_profileinfo(user.localpart) + await self.user_directory_handler.handle_local_profile_change(user_id, profile) # Ensure the user is not marked as erased. await self.store.mark_user_not_erased(user_id) diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 246eb98282..f06070bfcf 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -214,11 +214,10 @@ class ProfileHandler(BaseHandler): target_user.localpart, displayname_to_set ) - if self.hs.config.user_directory_search_all_users: - profile = await self.store.get_profileinfo(target_user.localpart) - await self.user_directory_handler.handle_local_profile_change( - target_user.to_string(), profile - ) + profile = await self.store.get_profileinfo(target_user.localpart) + await self.user_directory_handler.handle_local_profile_change( + target_user.to_string(), profile + ) await self._update_join_states(requester, target_user) @@ -300,11 +299,10 @@ class ProfileHandler(BaseHandler): target_user.localpart, avatar_url_to_set ) - if self.hs.config.user_directory_search_all_users: - profile = await self.store.get_profileinfo(target_user.localpart) - await self.user_directory_handler.handle_local_profile_change( - target_user.to_string(), profile - ) + profile = await self.store.get_profileinfo(target_user.localpart) + await self.user_directory_handler.handle_local_profile_change( + target_user.to_string(), profile + ) await self._update_join_states(requester, target_user) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index efb7d26760..1c195c65db 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -295,11 +295,10 @@ class RegistrationHandler(BaseHandler): shadow_banned=shadow_banned, ) - if self.hs.config.user_directory_search_all_users: - profile = await self.store.get_profileinfo(localpart) - await self.user_directory_handler.handle_local_profile_change( - user_id, profile - ) + profile = await self.store.get_profileinfo(localpart) + await self.user_directory_handler.handle_local_profile_change( + user_id, profile + ) else: # autogen a sequential user ID diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index 8aebdc2817..718f3e9976 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -85,19 +85,17 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore): self.db_pool.simple_insert_many_txn(txn, TEMP_TABLE + "_rooms", rooms) del rooms - # If search all users is on, get all the users we want to add. - if self.hs.config.user_directory_search_all_users: - sql = ( - "CREATE TABLE IF NOT EXISTS " - + TEMP_TABLE - + "_users(user_id TEXT NOT NULL)" - ) - txn.execute(sql) + sql = ( + "CREATE TABLE IF NOT EXISTS " + + TEMP_TABLE + + "_users(user_id TEXT NOT NULL)" + ) + txn.execute(sql) - txn.execute("SELECT name FROM users") - users = [{"user_id": x[0]} for x in txn.fetchall()] + txn.execute("SELECT name FROM users") + users = [{"user_id": x[0]} for x in txn.fetchall()] - self.db_pool.simple_insert_many_txn(txn, TEMP_TABLE + "_users", users) + self.db_pool.simple_insert_many_txn(txn, TEMP_TABLE + "_users", users) new_pos = await self.get_max_stream_id_in_current_state_deltas() await self.db_pool.runInteraction( @@ -265,13 +263,8 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore): async def _populate_user_directory_process_users(self, progress, batch_size): """ - If search_all_users is enabled, add all of the users to the user directory. + Add all local users to the user directory. """ - if not self.hs.config.user_directory_search_all_users: - await self.db_pool.updates._end_background_update( - "populate_user_directory_process_users" - ) - return 1 def _get_next_batch(txn): sql = "SELECT user_id FROM %s LIMIT %s" % ( diff --git a/tests/handlers/test_profile.py b/tests/handlers/test_profile.py index 2928c4f48c..57cc3e2646 100644 --- a/tests/handlers/test_profile.py +++ b/tests/handlers/test_profile.py @@ -16,6 +16,7 @@ from unittest.mock import Mock import synapse.types from synapse.api.errors import AuthError, SynapseError +from synapse.rest import admin from synapse.types import UserID from tests import unittest @@ -25,6 +26,8 @@ from tests.test_utils import make_awaitable class ProfileTestCase(unittest.HomeserverTestCase): """Tests profile management.""" + servlets = [admin.register_servlets] + def make_homeserver(self, reactor, clock): self.mock_federation = Mock() self.mock_registry = Mock() @@ -46,11 +49,11 @@ class ProfileTestCase(unittest.HomeserverTestCase): def prepare(self, reactor, clock, hs): self.store = hs.get_datastore() - self.frank = UserID.from_string("@1234ABCD:test") + self.frank = UserID.from_string("@1234abcd:test") self.bob = UserID.from_string("@4567:test") self.alice = UserID.from_string("@alice:remote") - self.get_success(self.store.create_profile(self.frank.localpart)) + self.get_success(self.register_user(self.frank.localpart, "frankpassword")) self.handler = hs.get_profile_handler() diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index 5a01765f4d..ef847f0f5f 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -869,6 +869,12 @@ class RoomJoinRatelimitTestCase(RoomBase): room.register_servlets, ] + def prepare(self, reactor, clock, homeserver): + super().prepare(reactor, clock, homeserver) + # profile changes expect that the user is actually registered + user = UserID.from_string(self.user_id) + self.get_success(self.register_user(user.localpart, "supersecretpassword")) + @unittest.override_config( {"rc_joins": {"local": {"per_second": 0.5, "burst_count": 3}}} ) @@ -898,12 +904,6 @@ class RoomJoinRatelimitTestCase(RoomBase): # join in a second. room_ids.append(self.helper.create_room_as(self.user_id)) - # Create a profile for the user, since it hasn't been done on registration. - store = self.hs.get_datastore() - self.get_success( - store.create_profile(UserID.from_string(self.user_id).localpart) - ) - # Update the display name for the user. path = "/_matrix/client/r0/profile/%s/displayname" % self.user_id channel = self.make_request("PUT", path, {"displayname": "John Doe"}) -- cgit 1.5.1 From 4054dfa409fa17b45ab8f265813994956ed97bae Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 21 Sep 2021 13:34:26 -0400 Subject: Add type hints for event streams. (#10856) --- changelog.d/10856.misc | 1 + synapse/handlers/account_data.py | 13 ++++++-- synapse/handlers/appservice.py | 6 ++-- synapse/handlers/initial_sync.py | 2 +- synapse/handlers/presence.py | 8 +++-- synapse/handlers/receipts.py | 13 ++++++-- synapse/handlers/room.py | 18 ++++++++--- synapse/handlers/sync.py | 6 ++-- synapse/handlers/typing.py | 13 ++++++-- synapse/module_api/__init__.py | 2 +- synapse/notifier.py | 2 +- synapse/storage/databases/main/receipts.py | 6 ++-- synapse/streams/__init__.py | 22 ++++++++++++++ synapse/streams/events.py | 49 ++++++++++++++++++------------ tests/handlers/test_receipts.py | 2 +- tests/handlers/test_typing.py | 46 +++++++++++++++++++++++----- tests/rest/client/test_shadow_banned.py | 10 ++++-- tests/rest/client/test_typing.py | 10 ++++-- 18 files changed, 169 insertions(+), 60 deletions(-) create mode 100644 changelog.d/10856.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/10856.misc b/changelog.d/10856.misc new file mode 100644 index 0000000000..f09af2e00a --- /dev/null +++ b/changelog.d/10856.misc @@ -0,0 +1 @@ +Add missing type hints to handlers. diff --git a/synapse/handlers/account_data.py b/synapse/handlers/account_data.py index e9e7a78546..96273e2f81 100644 --- a/synapse/handlers/account_data.py +++ b/synapse/handlers/account_data.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import random -from typing import TYPE_CHECKING, Any, List, Tuple +from typing import TYPE_CHECKING, Collection, List, Optional, Tuple from synapse.replication.http.account_data import ( ReplicationAddTagRestServlet, @@ -21,6 +21,7 @@ from synapse.replication.http.account_data import ( ReplicationRoomAccountDataRestServlet, ReplicationUserAccountDataRestServlet, ) +from synapse.streams import EventSource from synapse.types import JsonDict, UserID if TYPE_CHECKING: @@ -163,7 +164,7 @@ class AccountDataHandler: return response["max_stream_id"] -class AccountDataEventSource: +class AccountDataEventSource(EventSource[int, JsonDict]): def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() @@ -171,7 +172,13 @@ class AccountDataEventSource: return self.store.get_max_account_data_stream_id() async def get_new_events( - self, user: UserID, from_key: int, **kwargs: Any + self, + user: UserID, + from_key: int, + limit: Optional[int], + room_ids: Collection[str], + is_guest: bool, + explicit_room_id: Optional[str] = None, ) -> Tuple[List[JsonDict], int]: user_id = user.to_string() last_stream_id = from_key diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index 8bde9ed66f..b7213b67a5 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -254,7 +254,7 @@ class ApplicationServicesHandler: async def _handle_typing( self, service: ApplicationService, new_token: int ) -> List[JsonDict]: - typing_source = self.event_sources.sources["typing"] + typing_source = self.event_sources.sources.typing # Get the typing events from just before current typing, _ = await typing_source.get_new_events_as( service=service, @@ -269,7 +269,7 @@ class ApplicationServicesHandler: from_key = await self.store.get_type_stream_id_for_appservice( service, "read_receipt" ) - receipts_source = self.event_sources.sources["receipt"] + receipts_source = self.event_sources.sources.receipt receipts, _ = await receipts_source.get_new_events_as( service=service, from_key=from_key ) @@ -279,7 +279,7 @@ class ApplicationServicesHandler: self, service: ApplicationService, users: Collection[Union[str, UserID]] ) -> List[JsonDict]: events: List[JsonDict] = [] - presence_source = self.event_sources.sources["presence"] + presence_source = self.event_sources.sources.presence from_key = await self.store.get_type_stream_id_for_appservice( service, "presence" ) diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py index c942086e74..9ad39a65d8 100644 --- a/synapse/handlers/initial_sync.py +++ b/synapse/handlers/initial_sync.py @@ -125,7 +125,7 @@ class InitialSyncHandler(BaseHandler): now_token = self.hs.get_event_sources().get_current_token() - presence_stream = self.hs.get_event_sources().sources["presence"] + presence_stream = self.hs.get_event_sources().sources.presence presence, _ = await presence_stream.get_new_events( user, from_key=None, include_offline=False ) diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 841c8815b0..983c837c66 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -65,6 +65,7 @@ from synapse.replication.http.streams import ReplicationGetStreamUpdates from synapse.replication.tcp.commands import ClearUserSyncsCommand from synapse.replication.tcp.streams import PresenceFederationStream, PresenceStream from synapse.storage.databases.main import DataStore +from synapse.streams import EventSource from synapse.types import JsonDict, UserID, get_domain_from_id from synapse.util.async_helpers import Linearizer from synapse.util.caches.descriptors import _CacheContext, cached @@ -1500,7 +1501,7 @@ def format_user_presence_state( return content -class PresenceEventSource: +class PresenceEventSource(EventSource[int, UserPresenceState]): def __init__(self, hs: "HomeServer"): # We can't call get_presence_handler here because there's a cycle: # @@ -1519,10 +1520,11 @@ class PresenceEventSource: self, user: UserID, from_key: Optional[int], + limit: Optional[int] = None, room_ids: Optional[List[str]] = None, - include_offline: bool = True, + is_guest: bool = False, explicit_room_id: Optional[str] = None, - **kwargs: Any, + include_offline: bool = True, ) -> Tuple[List[UserPresenceState], int]: # The process for getting presence events are: # 1. Get the rooms the user is in. diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index c7567ac05f..5881f09ebd 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -12,11 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import TYPE_CHECKING, Any, List, Optional, Tuple +from typing import TYPE_CHECKING, Iterable, List, Optional, Tuple from synapse.api.constants import ReadReceiptEventFields from synapse.appservice import ApplicationService from synapse.handlers._base import BaseHandler +from synapse.streams import EventSource from synapse.types import JsonDict, ReadReceipt, UserID, get_domain_from_id if TYPE_CHECKING: @@ -162,7 +163,7 @@ class ReceiptsHandler(BaseHandler): await self.federation_sender.send_read_receipt(receipt) -class ReceiptEventSource: +class ReceiptEventSource(EventSource[int, JsonDict]): def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() self.config = hs.config @@ -216,7 +217,13 @@ class ReceiptEventSource: return visible_events async def get_new_events( - self, from_key: int, room_ids: List[str], user: UserID, **kwargs: Any + self, + user: UserID, + from_key: int, + limit: Optional[int], + room_ids: Iterable[str], + is_guest: bool, + explicit_room_id: Optional[str] = None, ) -> Tuple[List[JsonDict], int]: from_key = int(from_key) to_key = self.get_current_key() diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index abdd506164..287ea2fd06 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -20,7 +20,16 @@ import math import random import string from collections import OrderedDict -from typing import TYPE_CHECKING, Any, Awaitable, Dict, List, Optional, Tuple +from typing import ( + TYPE_CHECKING, + Any, + Awaitable, + Collection, + Dict, + List, + Optional, + Tuple, +) from synapse.api.constants import ( EventContentFields, @@ -47,6 +56,7 @@ from synapse.events import EventBase from synapse.events.utils import copy_power_levels_contents from synapse.rest.admin._base import assert_user_is_admin from synapse.storage.state import StateFilter +from synapse.streams import EventSource from synapse.types import ( JsonDict, MutableStateMap, @@ -1173,7 +1183,7 @@ class RoomContextHandler: return results -class RoomEventSource: +class RoomEventSource(EventSource[RoomStreamToken, EventBase]): def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() @@ -1181,8 +1191,8 @@ class RoomEventSource: self, user: UserID, from_key: RoomStreamToken, - limit: int, - room_ids: List[str], + limit: Optional[int], + room_ids: Collection[str], is_guest: bool, explicit_room_id: Optional[str] = None, ) -> Tuple[List[EventBase], RoomStreamToken]: diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index e93db4bdcc..2c7c6d63a9 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -443,7 +443,7 @@ class SyncHandler: room_ids = sync_result_builder.joined_room_ids - typing_source = self.event_sources.sources["typing"] + typing_source = self.event_sources.sources.typing typing, typing_key = await typing_source.get_new_events( user=sync_config.user, from_key=typing_key, @@ -465,7 +465,7 @@ class SyncHandler: receipt_key = since_token.receipt_key if since_token else 0 - receipt_source = self.event_sources.sources["receipt"] + receipt_source = self.event_sources.sources.receipt receipts, receipt_key = await receipt_source.get_new_events( user=sync_config.user, from_key=receipt_key, @@ -1415,7 +1415,7 @@ class SyncHandler: sync_config = sync_result_builder.sync_config user = sync_result_builder.sync_config.user - presence_source = self.event_sources.sources["presence"] + presence_source = self.event_sources.sources.presence since_token = sync_result_builder.since_token presence_key = None diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index 4492c8567b..9326330c90 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -14,7 +14,7 @@ import logging import random from collections import namedtuple -from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Set, Tuple +from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Set, Tuple from synapse.api.errors import AuthError, ShadowBanError, SynapseError from synapse.appservice import ApplicationService @@ -23,6 +23,7 @@ from synapse.metrics.background_process_metrics import ( wrap_as_background_process, ) from synapse.replication.tcp.streams import TypingStream +from synapse.streams import EventSource from synapse.types import JsonDict, Requester, UserID, get_domain_from_id from synapse.util.caches.stream_change_cache import StreamChangeCache from synapse.util.metrics import Measure @@ -439,7 +440,7 @@ class TypingWriterHandler(FollowerTypingHandler): raise Exception("Typing writer instance got typing info over replication") -class TypingNotificationEventSource: +class TypingNotificationEventSource(EventSource[int, JsonDict]): def __init__(self, hs: "HomeServer"): self.hs = hs self.clock = hs.get_clock() @@ -485,7 +486,13 @@ class TypingNotificationEventSource: return (events, handler._latest_room_serial) async def get_new_events( - self, from_key: int, room_ids: Iterable[str], **kwargs: Any + self, + user: UserID, + from_key: int, + limit: Optional[int], + room_ids: Iterable[str], + is_guest: bool, + explicit_room_id: Optional[str] = None, ) -> Tuple[List[JsonDict], int]: with Measure(self.clock, "typing.get_new_events"): from_key = int(from_key) diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index 2d403532fa..3196c2bec6 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -91,7 +91,7 @@ class ModuleApi: self._auth = hs.get_auth() self._auth_handler = auth_handler self._server_name = hs.hostname - self._presence_stream = hs.get_event_sources().sources["presence"] + self._presence_stream = hs.get_event_sources().sources.presence self._state = hs.get_state_handler() self._clock: Clock = hs.get_clock() self._send_email_handler = hs.get_send_email_handler() diff --git a/synapse/notifier.py b/synapse/notifier.py index bbe337949a..1a9f84ba45 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -584,7 +584,7 @@ class Notifier: events: List[EventBase] = [] end_token = from_token - for name, source in self.event_sources.sources.items(): + for name, source in self.event_sources.sources.get_sources(): keyname = "%s_key" % name before_id = getattr(before_token, keyname) after_id = getattr(after_token, keyname) diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py index edeaacd7a6..01a4281301 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py @@ -14,7 +14,7 @@ # limitations under the License. import logging -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, Iterable, List, Optional, Tuple from twisted.internet import defer @@ -153,12 +153,12 @@ class ReceiptsWorkerStore(SQLBaseStore): } async def get_linearized_receipts_for_rooms( - self, room_ids: List[str], to_key: int, from_key: Optional[int] = None + self, room_ids: Iterable[str], to_key: int, from_key: Optional[int] = None ) -> List[dict]: """Get receipts for multiple rooms for sending to clients. Args: - room_id: List of room_ids. + room_id: The room IDs to fetch receipts of. to_key: Max stream id to fetch receipts up to. from_key: Min stream id to fetch receipts from. None fetches from the start. diff --git a/synapse/streams/__init__.py b/synapse/streams/__init__.py index 5e83dba2ed..806b671305 100644 --- a/synapse/streams/__init__.py +++ b/synapse/streams/__init__.py @@ -11,3 +11,25 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +from typing import Collection, Generic, List, Optional, Tuple, TypeVar + +from synapse.types import UserID + +# The key, this is either a stream token or int. +K = TypeVar("K") +# The return type. +R = TypeVar("R") + + +class EventSource(Generic[K, R]): + async def get_new_events( + self, + user: UserID, + from_key: K, + limit: Optional[int], + room_ids: Collection[str], + is_guest: bool, + explicit_room_id: Optional[str] = None, + ) -> Tuple[List[R], K]: + ... diff --git a/synapse/streams/events.py b/synapse/streams/events.py index 99b0aac2fb..21591d0bfd 100644 --- a/synapse/streams/events.py +++ b/synapse/streams/events.py @@ -12,29 +12,40 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Dict +from typing import TYPE_CHECKING, Iterator, Tuple + +import attr from synapse.handlers.account_data import AccountDataEventSource from synapse.handlers.presence import PresenceEventSource from synapse.handlers.receipts import ReceiptEventSource from synapse.handlers.room import RoomEventSource from synapse.handlers.typing import TypingNotificationEventSource +from synapse.streams import EventSource from synapse.types import StreamToken +if TYPE_CHECKING: + from synapse.server import HomeServer -class EventSources: - SOURCE_TYPES = { - "room": RoomEventSource, - "presence": PresenceEventSource, - "typing": TypingNotificationEventSource, - "receipt": ReceiptEventSource, - "account_data": AccountDataEventSource, - } - def __init__(self, hs): - self.sources: Dict[str, Any] = { - name: cls(hs) for name, cls in EventSources.SOURCE_TYPES.items() - } +@attr.s(frozen=True, slots=True, auto_attribs=True) +class _EventSourcesInner: + room: RoomEventSource + presence: PresenceEventSource + typing: TypingNotificationEventSource + receipt: ReceiptEventSource + account_data: AccountDataEventSource + + def get_sources(self) -> Iterator[Tuple[str, EventSource]]: + for attribute in _EventSourcesInner.__attrs_attrs__: # type: ignore[attr-defined] + yield attribute.name, getattr(self, attribute.name) + + +class EventSources: + def __init__(self, hs: "HomeServer"): + self.sources = _EventSourcesInner( + *(attribute.type(hs) for attribute in _EventSourcesInner.__attrs_attrs__) # type: ignore[attr-defined] + ) self.store = hs.get_datastore() def get_current_token(self) -> StreamToken: @@ -44,11 +55,11 @@ class EventSources: groups_key = self.store.get_group_stream_token() token = StreamToken( - room_key=self.sources["room"].get_current_key(), - presence_key=self.sources["presence"].get_current_key(), - typing_key=self.sources["typing"].get_current_key(), - receipt_key=self.sources["receipt"].get_current_key(), - account_data_key=self.sources["account_data"].get_current_key(), + room_key=self.sources.room.get_current_key(), + presence_key=self.sources.presence.get_current_key(), + typing_key=self.sources.typing.get_current_key(), + receipt_key=self.sources.receipt.get_current_key(), + account_data_key=self.sources.account_data.get_current_key(), push_rules_key=push_rules_key, to_device_key=to_device_key, device_list_key=device_list_key, @@ -67,7 +78,7 @@ class EventSources: The current token for pagination. """ token = StreamToken( - room_key=self.sources["room"].get_current_key(), + room_key=self.sources.room.get_current_key(), presence_key=0, typing_key=0, receipt_key=0, diff --git a/tests/handlers/test_receipts.py b/tests/handlers/test_receipts.py index 732a12c9bd..5de89c873b 100644 --- a/tests/handlers/test_receipts.py +++ b/tests/handlers/test_receipts.py @@ -23,7 +23,7 @@ from tests import unittest class ReceiptsTestCase(unittest.HomeserverTestCase): def prepare(self, reactor, clock, hs): - self.event_source = hs.get_event_sources().sources["receipt"] + self.event_source = hs.get_event_sources().sources.receipt # In the first param of _test_filters_hidden we use "hidden" instead of # ReadReceiptEventFields.MSC2285_HIDDEN. We do this because we're mocking diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index fa3cff598e..000f9b9fde 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -89,7 +89,7 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): self.handler = hs.get_typing_handler() - self.event_source = hs.get_event_sources().sources["typing"] + self.event_source = hs.get_event_sources().sources.typing self.datastore = hs.get_datastore() self.datastore.get_destination_retry_timings = Mock( @@ -171,7 +171,9 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): self.assertEquals(self.event_source.get_current_key(), 1) events = self.get_success( - self.event_source.get_new_events(room_ids=[ROOM_ID], from_key=0) + self.event_source.get_new_events( + user=U_APPLE, from_key=0, limit=None, room_ids=[ROOM_ID], is_guest=False + ) ) self.assertEquals( events[0], @@ -239,7 +241,9 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): self.assertEquals(self.event_source.get_current_key(), 1) events = self.get_success( - self.event_source.get_new_events(room_ids=[ROOM_ID], from_key=0) + self.event_source.get_new_events( + user=U_APPLE, from_key=0, limit=None, room_ids=[ROOM_ID], is_guest=False + ) ) self.assertEquals( events[0], @@ -276,7 +280,13 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): self.assertEquals(self.event_source.get_current_key(), 0) events = self.get_success( - self.event_source.get_new_events(room_ids=[OTHER_ROOM_ID], from_key=0) + self.event_source.get_new_events( + user=U_APPLE, + from_key=0, + limit=None, + room_ids=[OTHER_ROOM_ID], + is_guest=False, + ) ) self.assertEquals(events[0], []) self.assertEquals(events[1], 0) @@ -324,7 +334,9 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): self.assertEquals(self.event_source.get_current_key(), 1) events = self.get_success( - self.event_source.get_new_events(room_ids=[ROOM_ID], from_key=0) + self.event_source.get_new_events( + user=U_APPLE, from_key=0, limit=None, room_ids=[ROOM_ID], is_guest=False + ) ) self.assertEquals( events[0], @@ -350,7 +362,13 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): self.assertEquals(self.event_source.get_current_key(), 1) events = self.get_success( - self.event_source.get_new_events(room_ids=[ROOM_ID], from_key=0) + self.event_source.get_new_events( + user=U_APPLE, + from_key=0, + limit=None, + room_ids=[ROOM_ID], + is_guest=False, + ) ) self.assertEquals( events[0], @@ -369,7 +387,13 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): self.assertEquals(self.event_source.get_current_key(), 2) events = self.get_success( - self.event_source.get_new_events(room_ids=[ROOM_ID], from_key=1) + self.event_source.get_new_events( + user=U_APPLE, + from_key=1, + limit=None, + room_ids=[ROOM_ID], + is_guest=False, + ) ) self.assertEquals( events[0], @@ -392,7 +416,13 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): self.assertEquals(self.event_source.get_current_key(), 3) events = self.get_success( - self.event_source.get_new_events(room_ids=[ROOM_ID], from_key=0) + self.event_source.get_new_events( + user=U_APPLE, + from_key=0, + limit=None, + room_ids=[ROOM_ID], + is_guest=False, + ) ) self.assertEquals( events[0], diff --git a/tests/rest/client/test_shadow_banned.py b/tests/rest/client/test_shadow_banned.py index 6a0d9a82be..b0c44af033 100644 --- a/tests/rest/client/test_shadow_banned.py +++ b/tests/rest/client/test_shadow_banned.py @@ -193,7 +193,7 @@ class RoomTestCase(_ShadowBannedBase): self.assertEquals(200, channel.code) # There should be no typing events. - event_source = self.hs.get_event_sources().sources["typing"] + event_source = self.hs.get_event_sources().sources.typing self.assertEquals(event_source.get_current_key(), 0) # The other user can join and send typing events. @@ -210,7 +210,13 @@ class RoomTestCase(_ShadowBannedBase): # These appear in the room. self.assertEquals(event_source.get_current_key(), 1) events = self.get_success( - event_source.get_new_events(from_key=0, room_ids=[room_id]) + event_source.get_new_events( + user=UserID.from_string(self.other_user_id), + from_key=0, + limit=None, + room_ids=[room_id], + is_guest=False, + ) ) self.assertEquals( events[0], diff --git a/tests/rest/client/test_typing.py b/tests/rest/client/test_typing.py index b54b004733..ee0abd5295 100644 --- a/tests/rest/client/test_typing.py +++ b/tests/rest/client/test_typing.py @@ -41,7 +41,7 @@ class RoomTypingTestCase(unittest.HomeserverTestCase): federation_client=Mock(), ) - self.event_source = hs.get_event_sources().sources["typing"] + self.event_source = hs.get_event_sources().sources.typing hs.get_federation_handler = Mock() @@ -76,7 +76,13 @@ class RoomTypingTestCase(unittest.HomeserverTestCase): self.assertEquals(self.event_source.get_current_key(), 1) events = self.get_success( - self.event_source.get_new_events(from_key=0, room_ids=[self.room_id]) + self.event_source.get_new_events( + user=UserID.from_string(self.user_id), + from_key=0, + limit=None, + room_ids=[self.room_id], + is_guest=False, + ) ) self.assertEquals( events[0], -- cgit 1.5.1 From 51e2db35983953b13e536331ec2f6ad4cae7e0f1 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 21 Sep 2021 15:06:28 -0500 Subject: Rename MSC2716 things from `chunk` to `batch` to match `/batch_send` endpoint (#10838) See https://github.com/matrix-org/matrix-doc/pull/2716#discussion_r684574497 Dropping support for older MSC2716 room versions so we don't have to worry about supporting both chunk and batch events. --- changelog.d/10838.misc | 1 + synapse/api/constants.py | 10 +-- synapse/api/room_versions.py | 22 +----- synapse/event_auth.py | 8 +- synapse/events/utils.py | 6 +- synapse/handlers/message.py | 2 +- synapse/rest/client/room_batch.py | 86 +++++++++++----------- synapse/storage/databases/main/event_federation.py | 30 ++++---- synapse/storage/databases/main/events.py | 46 ++++++------ synapse/storage/databases/main/room_batch.py | 6 +- synapse/storage/schema/__init__.py | 2 +- .../01msc2716_chunk_to_batch_rename.sql.postgres | 23 ++++++ .../64/01msc2716_chunk_to_batch_rename.sql.sqlite | 37 ++++++++++ 13 files changed, 162 insertions(+), 117 deletions(-) create mode 100644 changelog.d/10838.misc create mode 100644 synapse/storage/schema/main/delta/64/01msc2716_chunk_to_batch_rename.sql.postgres create mode 100644 synapse/storage/schema/main/delta/64/01msc2716_chunk_to_batch_rename.sql.sqlite (limited to 'synapse/storage/databases') diff --git a/changelog.d/10838.misc b/changelog.d/10838.misc new file mode 100644 index 0000000000..b1977d0a2e --- /dev/null +++ b/changelog.d/10838.misc @@ -0,0 +1 @@ +Rename [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) fields and event types from `chunk` to `batch` to match the `/batch_send` endpoint. diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 236f0c7f99..39fd9954d5 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -121,7 +121,7 @@ class EventTypes: SpaceParent = "m.space.parent" MSC2716_INSERTION = "org.matrix.msc2716.insertion" - MSC2716_CHUNK = "org.matrix.msc2716.chunk" + MSC2716_BATCH = "org.matrix.msc2716.batch" MSC2716_MARKER = "org.matrix.msc2716.marker" @@ -209,11 +209,11 @@ class EventContentFields: # Used on normal messages to indicate they were historically imported after the fact MSC2716_HISTORICAL = "org.matrix.msc2716.historical" - # For "insertion" events to indicate what the next chunk ID should be in + # For "insertion" events to indicate what the next batch ID should be in # order to connect to it - MSC2716_NEXT_CHUNK_ID = "org.matrix.msc2716.next_chunk_id" - # Used on "chunk" events to indicate which insertion event it connects to - MSC2716_CHUNK_ID = "org.matrix.msc2716.chunk_id" + MSC2716_NEXT_BATCH_ID = "org.matrix.msc2716.next_batch_id" + # Used on "batch" events to indicate which insertion event it connects to + MSC2716_BATCH_ID = "org.matrix.msc2716.batch_id" # For "marker" events MSC2716_MARKER_INSERTION = "org.matrix.msc2716.marker.insertion" diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py index 61d9c658a9..0a895bba48 100644 --- a/synapse/api/room_versions.py +++ b/synapse/api/room_versions.py @@ -244,24 +244,8 @@ class RoomVersions: msc2716_historical=False, msc2716_redactions=False, ) - MSC2716 = RoomVersion( - "org.matrix.msc2716", - RoomDisposition.UNSTABLE, - EventFormatVersions.V3, - StateResolutionVersions.V2, - enforce_key_validity=True, - special_case_aliases_auth=False, - strict_canonicaljson=True, - limit_notifications_power_levels=True, - msc2176_redaction_rules=False, - msc3083_join_rules=False, - msc3375_redaction_rules=False, - msc2403_knocking=True, - msc2716_historical=True, - msc2716_redactions=False, - ) - MSC2716v2 = RoomVersion( - "org.matrix.msc2716v2", + MSC2716v3 = RoomVersion( + "org.matrix.msc2716v3", RoomDisposition.UNSTABLE, EventFormatVersions.V3, StateResolutionVersions.V2, @@ -289,9 +273,9 @@ KNOWN_ROOM_VERSIONS: Dict[str, RoomVersion] = { RoomVersions.V6, RoomVersions.MSC2176, RoomVersions.V7, - RoomVersions.MSC2716, RoomVersions.V8, RoomVersions.V9, + RoomVersions.MSC2716v3, ) } diff --git a/synapse/event_auth.py b/synapse/event_auth.py index cb133f3f84..fc50a0e71a 100644 --- a/synapse/event_auth.py +++ b/synapse/event_auth.py @@ -213,7 +213,7 @@ def check( if ( event.type == EventTypes.MSC2716_INSERTION - or event.type == EventTypes.MSC2716_CHUNK + or event.type == EventTypes.MSC2716_BATCH or event.type == EventTypes.MSC2716_MARKER ): check_historical(room_version_obj, event, auth_events) @@ -552,14 +552,14 @@ def check_historical( auth_events: StateMap[EventBase], ) -> None: """Check whether the event sender is allowed to send historical related - events like "insertion", "chunk", and "marker". + events like "insertion", "batch", and "marker". Returns: None Raises: AuthError if the event sender is not allowed to send historical related events - ("insertion", "chunk", and "marker"). + ("insertion", "batch", and "marker"). """ # Ignore the auth checks in room versions that do not support historical # events @@ -573,7 +573,7 @@ def check_historical( if user_level < historical_level: raise AuthError( 403, - 'You don\'t have permission to send send historical related events ("insertion", "chunk", and "marker")', + 'You don\'t have permission to send send historical related events ("insertion", "batch", and "marker")', ) diff --git a/synapse/events/utils.py b/synapse/events/utils.py index fb22337e27..f86113a448 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -141,9 +141,9 @@ def prune_event_dict(room_version: RoomVersion, event_dict: dict) -> dict: elif event_type == EventTypes.Redaction and room_version.msc2176_redaction_rules: add_fields("redacts") elif room_version.msc2716_redactions and event_type == EventTypes.MSC2716_INSERTION: - add_fields(EventContentFields.MSC2716_NEXT_CHUNK_ID) - elif room_version.msc2716_redactions and event_type == EventTypes.MSC2716_CHUNK: - add_fields(EventContentFields.MSC2716_CHUNK_ID) + add_fields(EventContentFields.MSC2716_NEXT_BATCH_ID) + elif room_version.msc2716_redactions and event_type == EventTypes.MSC2716_BATCH: + add_fields(EventContentFields.MSC2716_BATCH_ID) elif room_version.msc2716_redactions and event_type == EventTypes.MSC2716_MARKER: add_fields(EventContentFields.MSC2716_MARKER_INSERTION) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index bf48536308..6cd694b2da 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1425,7 +1425,7 @@ class EventCreationHandler: # structural protocol level). is_msc2716_event = ( original_event.type == EventTypes.MSC2716_INSERTION - or original_event.type == EventTypes.MSC2716_CHUNK + or original_event.type == EventTypes.MSC2716_BATCH or original_event.type == EventTypes.MSC2716_MARKER ) if not room_version_obj.msc2716_historical and is_msc2716_event: diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py index f73ccc7f65..bf14ec384e 100644 --- a/synapse/rest/client/room_batch.py +++ b/synapse/rest/client/room_batch.py @@ -43,25 +43,25 @@ logger = logging.getLogger(__name__) class RoomBatchSendEventRestServlet(RestServlet): """ - API endpoint which can insert a chunk of events historically back in time + API endpoint which can insert a batch of events historically back in time next to the given `prev_event`. - `chunk_id` comes from `next_chunk_id `in the response of the batch send - endpoint and is derived from the "insertion" events added to each chunk. + `batch_id` comes from `next_batch_id `in the response of the batch send + endpoint and is derived from the "insertion" events added to each batch. It's not required for the first batch send. `state_events_at_start` is used to define the historical state events needed to auth the events like join events. These events will float outside of the normal DAG as outlier's and won't be visible in the chat - history which also allows us to insert multiple chunks without having a bunch - of `@mxid joined the room` noise between each chunk. + history which also allows us to insert multiple batches without having a bunch + of `@mxid joined the room` noise between each batch. - `events` is chronological chunk/list of events you want to insert. - There is a reverse-chronological constraint on chunks so once you insert + `events` is chronological list of events you want to insert. + There is a reverse-chronological constraint on batches so once you insert some messages, you can only insert older ones after that. - tldr; Insert chunks from your most recent history -> oldest history. + tldr; Insert batches from your most recent history -> oldest history. - POST /_matrix/client/unstable/org.matrix.msc2716/rooms//batch_send?prev_event_id=&chunk_id= + POST /_matrix/client/unstable/org.matrix.msc2716/rooms//batch_send?prev_event_id=&batch_id= { "events": [ ... ], "state_events_at_start": [ ... ] @@ -129,7 +129,7 @@ class RoomBatchSendEventRestServlet(RestServlet): self, sender: str, room_id: str, origin_server_ts: int ) -> JsonDict: """Creates an event dict for an "insertion" event with the proper fields - and a random chunk ID. + and a random batch ID. Args: sender: The event author MXID @@ -140,13 +140,13 @@ class RoomBatchSendEventRestServlet(RestServlet): The new event dictionary to insert. """ - next_chunk_id = random_string(8) + next_batch_id = random_string(8) insertion_event = { "type": EventTypes.MSC2716_INSERTION, "sender": sender, "room_id": room_id, "content": { - EventContentFields.MSC2716_NEXT_CHUNK_ID: next_chunk_id, + EventContentFields.MSC2716_NEXT_BATCH_ID: next_batch_id, EventContentFields.MSC2716_HISTORICAL: True, }, "origin_server_ts": origin_server_ts, @@ -191,7 +191,7 @@ class RoomBatchSendEventRestServlet(RestServlet): prev_event_ids_from_query = parse_strings_from_args( request.args, "prev_event_id" ) - chunk_id_from_query = parse_string(request, "chunk_id") + batch_id_from_query = parse_string(request, "batch_id") if prev_event_ids_from_query is None: raise SynapseError( @@ -291,27 +291,27 @@ class RoomBatchSendEventRestServlet(RestServlet): prev_event_ids_from_query ) - # Figure out which chunk to connect to. If they passed in - # chunk_id_from_query let's use it. The chunk ID passed in comes - # from the chunk_id in the "insertion" event from the previous chunk. - last_event_in_chunk = events_to_create[-1] - chunk_id_to_connect_to = chunk_id_from_query + # Figure out which batch to connect to. If they passed in + # batch_id_from_query let's use it. The batch ID passed in comes + # from the batch_id in the "insertion" event from the previous batch. + last_event_in_batch = events_to_create[-1] + batch_id_to_connect_to = batch_id_from_query base_insertion_event = None - if chunk_id_from_query: + if batch_id_from_query: # All but the first base insertion event should point at a fake # event, which causes the HS to ask for the state at the start of - # the chunk later. + # the batch later. prev_event_ids = [fake_prev_event_id] - # Verify the chunk_id_from_query corresponds to an actual insertion event - # and have the chunk connected. + # Verify the batch_id_from_query corresponds to an actual insertion event + # and have the batch connected. corresponding_insertion_event_id = ( - await self.store.get_insertion_event_by_chunk_id(chunk_id_from_query) + await self.store.get_insertion_event_by_batch_id(batch_id_from_query) ) if corresponding_insertion_event_id is None: raise SynapseError( 400, - "No insertion event corresponds to the given ?chunk_id", + "No insertion event corresponds to the given ?batch_id", errcode=Codes.INVALID_PARAM, ) pass @@ -328,7 +328,7 @@ class RoomBatchSendEventRestServlet(RestServlet): base_insertion_event_dict = self._create_insertion_event_dict( sender=requester.user.to_string(), room_id=room_id, - origin_server_ts=last_event_in_chunk["origin_server_ts"], + origin_server_ts=last_event_in_batch["origin_server_ts"], ) base_insertion_event_dict["prev_events"] = prev_event_ids.copy() @@ -347,38 +347,38 @@ class RoomBatchSendEventRestServlet(RestServlet): depth=inherited_depth, ) - chunk_id_to_connect_to = base_insertion_event["content"][ - EventContentFields.MSC2716_NEXT_CHUNK_ID + batch_id_to_connect_to = base_insertion_event["content"][ + EventContentFields.MSC2716_NEXT_BATCH_ID ] - # Connect this current chunk to the insertion event from the previous chunk - chunk_event = { - "type": EventTypes.MSC2716_CHUNK, + # Connect this current batch to the insertion event from the previous batch + batch_event = { + "type": EventTypes.MSC2716_BATCH, "sender": requester.user.to_string(), "room_id": room_id, "content": { - EventContentFields.MSC2716_CHUNK_ID: chunk_id_to_connect_to, + EventContentFields.MSC2716_BATCH_ID: batch_id_to_connect_to, EventContentFields.MSC2716_HISTORICAL: True, }, - # Since the chunk event is put at the end of the chunk, + # Since the batch event is put at the end of the batch, # where the newest-in-time event is, copy the origin_server_ts from # the last event we're inserting - "origin_server_ts": last_event_in_chunk["origin_server_ts"], + "origin_server_ts": last_event_in_batch["origin_server_ts"], } - # Add the chunk event to the end of the chunk (newest-in-time) - events_to_create.append(chunk_event) + # Add the batch event to the end of the batch (newest-in-time) + events_to_create.append(batch_event) - # Add an "insertion" event to the start of each chunk (next to the oldest-in-time - # event in the chunk) so the next chunk can be connected to this one. + # Add an "insertion" event to the start of each batch (next to the oldest-in-time + # event in the batch) so the next batch can be connected to this one. insertion_event = self._create_insertion_event_dict( sender=requester.user.to_string(), room_id=room_id, - # Since the insertion event is put at the start of the chunk, + # Since the insertion event is put at the start of the batch, # where the oldest-in-time event is, copy the origin_server_ts from # the first event we're inserting origin_server_ts=events_to_create[0]["origin_server_ts"], ) - # Prepend the insertion event to the start of the chunk (oldest-in-time) + # Prepend the insertion event to the start of the batch (oldest-in-time) events_to_create = [insertion_event] + events_to_create event_ids = [] @@ -439,17 +439,17 @@ class RoomBatchSendEventRestServlet(RestServlet): ) insertion_event_id = event_ids[0] - chunk_event_id = event_ids[-1] + batch_event_id = event_ids[-1] historical_event_ids = event_ids[1:-1] response_dict = { "state_event_ids": state_event_ids_at_start, "event_ids": historical_event_ids, - "next_chunk_id": insertion_event["content"][ - EventContentFields.MSC2716_NEXT_CHUNK_ID + "next_batch_id": insertion_event["content"][ + EventContentFields.MSC2716_NEXT_BATCH_ID ], "insertion_event_id": insertion_event_id, - "chunk_event_id": chunk_event_id, + "batch_event_id": batch_event_id, } if base_insertion_event is not None: response_dict["base_insertion_event_id"] = base_insertion_event.event_id diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 047782eb06..10184d6ae7 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1034,13 +1034,13 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas LIMIT ? """ - # Find any chunk connections of a given insertion event - chunk_connection_query = """ + # Find any batch connections of a given insertion event + batch_connection_query = """ SELECT e.depth, c.event_id FROM insertion_events AS i - /* Find the chunk that connects to the given insertion event */ - INNER JOIN chunk_events AS c - ON i.next_chunk_id = c.chunk_id - /* Get the depth of the chunk start event from the events table */ + /* Find the batch that connects to the given insertion event */ + INNER JOIN batch_events AS c + ON i.next_batch_id = c.batch_id + /* Get the depth of the batch start event from the events table */ INNER JOIN events AS e USING (event_id) /* Find an insertion event which matches the given event_id */ WHERE i.event_id = ? @@ -1077,12 +1077,12 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas event_results.add(event_id) - # Try and find any potential historical chunks of message history. + # Try and find any potential historical batches of message history. # # First we look for an insertion event connected to the current # event (by prev_event). If we find any, we need to go and try to - # find any chunk events connected to the insertion event (by - # chunk_id). If we find any, we'll add them to the queue and + # find any batch events connected to the insertion event (by + # batch_id). If we find any, we'll add them to the queue and # navigate up the DAG like normal in the next iteration of the loop. txn.execute( connected_insertion_event_query, (event_id, limit - len(event_results)) @@ -1097,17 +1097,17 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas connected_insertion_event = row[1] queue.put((-connected_insertion_event_depth, connected_insertion_event)) - # Find any chunk connections for the given insertion event + # Find any batch connections for the given insertion event txn.execute( - chunk_connection_query, + batch_connection_query, (connected_insertion_event, limit - len(event_results)), ) - chunk_start_event_id_results = txn.fetchall() + batch_start_event_id_results = txn.fetchall() logger.debug( - "_get_backfill_events: chunk_start_event_id_results %s", - chunk_start_event_id_results, + "_get_backfill_events: batch_start_event_id_results %s", + batch_start_event_id_results, ) - for row in chunk_start_event_id_results: + for row in batch_start_event_id_results: if row[1] not in event_results: queue.put((-row[0], row[1])) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index dec7e8594e..584f818ff3 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1509,7 +1509,7 @@ class PersistEventsStore: self._handle_event_relations(txn, event) self._handle_insertion_event(txn, event) - self._handle_chunk_event(txn, event) + self._handle_batch_event(txn, event) # Store the labels for this event. labels = event.content.get(EventContentFields.LABELS) @@ -1790,23 +1790,23 @@ class PersistEventsStore: ): return - next_chunk_id = event.content.get(EventContentFields.MSC2716_NEXT_CHUNK_ID) - if next_chunk_id is None: - # Invalid insertion event without next chunk ID + next_batch_id = event.content.get(EventContentFields.MSC2716_NEXT_BATCH_ID) + if next_batch_id is None: + # Invalid insertion event without next batch ID return logger.debug( - "_handle_insertion_event (next_chunk_id=%s) %s", next_chunk_id, event + "_handle_insertion_event (next_batch_id=%s) %s", next_batch_id, event ) - # Keep track of the insertion event and the chunk ID + # Keep track of the insertion event and the batch ID self.db_pool.simple_insert_txn( txn, table="insertion_events", values={ "event_id": event.event_id, "room_id": event.room_id, - "next_chunk_id": next_chunk_id, + "next_batch_id": next_batch_id, }, ) @@ -1822,8 +1822,8 @@ class PersistEventsStore: }, ) - def _handle_chunk_event(self, txn: LoggingTransaction, event: EventBase): - """Handles inserting the chunk edges/connections between the chunk event + def _handle_batch_event(self, txn: LoggingTransaction, event: EventBase): + """Handles inserting the batch edges/connections between the batch event and an insertion event. Part of MSC2716. Args: @@ -1831,11 +1831,11 @@ class PersistEventsStore: event: The event to process """ - if event.type != EventTypes.MSC2716_CHUNK: - # Not a chunk event + if event.type != EventTypes.MSC2716_BATCH: + # Not a batch event return - # Skip processing a chunk event if the room version doesn't + # Skip processing a batch event if the room version doesn't # support it or the event is not from the room creator. room_version = self.store.get_room_version_txn(txn, event.room_id) room_creator = self.db_pool.simple_select_one_onecol_txn( @@ -1852,35 +1852,35 @@ class PersistEventsStore: ): return - chunk_id = event.content.get(EventContentFields.MSC2716_CHUNK_ID) - if chunk_id is None: - # Invalid chunk event without a chunk ID + batch_id = event.content.get(EventContentFields.MSC2716_BATCH_ID) + if batch_id is None: + # Invalid batch event without a batch ID return - logger.debug("_handle_chunk_event chunk_id=%s %s", chunk_id, event) + logger.debug("_handle_batch_event batch_id=%s %s", batch_id, event) - # Keep track of the insertion event and the chunk ID + # Keep track of the insertion event and the batch ID self.db_pool.simple_insert_txn( txn, - table="chunk_events", + table="batch_events", values={ "event_id": event.event_id, "room_id": event.room_id, - "chunk_id": chunk_id, + "batch_id": batch_id, }, ) - # When we receive an event with a `chunk_id` referencing the - # `next_chunk_id` of the insertion event, we can remove it from the + # When we receive an event with a `batch_id` referencing the + # `next_batch_id` of the insertion event, we can remove it from the # `insertion_event_extremities` table. sql = """ DELETE FROM insertion_event_extremities WHERE event_id IN ( SELECT event_id FROM insertion_events - WHERE next_chunk_id = ? + WHERE next_batch_id = ? ) """ - txn.execute(sql, (chunk_id,)) + txn.execute(sql, (batch_id,)) def _handle_redaction(self, txn, redacted_event_id): """Handles receiving a redaction and checking whether we need to remove diff --git a/synapse/storage/databases/main/room_batch.py b/synapse/storage/databases/main/room_batch.py index 54fa361d3e..a383388757 100644 --- a/synapse/storage/databases/main/room_batch.py +++ b/synapse/storage/databases/main/room_batch.py @@ -18,11 +18,11 @@ from synapse.storage._base import SQLBaseStore class RoomBatchStore(SQLBaseStore): - async def get_insertion_event_by_chunk_id(self, chunk_id: str) -> Optional[str]: + async def get_insertion_event_by_batch_id(self, batch_id: str) -> Optional[str]: """Retrieve a insertion event ID. Args: - chunk_id: The chunk ID of the insertion event to retrieve. + batch_id: The batch ID of the insertion event to retrieve. Returns: The event_id of an insertion event, or None if there is no known @@ -30,7 +30,7 @@ class RoomBatchStore(SQLBaseStore): """ return await self.db_pool.simple_select_one_onecol( table="insertion_events", - keyvalues={"next_chunk_id": chunk_id}, + keyvalues={"next_batch_id": batch_id}, retcol="event_id", allow_none=True, ) diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index af9cc69949..aa2ce44c6c 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -14,7 +14,7 @@ # When updating these values, please leave a short summary of the changes below. -SCHEMA_VERSION = 63 +SCHEMA_VERSION = 64 """Represents the expectations made by the codebase about the database schema This should be incremented whenever the codebase changes its requirements on the diff --git a/synapse/storage/schema/main/delta/64/01msc2716_chunk_to_batch_rename.sql.postgres b/synapse/storage/schema/main/delta/64/01msc2716_chunk_to_batch_rename.sql.postgres new file mode 100644 index 0000000000..5f38993208 --- /dev/null +++ b/synapse/storage/schema/main/delta/64/01msc2716_chunk_to_batch_rename.sql.postgres @@ -0,0 +1,23 @@ +/* Copyright 2021 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +ALTER TABLE insertion_events RENAME COLUMN next_chunk_id TO next_batch_id; +DROP INDEX insertion_events_next_chunk_id; +CREATE INDEX IF NOT EXISTS insertion_events_next_batch_id ON insertion_events(next_batch_id); + +ALTER TABLE chunk_events RENAME TO batch_events; +ALTER TABLE batch_events RENAME COLUMN chunk_id TO batch_id; +DROP INDEX chunk_events_chunk_id; +CREATE INDEX IF NOT EXISTS batch_events_batch_id ON batch_events(batch_id); diff --git a/synapse/storage/schema/main/delta/64/01msc2716_chunk_to_batch_rename.sql.sqlite b/synapse/storage/schema/main/delta/64/01msc2716_chunk_to_batch_rename.sql.sqlite new file mode 100644 index 0000000000..4989563995 --- /dev/null +++ b/synapse/storage/schema/main/delta/64/01msc2716_chunk_to_batch_rename.sql.sqlite @@ -0,0 +1,37 @@ +/* Copyright 2021 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Re-create the insertion_events table since SQLite doesn't support better +-- renames for columns (next_chunk_id -> next_batch_id) +DROP TABLE insertion_events; +CREATE TABLE IF NOT EXISTS insertion_events( + event_id TEXT NOT NULL, + room_id TEXT NOT NULL, + next_batch_id TEXT NOT NULL +); +CREATE UNIQUE INDEX IF NOT EXISTS insertion_events_event_id ON insertion_events(event_id); +CREATE INDEX IF NOT EXISTS insertion_events_next_batch_id ON insertion_events(next_batch_id); + +-- Re-create the chunk_events table since SQLite doesn't support better renames +-- for columns (chunk_id -> batch_id) +DROP TABLE chunk_events; +CREATE TABLE IF NOT EXISTS batch_events( + event_id TEXT NOT NULL, + room_id TEXT NOT NULL, + batch_id TEXT NOT NULL +); + +CREATE UNIQUE INDEX IF NOT EXISTS batch_events_event_id ON batch_events(event_id); +CREATE INDEX IF NOT EXISTS batch_events_batch_id ON batch_events(batch_id); -- cgit 1.5.1