From 64ef25391d22795463ebf3c48604f7aee1690fe4 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 11 Nov 2021 08:47:31 -0500 Subject: Add type hints to some storage classes (#11307) --- synapse/storage/databases/main/censor_events.py | 30 +++++++------- synapse/storage/databases/main/deviceinbox.py | 52 +++++++++++++++++-------- synapse/storage/databases/main/filtering.py | 6 ++- synapse/storage/databases/main/lock.py | 6 ++- synapse/storage/databases/main/openid.py | 17 +++++++- synapse/storage/databases/main/tags.py | 27 ++++++++----- 6 files changed, 93 insertions(+), 45 deletions(-) (limited to 'synapse/storage/databases') diff --git a/synapse/storage/databases/main/censor_events.py b/synapse/storage/databases/main/censor_events.py index eee07227ef..0f56e10220 100644 --- a/synapse/storage/databases/main/censor_events.py +++ b/synapse/storage/databases/main/censor_events.py @@ -13,12 +13,12 @@ # limitations under the License. import logging -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Optional from synapse.events.utils import prune_event_dict from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.storage._base import SQLBaseStore -from synapse.storage.database import DatabasePool +from synapse.storage.database import DatabasePool, LoggingTransaction from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore from synapse.storage.databases.main.events_worker import EventsWorkerStore from synapse.util import json_encoder @@ -41,7 +41,7 @@ class CensorEventsStore(EventsWorkerStore, CacheInvalidationWorkerStore, SQLBase hs.get_clock().looping_call(self._censor_redactions, 5 * 60 * 1000) @wrap_as_background_process("_censor_redactions") - async def _censor_redactions(self): + async def _censor_redactions(self) -> None: """Censors all redactions older than the configured period that haven't been censored yet. @@ -105,7 +105,7 @@ class CensorEventsStore(EventsWorkerStore, CacheInvalidationWorkerStore, SQLBase and original_event.internal_metadata.is_redacted() ): # Redaction was allowed - pruned_json = json_encoder.encode( + pruned_json: Optional[str] = json_encoder.encode( prune_event_dict( original_event.room_version, original_event.get_dict() ) @@ -116,7 +116,7 @@ class CensorEventsStore(EventsWorkerStore, CacheInvalidationWorkerStore, SQLBase updates.append((redaction_id, event_id, pruned_json)) - def _update_censor_txn(txn): + def _update_censor_txn(txn: LoggingTransaction) -> None: for redaction_id, event_id, pruned_json in updates: if pruned_json: self._censor_event_txn(txn, event_id, pruned_json) @@ -130,14 +130,16 @@ class CensorEventsStore(EventsWorkerStore, CacheInvalidationWorkerStore, SQLBase await self.db_pool.runInteraction("_update_censor_txn", _update_censor_txn) - def _censor_event_txn(self, txn, event_id, pruned_json): + def _censor_event_txn( + self, txn: LoggingTransaction, event_id: str, pruned_json: str + ) -> None: """Censor an event by replacing its JSON in the event_json table with the provided pruned JSON. Args: - txn (LoggingTransaction): The database transaction. - event_id (str): The ID of the event to censor. - pruned_json (str): The pruned JSON + txn: The database transaction. + event_id: The ID of the event to censor. + pruned_json: The pruned JSON """ self.db_pool.simple_update_one_txn( txn, @@ -157,7 +159,7 @@ class CensorEventsStore(EventsWorkerStore, CacheInvalidationWorkerStore, SQLBase # Try to retrieve the event's content from the database or the event cache. event = await self.get_event(event_id) - def delete_expired_event_txn(txn): + def delete_expired_event_txn(txn: LoggingTransaction) -> None: # Delete the expiry timestamp associated with this event from the database. self._delete_event_expiry_txn(txn, event_id) @@ -194,14 +196,14 @@ class CensorEventsStore(EventsWorkerStore, CacheInvalidationWorkerStore, SQLBase "delete_expired_event", delete_expired_event_txn ) - def _delete_event_expiry_txn(self, txn, event_id): + def _delete_event_expiry_txn(self, txn: LoggingTransaction, event_id: str) -> None: """Delete the expiry timestamp associated with an event ID without deleting the actual event. Args: - txn (LoggingTransaction): The transaction to use to perform the deletion. - event_id (str): The event ID to delete the associated expiry timestamp of. + txn: The transaction to use to perform the deletion. + event_id: The event ID to delete the associated expiry timestamp of. """ - return self.db_pool.simple_delete_txn( + self.db_pool.simple_delete_txn( txn=txn, table="event_expiry", keyvalues={"event_id": event_id} ) diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py index ae3afdd5d2..7c0f953365 100644 --- a/synapse/storage/databases/main/deviceinbox.py +++ b/synapse/storage/databases/main/deviceinbox.py @@ -1,4 +1,5 @@ # Copyright 2016 OpenMarket Ltd +# Copyright 2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,9 +20,17 @@ from synapse.logging import issue9533_logger from synapse.logging.opentracing import log_kv, set_tag, trace from synapse.replication.tcp.streams import ToDeviceStream from synapse.storage._base import SQLBaseStore, db_to_json -from synapse.storage.database import DatabasePool, LoggingTransaction +from synapse.storage.database import ( + DatabasePool, + LoggingDatabaseConnection, + LoggingTransaction, +) from synapse.storage.engines import PostgresEngine -from synapse.storage.util.id_generators import MultiWriterIdGenerator, StreamIdGenerator +from synapse.storage.util.id_generators import ( + AbstractStreamIdGenerator, + MultiWriterIdGenerator, + StreamIdGenerator, +) from synapse.types import JsonDict from synapse.util import json_encoder from synapse.util.caches.expiringcache import ExpiringCache @@ -34,14 +43,21 @@ logger = logging.getLogger(__name__) class DeviceInboxWorkerStore(SQLBaseStore): - def __init__(self, database: DatabasePool, db_conn, hs: "HomeServer"): + def __init__( + self, + database: DatabasePool, + db_conn: LoggingDatabaseConnection, + hs: "HomeServer", + ): super().__init__(database, db_conn, hs) self._instance_name = hs.get_instance_name() # Map of (user_id, device_id) to the last stream_id that has been # deleted up to. This is so that we can no op deletions. - self._last_device_delete_cache = ExpiringCache( + self._last_device_delete_cache: ExpiringCache[ + Tuple[str, Optional[str]], int + ] = ExpiringCache( cache_name="last_device_delete_cache", clock=self._clock, max_len=10000, @@ -53,14 +69,16 @@ class DeviceInboxWorkerStore(SQLBaseStore): self._instance_name in hs.config.worker.writers.to_device ) - self._device_inbox_id_gen = MultiWriterIdGenerator( - db_conn=db_conn, - db=database, - stream_name="to_device", - instance_name=self._instance_name, - tables=[("device_inbox", "instance_name", "stream_id")], - sequence_name="device_inbox_sequence", - writers=hs.config.worker.writers.to_device, + self._device_inbox_id_gen: AbstractStreamIdGenerator = ( + MultiWriterIdGenerator( + db_conn=db_conn, + db=database, + stream_name="to_device", + instance_name=self._instance_name, + tables=[("device_inbox", "instance_name", "stream_id")], + sequence_name="device_inbox_sequence", + writers=hs.config.worker.writers.to_device, + ) ) else: self._can_write_to_device = True @@ -101,6 +119,8 @@ class DeviceInboxWorkerStore(SQLBaseStore): def process_replication_rows(self, stream_name, instance_name, token, rows): if stream_name == ToDeviceStream.NAME: + # If replication is happening than postgres must be being used. + assert isinstance(self._device_inbox_id_gen, MultiWriterIdGenerator) self._device_inbox_id_gen.advance(instance_name, token) for row in rows: if row.entity.startswith("@"): @@ -220,11 +240,11 @@ class DeviceInboxWorkerStore(SQLBaseStore): log_kv({"message": f"deleted {count} messages for device", "count": count}) # Update the cache, ensuring that we only ever increase the value - last_deleted_stream_id = self._last_device_delete_cache.get( + updated_last_deleted_stream_id = self._last_device_delete_cache.get( (user_id, device_id), 0 ) self._last_device_delete_cache[(user_id, device_id)] = max( - last_deleted_stream_id, up_to_stream_id + updated_last_deleted_stream_id, up_to_stream_id ) return count @@ -432,7 +452,7 @@ class DeviceInboxWorkerStore(SQLBaseStore): ) async with self._device_inbox_id_gen.get_next() as stream_id: - now_ms = self.clock.time_msec() + now_ms = self._clock.time_msec() await self.db_pool.runInteraction( "add_messages_to_device_inbox", add_messages_txn, now_ms, stream_id ) @@ -483,7 +503,7 @@ class DeviceInboxWorkerStore(SQLBaseStore): ) async with self._device_inbox_id_gen.get_next() as stream_id: - now_ms = self.clock.time_msec() + now_ms = self._clock.time_msec() await self.db_pool.runInteraction( "add_messages_from_remote_to_device_inbox", add_messages_txn, diff --git a/synapse/storage/databases/main/filtering.py b/synapse/storage/databases/main/filtering.py index 434986fa64..cf842803bc 100644 --- a/synapse/storage/databases/main/filtering.py +++ b/synapse/storage/databases/main/filtering.py @@ -1,4 +1,5 @@ # Copyright 2015, 2016 OpenMarket Ltd +# Copyright 2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -18,6 +19,7 @@ from canonicaljson import encode_canonical_json from synapse.api.errors import Codes, SynapseError from synapse.storage._base import SQLBaseStore, db_to_json +from synapse.storage.database import LoggingTransaction from synapse.types import JsonDict from synapse.util.caches.descriptors import cached @@ -49,7 +51,7 @@ class FilteringStore(SQLBaseStore): # Need an atomic transaction to SELECT the maximal ID so far then # INSERT a new one - def _do_txn(txn): + def _do_txn(txn: LoggingTransaction) -> int: sql = ( "SELECT filter_id FROM user_filters " "WHERE user_id = ? AND filter_json = ?" @@ -61,7 +63,7 @@ class FilteringStore(SQLBaseStore): sql = "SELECT MAX(filter_id) FROM user_filters WHERE user_id = ?" txn.execute(sql, (user_localpart,)) - max_id = txn.fetchone()[0] + max_id = txn.fetchone()[0] # type: ignore[index] if max_id is None: filter_id = 0 else: diff --git a/synapse/storage/databases/main/lock.py b/synapse/storage/databases/main/lock.py index 3d0df0cbd4..a540f7fb26 100644 --- a/synapse/storage/databases/main/lock.py +++ b/synapse/storage/databases/main/lock.py @@ -13,7 +13,7 @@ # limitations under the License. import logging from types import TracebackType -from typing import TYPE_CHECKING, Dict, Optional, Tuple, Type +from typing import TYPE_CHECKING, Optional, Tuple, Type from weakref import WeakValueDictionary from twisted.internet.interfaces import IReactorCore @@ -62,7 +62,9 @@ class LockStore(SQLBaseStore): # A map from `(lock_name, lock_key)` to the token of any locks that we # think we currently hold. - self._live_tokens: Dict[Tuple[str, str], Lock] = WeakValueDictionary() + self._live_tokens: WeakValueDictionary[ + Tuple[str, str], Lock + ] = WeakValueDictionary() # When we shut down we want to remove the locks. Technically this can # lead to a race, as we may drop the lock while we are still processing. diff --git a/synapse/storage/databases/main/openid.py b/synapse/storage/databases/main/openid.py index 2aac64901b..a46685219f 100644 --- a/synapse/storage/databases/main/openid.py +++ b/synapse/storage/databases/main/openid.py @@ -1,6 +1,21 @@ +# Copyright 2019-2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import Optional from synapse.storage._base import SQLBaseStore +from synapse.storage.database import LoggingTransaction class OpenIdStore(SQLBaseStore): @@ -20,7 +35,7 @@ class OpenIdStore(SQLBaseStore): async def get_user_id_for_open_id_token( self, token: str, ts_now_ms: int ) -> Optional[str]: - def get_user_id_for_token_txn(txn): + def get_user_id_for_token_txn(txn: LoggingTransaction) -> Optional[str]: sql = ( "SELECT user_id FROM open_id_tokens" " WHERE token = ? AND ? <= ts_valid_until_ms" diff --git a/synapse/storage/databases/main/tags.py b/synapse/storage/databases/main/tags.py index f93ff0a545..8f510de53d 100644 --- a/synapse/storage/databases/main/tags.py +++ b/synapse/storage/databases/main/tags.py @@ -1,5 +1,6 @@ # Copyright 2014-2016 OpenMarket Ltd # Copyright 2018 New Vector Ltd +# Copyright 2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,9 +15,10 @@ # limitations under the License. import logging -from typing import Dict, List, Tuple +from typing import Dict, List, Tuple, cast from synapse.storage._base import db_to_json +from synapse.storage.database import LoggingTransaction from synapse.storage.databases.main.account_data import AccountDataWorkerStore from synapse.types import JsonDict from synapse.util import json_encoder @@ -50,7 +52,7 @@ class TagsWorkerStore(AccountDataWorkerStore): async def get_all_updated_tags( self, instance_name: str, last_id: int, current_id: int, limit: int - ) -> Tuple[List[Tuple[int, tuple]], int, bool]: + ) -> Tuple[List[Tuple[int, Tuple[str, str, str]]], int, bool]: """Get updates for tags replication stream. Args: @@ -75,7 +77,9 @@ class TagsWorkerStore(AccountDataWorkerStore): if last_id == current_id: return [], current_id, False - def get_all_updated_tags_txn(txn): + def get_all_updated_tags_txn( + txn: LoggingTransaction, + ) -> List[Tuple[int, str, str]]: sql = ( "SELECT stream_id, user_id, room_id" " FROM room_tags_revisions as r" @@ -83,13 +87,16 @@ class TagsWorkerStore(AccountDataWorkerStore): " ORDER BY stream_id ASC LIMIT ?" ) txn.execute(sql, (last_id, current_id, limit)) - return txn.fetchall() + # mypy doesn't understand what the query is selecting. + return cast(List[Tuple[int, str, str]], txn.fetchall()) tag_ids = await self.db_pool.runInteraction( "get_all_updated_tags", get_all_updated_tags_txn ) - def get_tag_content(txn, tag_ids): + def get_tag_content( + txn: LoggingTransaction, tag_ids + ) -> List[Tuple[int, Tuple[str, str, str]]]: sql = "SELECT tag, content FROM room_tags WHERE user_id=? AND room_id=?" results = [] for stream_id, user_id, room_id in tag_ids: @@ -127,15 +134,15 @@ class TagsWorkerStore(AccountDataWorkerStore): given version Args: - user_id(str): The user to get the tags for. - stream_id(int): The earliest update to get for the user. + user_id: The user to get the tags for. + stream_id: The earliest update to get for the user. Returns: A mapping from room_id strings to lists of tag strings for all the rooms that changed since the stream_id token. """ - def get_updated_tags_txn(txn): + def get_updated_tags_txn(txn: LoggingTransaction) -> List[str]: sql = ( "SELECT room_id from room_tags_revisions" " WHERE user_id = ? AND stream_id > ?" @@ -200,7 +207,7 @@ class TagsWorkerStore(AccountDataWorkerStore): content_json = json_encoder.encode(content) - def add_tag_txn(txn, next_id): + def add_tag_txn(txn: LoggingTransaction, next_id: int) -> None: self.db_pool.simple_upsert_txn( txn, table="room_tags", @@ -224,7 +231,7 @@ class TagsWorkerStore(AccountDataWorkerStore): """ assert self._can_write_to_account_data - def remove_tag_txn(txn, next_id): + def remove_tag_txn(txn: LoggingTransaction, next_id: int) -> None: sql = ( "DELETE FROM room_tags " " WHERE user_id = ? AND room_id = ? AND tag = ?" -- cgit 1.5.1 From 8dc666f785c653e958728a74758bdb8afb494de4 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 11 Nov 2021 16:49:28 +0000 Subject: Correct type hint for room_batch.py (#11310) Co-authored-by: Patrick Cloke --- changelog.d/11310.misc | 1 + mypy.ini | 4 +++- synapse/storage/databases/main/room_batch.py | 18 ++++++++---------- 3 files changed, 12 insertions(+), 11 deletions(-) create mode 100644 changelog.d/11310.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/11310.misc b/changelog.d/11310.misc new file mode 100644 index 0000000000..e5c12445d5 --- /dev/null +++ b/changelog.d/11310.misc @@ -0,0 +1 @@ +Add type hints to storage classes. \ No newline at end of file diff --git a/mypy.ini b/mypy.ini index a06c7fc66b..48dfdfa0e0 100644 --- a/mypy.ini +++ b/mypy.ini @@ -46,7 +46,6 @@ exclude = (?x) |synapse/storage/databases/main/push_rule.py |synapse/storage/databases/main/receipts.py |synapse/storage/databases/main/room.py - |synapse/storage/databases/main/room_batch.py |synapse/storage/databases/main/roommember.py |synapse/storage/databases/main/search.py |synapse/storage/databases/main/signatures.py @@ -183,6 +182,9 @@ disallow_untyped_defs = True [mypy-synapse.storage.databases.main.client_ips] disallow_untyped_defs = True +[mypy-synapse.storage.databases.main.room_batch] +disallow_untyped_defs = True + [mypy-synapse.storage.util.*] disallow_untyped_defs = True diff --git a/synapse/storage/databases/main/room_batch.py b/synapse/storage/databases/main/room_batch.py index 97b2618437..39e80f6f5b 100644 --- a/synapse/storage/databases/main/room_batch.py +++ b/synapse/storage/databases/main/room_batch.py @@ -39,13 +39,11 @@ class RoomBatchStore(SQLBaseStore): async def store_state_group_id_for_event_id( self, event_id: str, state_group_id: int - ) -> Optional[str]: - { - await self.db_pool.simple_upsert( - table="event_to_state_groups", - keyvalues={"event_id": event_id}, - values={"state_group": state_group_id, "event_id": event_id}, - # Unique constraint on event_id so we don't have to lock - lock=False, - ) - } + ) -> None: + await self.db_pool.simple_upsert( + table="event_to_state_groups", + keyvalues={"event_id": event_id}, + values={"state_group": state_group_id, "event_id": event_id}, + # Unique constraint on event_id so we don't have to lock + lock=False, + ) -- cgit 1.5.1 From 6a605f4a77bebbbffa2ac812fbff4fe5f252d88e Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 11 Nov 2021 17:04:44 +0000 Subject: Get db signatures file to pass mypy (#11312) --- changelog.d/11312.misc | 1 + mypy.ini | 1 - synapse/events/builder.py | 12 +++++------- synapse/storage/databases/main/signatures.py | 4 ++-- 4 files changed, 8 insertions(+), 10 deletions(-) create mode 100644 changelog.d/11312.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/11312.misc b/changelog.d/11312.misc new file mode 100644 index 0000000000..86594a332d --- /dev/null +++ b/changelog.d/11312.misc @@ -0,0 +1 @@ +Add type hints to storage classes. diff --git a/mypy.ini b/mypy.ini index 48dfdfa0e0..3b7e1eb708 100644 --- a/mypy.ini +++ b/mypy.ini @@ -48,7 +48,6 @@ exclude = (?x) |synapse/storage/databases/main/room.py |synapse/storage/databases/main/roommember.py |synapse/storage/databases/main/search.py - |synapse/storage/databases/main/signatures.py |synapse/storage/databases/main/state.py |synapse/storage/databases/main/state_deltas.py |synapse/storage/databases/main/stats.py diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 4f409f31e1..eb39e0ae32 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -128,14 +128,12 @@ class EventBuilder: ) format_version = self.room_version.event_format + # The types of auth/prev events changes between event versions. + prev_events: Union[List[str], List[Tuple[str, Dict[str, str]]]] + auth_events: Union[List[str], List[Tuple[str, Dict[str, str]]]] if format_version == EventFormatVersions.V1: - # The types of auth/prev events changes between event versions. - auth_events: Union[ - List[str], List[Tuple[str, Dict[str, str]]] - ] = await self._store.add_event_hashes(auth_event_ids) - prev_events: Union[ - List[str], List[Tuple[str, Dict[str, str]]] - ] = await self._store.add_event_hashes(prev_event_ids) + auth_events = await self._store.add_event_hashes(auth_event_ids) + prev_events = await self._store.add_event_hashes(prev_event_ids) else: auth_events = auth_event_ids prev_events = prev_event_ids diff --git a/synapse/storage/databases/main/signatures.py b/synapse/storage/databases/main/signatures.py index ab2159c2d3..3201623fe4 100644 --- a/synapse/storage/databases/main/signatures.py +++ b/synapse/storage/databases/main/signatures.py @@ -63,12 +63,12 @@ class SignatureWorkerStore(SQLBaseStore): A list of tuples of event ID and a mapping of algorithm to base-64 encoded hash. """ hashes = await self.get_event_reference_hashes(event_ids) - hashes = { + encoded_hashes = { e_id: {k: encode_base64(v) for k, v in h.items() if k == "sha256"} for e_id, h in hashes.items() } - return list(hashes.items()) + return list(encoded_hashes.items()) def _get_event_reference_hashes_txn( self, txn: Cursor, event_id: str -- cgit 1.5.1 From c99da2d0794de7240e791eba1a2f2d703fdecb60 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 11 Nov 2021 19:22:19 +0000 Subject: Annotations for user_erasure_store (#11313) I'm not sure why this was excluded---it seemed to be passing for me. But it's easy enough to fixup. --- changelog.d/11313.misc | 1 + mypy.ini | 4 +++- synapse/storage/databases/main/user_erasure_store.py | 9 +++++---- 3 files changed, 9 insertions(+), 5 deletions(-) create mode 100644 changelog.d/11313.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/11313.misc b/changelog.d/11313.misc new file mode 100644 index 0000000000..86594a332d --- /dev/null +++ b/changelog.d/11313.misc @@ -0,0 +1 @@ +Add type hints to storage classes. diff --git a/mypy.ini b/mypy.ini index 3b7e1eb708..f0af4ab289 100644 --- a/mypy.ini +++ b/mypy.ini @@ -53,7 +53,6 @@ exclude = (?x) |synapse/storage/databases/main/stats.py |synapse/storage/databases/main/transactions.py |synapse/storage/databases/main/user_directory.py - |synapse/storage/databases/main/user_erasure_store.py |synapse/storage/schema/ |tests/api/test_auth.py @@ -184,6 +183,9 @@ disallow_untyped_defs = True [mypy-synapse.storage.databases.main.room_batch] disallow_untyped_defs = True +[mypy-synapse.storage.databases.main.user_erasure_store] +disallow_untyped_defs = True + [mypy-synapse.storage.util.*] disallow_untyped_defs = True diff --git a/synapse/storage/databases/main/user_erasure_store.py b/synapse/storage/databases/main/user_erasure_store.py index 1ecdd40c38..f79006533f 100644 --- a/synapse/storage/databases/main/user_erasure_store.py +++ b/synapse/storage/databases/main/user_erasure_store.py @@ -14,11 +14,12 @@ from typing import Dict, Iterable -from synapse.storage._base import SQLBaseStore +from synapse.storage.database import LoggingTransaction +from synapse.storage.databases.main import CacheInvalidationWorkerStore from synapse.util.caches.descriptors import cached, cachedList -class UserErasureWorkerStore(SQLBaseStore): +class UserErasureWorkerStore(CacheInvalidationWorkerStore): @cached() async def is_user_erased(self, user_id: str) -> bool: """ @@ -69,7 +70,7 @@ class UserErasureStore(UserErasureWorkerStore): user_id: full user_id to be erased """ - def f(txn): + def f(txn: LoggingTransaction) -> None: # first check if they are already in the list txn.execute("SELECT 1 FROM erased_users WHERE user_id = ?", (user_id,)) if txn.fetchone(): @@ -89,7 +90,7 @@ class UserErasureStore(UserErasureWorkerStore): user_id: full user_id to be un-erased """ - def f(txn): + def f(txn: LoggingTransaction) -> None: # first check if they are already in the list txn.execute("SELECT 1 FROM erased_users WHERE user_id = ?", (user_id,)) if not txn.fetchone(): -- cgit 1.5.1 From 6f8f3d4bc5283abdbc52f7376975d7f3bacfe159 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Fri, 12 Nov 2021 15:58:17 +0000 Subject: Attempt to annotate events_forward_extremities (#11314) * Make DataStore inherit from EventForwardExtremitiesStore before CacheInvalidationWorkerStore the former implicitly inherits from the latter, so they should be ordered like this when used. --- changelog.d/11314.misc | 1 + mypy.ini | 1 - synapse/storage/databases/main/__init__.py | 2 +- .../databases/main/events_forward_extremities.py | 21 +++++++++++++++------ 4 files changed, 17 insertions(+), 8 deletions(-) create mode 100644 changelog.d/11314.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/11314.misc b/changelog.d/11314.misc new file mode 100644 index 0000000000..86594a332d --- /dev/null +++ b/changelog.d/11314.misc @@ -0,0 +1 @@ +Add type hints to storage classes. diff --git a/mypy.ini b/mypy.ini index 60672ea5c8..d81e964dc7 100644 --- a/mypy.ini +++ b/mypy.ini @@ -34,7 +34,6 @@ exclude = (?x) |synapse/storage/databases/main/event_federation.py |synapse/storage/databases/main/event_push_actions.py |synapse/storage/databases/main/events_bg_updates.py - |synapse/storage/databases/main/events_forward_extremities.py |synapse/storage/databases/main/events_worker.py |synapse/storage/databases/main/group_server.py |synapse/storage/databases/main/media_repository.py diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py index 259cae5b37..e22aa0b9bc 100644 --- a/synapse/storage/databases/main/__init__.py +++ b/synapse/storage/databases/main/__init__.py @@ -123,9 +123,9 @@ class DataStore( RelationsStore, CensorEventsStore, UIAuthStore, + EventForwardExtremitiesStore, CacheInvalidationWorkerStore, ServerMetricsStore, - EventForwardExtremitiesStore, LockStore, SessionStore, ): diff --git a/synapse/storage/databases/main/events_forward_extremities.py b/synapse/storage/databases/main/events_forward_extremities.py index 6d2688d711..68901b4335 100644 --- a/synapse/storage/databases/main/events_forward_extremities.py +++ b/synapse/storage/databases/main/events_forward_extremities.py @@ -13,15 +13,20 @@ # limitations under the License. import logging -from typing import Dict, List +from typing import Any, Dict, List from synapse.api.errors import SynapseError -from synapse.storage._base import SQLBaseStore +from synapse.storage.database import LoggingTransaction +from synapse.storage.databases.main import CacheInvalidationWorkerStore +from synapse.storage.databases.main.event_federation import EventFederationWorkerStore logger = logging.getLogger(__name__) -class EventForwardExtremitiesStore(SQLBaseStore): +class EventForwardExtremitiesStore( + EventFederationWorkerStore, + CacheInvalidationWorkerStore, +): async def delete_forward_extremities_for_room(self, room_id: str) -> int: """Delete any extra forward extremities for a room. @@ -31,7 +36,7 @@ class EventForwardExtremitiesStore(SQLBaseStore): Returns count deleted. """ - def delete_forward_extremities_for_room_txn(txn): + def delete_forward_extremities_for_room_txn(txn: LoggingTransaction) -> int: # First we need to get the event_id to not delete sql = """ SELECT event_id FROM event_forward_extremities @@ -82,10 +87,14 @@ class EventForwardExtremitiesStore(SQLBaseStore): delete_forward_extremities_for_room_txn, ) - async def get_forward_extremities_for_room(self, room_id: str) -> List[Dict]: + async def get_forward_extremities_for_room( + self, room_id: str + ) -> List[Dict[str, Any]]: """Get list of forward extremities for a room.""" - def get_forward_extremities_for_room_txn(txn): + def get_forward_extremities_for_room_txn( + txn: LoggingTransaction, + ) -> List[Dict[str, Any]]: sql = """ SELECT event_id, state_group, depth, received_ts FROM event_forward_extremities -- cgit 1.5.1 From 9b90b9454b8855e0575785560662d8e47378094d Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 12 Nov 2021 11:05:26 -0500 Subject: Add type hints to media repository storage module (#11311) --- changelog.d/11311.misc | 1 + mypy.ini | 1 - synapse/rest/media/v1/preview_url_resource.py | 8 +- synapse/storage/databases/main/media_repository.py | 141 ++++++++++++--------- 4 files changed, 89 insertions(+), 62 deletions(-) create mode 100644 changelog.d/11311.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/11311.misc b/changelog.d/11311.misc new file mode 100644 index 0000000000..86594a332d --- /dev/null +++ b/changelog.d/11311.misc @@ -0,0 +1 @@ +Add type hints to storage classes. diff --git a/mypy.ini b/mypy.ini index d81e964dc7..56a62bb9b7 100644 --- a/mypy.ini +++ b/mypy.ini @@ -36,7 +36,6 @@ exclude = (?x) |synapse/storage/databases/main/events_bg_updates.py |synapse/storage/databases/main/events_worker.py |synapse/storage/databases/main/group_server.py - |synapse/storage/databases/main/media_repository.py |synapse/storage/databases/main/metrics.py |synapse/storage/databases/main/monthly_active_users.py |synapse/storage/databases/main/presence.py diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 8ca97b5b18..054f3c296d 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -45,7 +45,7 @@ from synapse.metrics.background_process_metrics import run_as_background_process from synapse.rest.media.v1._base import get_filename_from_headers from synapse.rest.media.v1.media_storage import MediaStorage from synapse.rest.media.v1.oembed import OEmbedProvider -from synapse.types import JsonDict +from synapse.types import JsonDict, UserID from synapse.util import json_encoder from synapse.util.async_helpers import ObservableDeferred from synapse.util.caches.expiringcache import ExpiringCache @@ -231,7 +231,7 @@ class PreviewUrlResource(DirectServeJsonResource): og = await make_deferred_yieldable(observable.observe()) respond_with_json_bytes(request, 200, og, send_cors=True) - async def _do_preview(self, url: str, user: str, ts: int) -> bytes: + async def _do_preview(self, url: str, user: UserID, ts: int) -> bytes: """Check the db, and download the URL and build a preview Args: @@ -360,7 +360,7 @@ class PreviewUrlResource(DirectServeJsonResource): return jsonog.encode("utf8") - async def _download_url(self, url: str, user: str) -> MediaInfo: + async def _download_url(self, url: str, user: UserID) -> MediaInfo: # TODO: we should probably honour robots.txt... except in practice # we're most likely being explicitly triggered by a human rather than a # bot, so are we really a robot? @@ -450,7 +450,7 @@ class PreviewUrlResource(DirectServeJsonResource): ) async def _precache_image_url( - self, user: str, media_info: MediaInfo, og: JsonDict + self, user: UserID, media_info: MediaInfo, og: JsonDict ) -> None: """ Pre-cache the image (if one exists) for posterity diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py index 717487be28..1b076683f7 100644 --- a/synapse/storage/databases/main/media_repository.py +++ b/synapse/storage/databases/main/media_repository.py @@ -13,10 +13,25 @@ # See the License for the specific language governing permissions and # limitations under the License. from enum import Enum -from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple +from typing import ( + TYPE_CHECKING, + Any, + Collection, + Dict, + Iterable, + List, + Optional, + Tuple, + Union, +) from synapse.storage._base import SQLBaseStore -from synapse.storage.database import DatabasePool +from synapse.storage.database import ( + DatabasePool, + LoggingDatabaseConnection, + LoggingTransaction, +) +from synapse.types import JsonDict, UserID if TYPE_CHECKING: from synapse.server import HomeServer @@ -46,7 +61,12 @@ class MediaSortOrder(Enum): class MediaRepositoryBackgroundUpdateStore(SQLBaseStore): - def __init__(self, database: DatabasePool, db_conn, hs: "HomeServer"): + def __init__( + self, + database: DatabasePool, + db_conn: LoggingDatabaseConnection, + hs: "HomeServer", + ): super().__init__(database, db_conn, hs) self.db_pool.updates.register_background_index_update( @@ -102,13 +122,15 @@ class MediaRepositoryBackgroundUpdateStore(SQLBaseStore): self._drop_media_index_without_method, ) - async def _drop_media_index_without_method(self, progress, batch_size): + async def _drop_media_index_without_method( + self, progress: JsonDict, batch_size: int + ) -> int: """background update handler which removes the old constraints. Note that this is only run on postgres. """ - def f(txn): + def f(txn: LoggingTransaction) -> None: txn.execute( "ALTER TABLE local_media_repository_thumbnails DROP CONSTRAINT IF EXISTS local_media_repository_thumbn_media_id_thumbnail_width_thum_key" ) @@ -126,7 +148,12 @@ class MediaRepositoryBackgroundUpdateStore(SQLBaseStore): class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): """Persistence for attachments and avatars""" - def __init__(self, database: DatabasePool, db_conn, hs: "HomeServer"): + def __init__( + self, + database: DatabasePool, + db_conn: LoggingDatabaseConnection, + hs: "HomeServer", + ): super().__init__(database, db_conn, hs) self.server_name = hs.hostname @@ -174,7 +201,9 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): plus the total count of all the user's media """ - def get_local_media_by_user_paginate_txn(txn): + def get_local_media_by_user_paginate_txn( + txn: LoggingTransaction, + ) -> Tuple[List[Dict[str, Any]], int]: # Set ordering order_by_column = MediaSortOrder(order_by).value @@ -184,14 +213,14 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): else: order = "ASC" - args = [user_id] + args: List[Union[str, int]] = [user_id] sql = """ SELECT COUNT(*) as total_media FROM local_media_repository WHERE user_id = ? """ txn.execute(sql, args) - count = txn.fetchone()[0] + count = txn.fetchone()[0] # type: ignore[index] sql = """ SELECT @@ -268,7 +297,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): ) sql += sql_keep - def _get_local_media_before_txn(txn): + def _get_local_media_before_txn(txn: LoggingTransaction) -> List[str]: txn.execute(sql, (before_ts, before_ts, size_gt)) return [row[0] for row in txn] @@ -278,13 +307,13 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): async def store_local_media( self, - media_id, - media_type, - time_now_ms, - upload_name, - media_length, - user_id, - url_cache=None, + media_id: str, + media_type: str, + time_now_ms: int, + upload_name: Optional[str], + media_length: int, + user_id: UserID, + url_cache: Optional[str] = None, ) -> None: await self.db_pool.simple_insert( "local_media_repository", @@ -315,7 +344,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): None if the URL isn't cached. """ - def get_url_cache_txn(txn): + def get_url_cache_txn(txn: LoggingTransaction) -> Optional[Dict[str, Any]]: # get the most recently cached result (relative to the given ts) sql = ( "SELECT response_code, etag, expires_ts, og, media_id, download_ts" @@ -359,7 +388,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): async def store_url_cache( self, url, response_code, etag, expires_ts, og, media_id, download_ts - ): + ) -> None: await self.db_pool.simple_insert( "local_media_repository_url_cache", { @@ -390,13 +419,13 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): async def store_local_thumbnail( self, - media_id, - thumbnail_width, - thumbnail_height, - thumbnail_type, - thumbnail_method, - thumbnail_length, - ): + media_id: str, + thumbnail_width: int, + thumbnail_height: int, + thumbnail_type: str, + thumbnail_method: str, + thumbnail_length: int, + ) -> None: await self.db_pool.simple_upsert( table="local_media_repository_thumbnails", keyvalues={ @@ -430,14 +459,14 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): async def store_cached_remote_media( self, - origin, - media_id, - media_type, - media_length, - time_now_ms, - upload_name, - filesystem_id, - ): + origin: str, + media_id: str, + media_type: str, + media_length: int, + time_now_ms: int, + upload_name: Optional[str], + filesystem_id: str, + ) -> None: await self.db_pool.simple_insert( "remote_media_cache", { @@ -458,7 +487,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): local_media: Iterable[str], remote_media: Iterable[Tuple[str, str]], time_ms: int, - ): + ) -> None: """Updates the last access time of the given media Args: @@ -467,7 +496,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): time_ms: Current time in milliseconds """ - def update_cache_txn(txn): + def update_cache_txn(txn: LoggingTransaction) -> None: sql = ( "UPDATE remote_media_cache SET last_access_ts = ?" " WHERE media_origin = ? AND media_id = ?" @@ -488,7 +517,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): txn.execute_batch(sql, ((time_ms, media_id) for media_id in local_media)) - return await self.db_pool.runInteraction( + await self.db_pool.runInteraction( "update_cached_last_access_time", update_cache_txn ) @@ -542,15 +571,15 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): async def store_remote_media_thumbnail( self, - origin, - media_id, - filesystem_id, - thumbnail_width, - thumbnail_height, - thumbnail_type, - thumbnail_method, - thumbnail_length, - ): + origin: str, + media_id: str, + filesystem_id: str, + thumbnail_width: int, + thumbnail_height: int, + thumbnail_type: str, + thumbnail_method: str, + thumbnail_length: int, + ) -> None: await self.db_pool.simple_upsert( table="remote_media_cache_thumbnails", keyvalues={ @@ -566,7 +595,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): desc="store_remote_media_thumbnail", ) - async def get_remote_media_before(self, before_ts): + async def get_remote_media_before(self, before_ts: int) -> List[Dict[str, str]]: sql = ( "SELECT media_origin, media_id, filesystem_id" " FROM remote_media_cache" @@ -602,7 +631,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): " LIMIT 500" ) - def _get_expired_url_cache_txn(txn): + def _get_expired_url_cache_txn(txn: LoggingTransaction) -> List[str]: txn.execute(sql, (now_ts,)) return [row[0] for row in txn] @@ -610,18 +639,16 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): "get_expired_url_cache", _get_expired_url_cache_txn ) - async def delete_url_cache(self, media_ids): + async def delete_url_cache(self, media_ids: Collection[str]) -> None: if len(media_ids) == 0: return sql = "DELETE FROM local_media_repository_url_cache WHERE media_id = ?" - def _delete_url_cache_txn(txn): + def _delete_url_cache_txn(txn: LoggingTransaction) -> None: txn.execute_batch(sql, [(media_id,) for media_id in media_ids]) - return await self.db_pool.runInteraction( - "delete_url_cache", _delete_url_cache_txn - ) + await self.db_pool.runInteraction("delete_url_cache", _delete_url_cache_txn) async def get_url_cache_media_before(self, before_ts: int) -> List[str]: sql = ( @@ -631,7 +658,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): " LIMIT 500" ) - def _get_url_cache_media_before_txn(txn): + def _get_url_cache_media_before_txn(txn: LoggingTransaction) -> List[str]: txn.execute(sql, (before_ts,)) return [row[0] for row in txn] @@ -639,11 +666,11 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): "get_url_cache_media_before", _get_url_cache_media_before_txn ) - async def delete_url_cache_media(self, media_ids): + async def delete_url_cache_media(self, media_ids: Collection[str]) -> None: if len(media_ids) == 0: return - def _delete_url_cache_media_txn(txn): + def _delete_url_cache_media_txn(txn: LoggingTransaction) -> None: sql = "DELETE FROM local_media_repository WHERE media_id = ?" txn.execute_batch(sql, [(media_id,) for media_id in media_ids]) @@ -652,6 +679,6 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): txn.execute_batch(sql, [(media_id,) for media_id in media_ids]) - return await self.db_pool.runInteraction( + await self.db_pool.runInteraction( "delete_url_cache_media", _delete_url_cache_media_txn ) -- cgit 1.5.1 From 0bcae8ad56a64da72f278b4ec425d89c068b5df0 Mon Sep 17 00:00:00 2001 From: Shay Date: Fri, 12 Nov 2021 10:38:24 -0800 Subject: Change display names/avatar URLs to None if they contain null bytes before storing in DB (#11230) * change display names/avatar URLS to None if they contain null bytes * add changelog * add POC test, requested changes * add a saner test and remove old one * update test to verify that display name has been changed to None * make test less fragile --- changelog.d/11230.bugfix | 2 ++ synapse/storage/databases/main/events.py | 10 ++++--- tests/storage/test_roommember.py | 48 ++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 4 deletions(-) create mode 100644 changelog.d/11230.bugfix (limited to 'synapse/storage/databases') diff --git a/changelog.d/11230.bugfix b/changelog.d/11230.bugfix new file mode 100644 index 0000000000..b2d6d4d024 --- /dev/null +++ b/changelog.d/11230.bugfix @@ -0,0 +1,2 @@ +Fix a long-standing bug wherein display names or avatar URLs containing null bytes cause an internal server error +when stored in the DB. \ No newline at end of file diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 596275c23c..120e4807d1 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1641,8 +1641,8 @@ class PersistEventsStore: def _store_room_members_txn(self, txn, events, backfilled): """Store a room member in the database.""" - def str_or_none(val: Any) -> Optional[str]: - return val if isinstance(val, str) else None + def non_null_str_or_none(val: Any) -> Optional[str]: + return val if isinstance(val, str) and "\u0000" not in val else None self.db_pool.simple_insert_many_txn( txn, @@ -1654,8 +1654,10 @@ class PersistEventsStore: "sender": event.user_id, "room_id": event.room_id, "membership": event.membership, - "display_name": str_or_none(event.content.get("displayname")), - "avatar_url": str_or_none(event.content.get("avatar_url")), + "display_name": non_null_str_or_none( + event.content.get("displayname") + ), + "avatar_url": non_null_str_or_none(event.content.get("avatar_url")), } for event in events ], diff --git a/tests/storage/test_roommember.py b/tests/storage/test_roommember.py index 2873e22ccf..fccab733c0 100644 --- a/tests/storage/test_roommember.py +++ b/tests/storage/test_roommember.py @@ -161,6 +161,54 @@ class RoomMemberStoreTestCase(unittest.HomeserverTestCase): ) self.assertEqual(users.keys(), {self.u_alice, self.u_bob}) + def test__null_byte_in_display_name_properly_handled(self): + room = self.helper.create_room_as(self.u_alice, tok=self.t_alice) + + res = self.get_success( + self.store.db_pool.simple_select_list( + "room_memberships", + {"user_id": "@alice:test"}, + ["display_name", "event_id"], + ) + ) + # Check that we only got one result back + self.assertEqual(len(res), 1) + + # Check that alice's display name is "alice" + self.assertEqual(res[0]["display_name"], "alice") + + # Grab the event_id to use later + event_id = res[0]["event_id"] + + # Create a profile with the offending null byte in the display name + new_profile = {"displayname": "ali\u0000ce"} + + # Ensure that the change goes smoothly and does not fail due to the null byte + self.helper.change_membership( + room, + self.u_alice, + self.u_alice, + "join", + extra_data=new_profile, + tok=self.t_alice, + ) + + res2 = self.get_success( + self.store.db_pool.simple_select_list( + "room_memberships", + {"user_id": "@alice:test"}, + ["display_name", "event_id"], + ) + ) + # Check that we only have two results + self.assertEqual(len(res2), 2) + + # Filter out the previous event using the event_id we grabbed above + row = [row for row in res2 if row["event_id"] != event_id] + + # Check that alice's display name is now None + self.assertEqual(row[0]["display_name"], None) + class CurrentStateMembershipUpdateTestCase(unittest.HomeserverTestCase): def prepare(self, reactor, clock, homeserver): -- cgit 1.5.1 From bea815cec82096efc15e75875d3b8372fcb4f28b Mon Sep 17 00:00:00 2001 From: David Robertson Date: Fri, 12 Nov 2021 19:56:00 +0000 Subject: Test room alias deletion (#11327) * Prefer `HTTPStatus` over plain `int` This is an Opinion that no-one has seemed to object to yet. * `--disallow-untyped-defs` for `tests.rest.client.test_directory` * Improve synapse's annotations for deleting aliases * Test case for deleting a room alias * Changelog --- changelog.d/11327.misc | 1 + mypy.ini | 3 + synapse/handlers/directory.py | 6 +- synapse/storage/databases/main/directory.py | 7 +- tests/rest/client/test_directory.py | 105 ++++++++++++++++++++-------- 5 files changed, 91 insertions(+), 31 deletions(-) create mode 100644 changelog.d/11327.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/11327.misc b/changelog.d/11327.misc new file mode 100644 index 0000000000..389e360457 --- /dev/null +++ b/changelog.d/11327.misc @@ -0,0 +1 @@ +Test that room alias deletion works as intended. \ No newline at end of file diff --git a/mypy.ini b/mypy.ini index 56a62bb9b7..67dde5991b 100644 --- a/mypy.ini +++ b/mypy.ini @@ -271,6 +271,9 @@ disallow_untyped_defs = True [mypy-tests] disallow_untyped_defs = True +[mypy-tests.rest.client.test_directory] +disallow_untyped_defs = True + ;; Dependencies without annotations ;; Before ignoring a module, check to see if type stubs are available. ;; The `typeshed` project maintains stubs here: diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index 8ca5f60b1c..7ee5c47fd9 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -204,6 +204,10 @@ class DirectoryHandler: ) room_id = await self._delete_association(room_alias) + if room_id is None: + # It's possible someone else deleted the association after the + # checks above, but before we did the deletion. + raise NotFoundError("Unknown room alias") try: await self._update_canonical_alias(requester, user_id, room_id, room_alias) @@ -225,7 +229,7 @@ class DirectoryHandler: ) await self._delete_association(room_alias) - async def _delete_association(self, room_alias: RoomAlias) -> str: + async def _delete_association(self, room_alias: RoomAlias) -> Optional[str]: if not self.hs.is_mine(room_alias): raise SynapseError(400, "Room alias must be local") diff --git a/synapse/storage/databases/main/directory.py b/synapse/storage/databases/main/directory.py index 6daf8b8ffb..25131b1ea9 100644 --- a/synapse/storage/databases/main/directory.py +++ b/synapse/storage/databases/main/directory.py @@ -17,6 +17,7 @@ from typing import Iterable, List, Optional from synapse.api.errors import SynapseError from synapse.storage._base import SQLBaseStore +from synapse.storage.database import LoggingTransaction from synapse.types import RoomAlias from synapse.util.caches.descriptors import cached @@ -126,14 +127,16 @@ class DirectoryWorkerStore(SQLBaseStore): class DirectoryStore(DirectoryWorkerStore): - async def delete_room_alias(self, room_alias: RoomAlias) -> str: + async def delete_room_alias(self, room_alias: RoomAlias) -> Optional[str]: room_id = await self.db_pool.runInteraction( "delete_room_alias", self._delete_room_alias_txn, room_alias ) return room_id - def _delete_room_alias_txn(self, txn, room_alias: RoomAlias) -> str: + def _delete_room_alias_txn( + self, txn: LoggingTransaction, room_alias: RoomAlias + ) -> Optional[str]: txn.execute( "SELECT room_id FROM room_aliases WHERE room_alias = ?", (room_alias.to_string(),), diff --git a/tests/rest/client/test_directory.py b/tests/rest/client/test_directory.py index d2181ea907..aca03afd0e 100644 --- a/tests/rest/client/test_directory.py +++ b/tests/rest/client/test_directory.py @@ -11,12 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import json +from http import HTTPStatus + +from twisted.test.proto_helpers import MemoryReactor from synapse.rest import admin from synapse.rest.client import directory, login, room +from synapse.server import HomeServer from synapse.types import RoomAlias +from synapse.util import Clock from synapse.util.stringutils import random_string from tests import unittest @@ -32,7 +36,7 @@ class DirectoryTestCase(unittest.HomeserverTestCase): room.register_servlets, ] - def make_homeserver(self, reactor, clock): + def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: config = self.default_config() config["require_membership_for_aliases"] = True @@ -40,7 +44,11 @@ class DirectoryTestCase(unittest.HomeserverTestCase): return self.hs - def prepare(self, reactor, clock, homeserver): + def prepare( + self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer + ) -> None: + """Create two local users and access tokens for them. + One of them creates a room.""" self.room_owner = self.register_user("room_owner", "test") self.room_owner_tok = self.login("room_owner", "test") @@ -51,39 +59,39 @@ class DirectoryTestCase(unittest.HomeserverTestCase): self.user = self.register_user("user", "test") self.user_tok = self.login("user", "test") - def test_state_event_not_in_room(self): + def test_state_event_not_in_room(self) -> None: self.ensure_user_left_room() - self.set_alias_via_state_event(403) + self.set_alias_via_state_event(HTTPStatus.FORBIDDEN) - def test_directory_endpoint_not_in_room(self): + def test_directory_endpoint_not_in_room(self) -> None: self.ensure_user_left_room() - self.set_alias_via_directory(403) + self.set_alias_via_directory(HTTPStatus.FORBIDDEN) - def test_state_event_in_room_too_long(self): + def test_state_event_in_room_too_long(self) -> None: self.ensure_user_joined_room() - self.set_alias_via_state_event(400, alias_length=256) + self.set_alias_via_state_event(HTTPStatus.BAD_REQUEST, alias_length=256) - def test_directory_in_room_too_long(self): + def test_directory_in_room_too_long(self) -> None: self.ensure_user_joined_room() - self.set_alias_via_directory(400, alias_length=256) + self.set_alias_via_directory(HTTPStatus.BAD_REQUEST, alias_length=256) @override_config({"default_room_version": 5}) - def test_state_event_user_in_v5_room(self): + def test_state_event_user_in_v5_room(self) -> None: """Test that a regular user can add alias events before room v6""" self.ensure_user_joined_room() - self.set_alias_via_state_event(200) + self.set_alias_via_state_event(HTTPStatus.OK) @override_config({"default_room_version": 6}) - def test_state_event_v6_room(self): + def test_state_event_v6_room(self) -> None: """Test that a regular user can *not* add alias events from room v6""" self.ensure_user_joined_room() - self.set_alias_via_state_event(403) + self.set_alias_via_state_event(HTTPStatus.FORBIDDEN) - def test_directory_in_room(self): + def test_directory_in_room(self) -> None: self.ensure_user_joined_room() - self.set_alias_via_directory(200) + self.set_alias_via_directory(HTTPStatus.OK) - def test_room_creation_too_long(self): + def test_room_creation_too_long(self) -> None: url = "/_matrix/client/r0/createRoom" # We use deliberately a localpart under the length threshold so @@ -93,9 +101,9 @@ class DirectoryTestCase(unittest.HomeserverTestCase): channel = self.make_request( "POST", url, request_data, access_token=self.user_tok ) - self.assertEqual(channel.code, 400, channel.result) + self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST, channel.result) - def test_room_creation(self): + def test_room_creation(self) -> None: url = "/_matrix/client/r0/createRoom" # Check with an alias of allowed length. There should already be @@ -106,9 +114,46 @@ class DirectoryTestCase(unittest.HomeserverTestCase): channel = self.make_request( "POST", url, request_data, access_token=self.user_tok ) - self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) + + def test_deleting_alias_via_directory(self) -> None: + # Add an alias for the room. We must be joined to do so. + self.ensure_user_joined_room() + alias = self.set_alias_via_directory(HTTPStatus.OK) + + # Then try to remove the alias + channel = self.make_request( + "DELETE", + f"/_matrix/client/r0/directory/room/{alias}", + access_token=self.user_tok, + ) + self.assertEqual(channel.code, HTTPStatus.OK, channel.result) + + def test_deleting_nonexistant_alias(self) -> None: + # Check that no alias exists + alias = "#potato:test" + channel = self.make_request( + "GET", + f"/_matrix/client/r0/directory/room/{alias}", + access_token=self.user_tok, + ) + self.assertEqual(channel.code, HTTPStatus.NOT_FOUND, channel.result) + self.assertIn("error", channel.json_body, channel.json_body) + self.assertEqual(channel.json_body["errcode"], "M_NOT_FOUND", channel.json_body) + + # Then try to remove the alias + channel = self.make_request( + "DELETE", + f"/_matrix/client/r0/directory/room/{alias}", + access_token=self.user_tok, + ) + self.assertEqual(channel.code, HTTPStatus.NOT_FOUND, channel.result) + self.assertIn("error", channel.json_body, channel.json_body) + self.assertEqual(channel.json_body["errcode"], "M_NOT_FOUND", channel.json_body) - def set_alias_via_state_event(self, expected_code, alias_length=5): + def set_alias_via_state_event( + self, expected_code: HTTPStatus, alias_length: int = 5 + ) -> None: url = "/_matrix/client/r0/rooms/%s/state/m.room.aliases/%s" % ( self.room_id, self.hs.hostname, @@ -122,8 +167,11 @@ class DirectoryTestCase(unittest.HomeserverTestCase): ) self.assertEqual(channel.code, expected_code, channel.result) - def set_alias_via_directory(self, expected_code, alias_length=5): - url = "/_matrix/client/r0/directory/room/%s" % self.random_alias(alias_length) + def set_alias_via_directory( + self, expected_code: HTTPStatus, alias_length: int = 5 + ) -> str: + alias = self.random_alias(alias_length) + url = "/_matrix/client/r0/directory/room/%s" % alias data = {"room_id": self.room_id} request_data = json.dumps(data) @@ -131,17 +179,18 @@ class DirectoryTestCase(unittest.HomeserverTestCase): "PUT", url, request_data, access_token=self.user_tok ) self.assertEqual(channel.code, expected_code, channel.result) + return alias - def random_alias(self, length): + def random_alias(self, length: int) -> str: return RoomAlias(random_string(length), self.hs.hostname).to_string() - def ensure_user_left_room(self): + def ensure_user_left_room(self) -> None: self.ensure_membership("leave") - def ensure_user_joined_room(self): + def ensure_user_joined_room(self) -> None: self.ensure_membership("join") - def ensure_membership(self, membership): + def ensure_membership(self, membership: str) -> None: try: if membership == "leave": self.helper.leave(room=self.room_id, user=self.user, tok=self.user_tok) -- cgit 1.5.1 From fe58672546ce6e43e3cb4f9924efee51c905d801 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Fri, 12 Nov 2021 20:24:12 +0000 Subject: Annotations for state_deltas.py (#11316) I was sad that I couldn't do better for `_curr_state_delta_stream_cache`. At least it's explicitly called out in a comment with #TODO. --- changelog.d/11316.misc | 1 + mypy.ini | 4 +++- synapse/storage/databases/main/state_deltas.py | 16 +++++++++++++--- 3 files changed, 17 insertions(+), 4 deletions(-) create mode 100644 changelog.d/11316.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/11316.misc b/changelog.d/11316.misc new file mode 100644 index 0000000000..86594a332d --- /dev/null +++ b/changelog.d/11316.misc @@ -0,0 +1 @@ +Add type hints to storage classes. diff --git a/mypy.ini b/mypy.ini index 108e1e895c..d07273e63c 100644 --- a/mypy.ini +++ b/mypy.ini @@ -47,7 +47,6 @@ exclude = (?x) |synapse/storage/databases/main/roommember.py |synapse/storage/databases/main/search.py |synapse/storage/databases/main/state.py - |synapse/storage/databases/main/state_deltas.py |synapse/storage/databases/main/stats.py |synapse/storage/databases/main/transactions.py |synapse/storage/databases/main/user_directory.py @@ -181,6 +180,9 @@ disallow_untyped_defs = True [mypy-synapse.storage.databases.main.room_batch] disallow_untyped_defs = True +[mypy-synapse.storage.databases.main.state_deltas] +disallow_untyped_defs = True + [mypy-synapse.storage.databases.main.user_erasure_store] disallow_untyped_defs = True diff --git a/synapse/storage/databases/main/state_deltas.py b/synapse/storage/databases/main/state_deltas.py index a89747d741..7f3624b128 100644 --- a/synapse/storage/databases/main/state_deltas.py +++ b/synapse/storage/databases/main/state_deltas.py @@ -16,11 +16,17 @@ import logging from typing import Any, Dict, List, Tuple from synapse.storage._base import SQLBaseStore +from synapse.storage.database import LoggingTransaction +from synapse.util.caches.stream_change_cache import StreamChangeCache logger = logging.getLogger(__name__) class StateDeltasStore(SQLBaseStore): + # This class must be mixed in with a child class which provides the following + # attribute. TODO: can we get static analysis to enforce this? + _curr_state_delta_stream_cache: StreamChangeCache + async def get_current_state_deltas( self, prev_stream_id: int, max_stream_id: int ) -> Tuple[int, List[Dict[str, Any]]]: @@ -60,7 +66,9 @@ class StateDeltasStore(SQLBaseStore): # max_stream_id. return max_stream_id, [] - def get_current_state_deltas_txn(txn): + def get_current_state_deltas_txn( + txn: LoggingTransaction, + ) -> Tuple[int, List[Dict[str, Any]]]: # First we calculate the max stream id that will give us less than # N results. # We arbitrarily limit to 100 stream_id entries to ensure we don't @@ -106,7 +114,9 @@ class StateDeltasStore(SQLBaseStore): "get_current_state_deltas", get_current_state_deltas_txn ) - def _get_max_stream_id_in_current_state_deltas_txn(self, txn): + def _get_max_stream_id_in_current_state_deltas_txn( + self, txn: LoggingTransaction + ) -> int: return self.db_pool.simple_select_one_onecol_txn( txn, table="current_state_delta_stream", @@ -114,7 +124,7 @@ class StateDeltasStore(SQLBaseStore): retcol="COALESCE(MAX(stream_id), -1)", ) - async def get_max_stream_id_in_current_state_deltas(self): + async def get_max_stream_id_in_current_state_deltas(self) -> int: return await self.db_pool.runInteraction( "get_max_stream_id_in_current_state_deltas", self._get_max_stream_id_in_current_state_deltas_txn, -- cgit 1.5.1 From 5562ce6a534db61777ad81338aa5dd0a9a54032f Mon Sep 17 00:00:00 2001 From: David Robertson Date: Mon, 15 Nov 2021 12:59:05 +0000 Subject: Get directory db file to pass mypy (#11339) --- changelog.d/11339.misc | 1 + mypy.ini | 4 +++- synapse/storage/databases/main/__init__.py | 1 + synapse/storage/databases/main/directory.py | 12 ++++++------ 4 files changed, 11 insertions(+), 7 deletions(-) create mode 100644 changelog.d/11339.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/11339.misc b/changelog.d/11339.misc new file mode 100644 index 0000000000..86594a332d --- /dev/null +++ b/changelog.d/11339.misc @@ -0,0 +1 @@ +Add type hints to storage classes. diff --git a/mypy.ini b/mypy.ini index d07273e63c..710b1f3a4b 100644 --- a/mypy.ini +++ b/mypy.ini @@ -28,7 +28,6 @@ exclude = (?x) |synapse/storage/databases/main/account_data.py |synapse/storage/databases/main/cache.py |synapse/storage/databases/main/devices.py - |synapse/storage/databases/main/directory.py |synapse/storage/databases/main/e2e_room_keys.py |synapse/storage/databases/main/end_to_end_keys.py |synapse/storage/databases/main/event_federation.py @@ -177,6 +176,9 @@ disallow_untyped_defs = True [mypy-synapse.storage.databases.main.client_ips] disallow_untyped_defs = True +[mypy-synapse.storage.databases.main.directory] +disallow_untyped_defs = True + [mypy-synapse.storage.databases.main.room_batch] disallow_untyped_defs = True diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py index e22aa0b9bc..9ff2d8d8c3 100644 --- a/synapse/storage/databases/main/__init__.py +++ b/synapse/storage/databases/main/__init__.py @@ -154,6 +154,7 @@ class DataStore( db_conn, "local_group_updates", "stream_id" ) + self._cache_id_gen: Optional[MultiWriterIdGenerator] if isinstance(self.database_engine, PostgresEngine): # We set the `writers` to an empty list here as we don't care about # missing updates over restarts, as we'll not have anything in our diff --git a/synapse/storage/databases/main/directory.py b/synapse/storage/databases/main/directory.py index 25131b1ea9..a3442814d7 100644 --- a/synapse/storage/databases/main/directory.py +++ b/synapse/storage/databases/main/directory.py @@ -13,18 +13,18 @@ # limitations under the License. from collections import namedtuple -from typing import Iterable, List, Optional +from typing import Iterable, List, Optional, Tuple from synapse.api.errors import SynapseError -from synapse.storage._base import SQLBaseStore from synapse.storage.database import LoggingTransaction +from synapse.storage.databases.main import CacheInvalidationWorkerStore from synapse.types import RoomAlias from synapse.util.caches.descriptors import cached RoomAliasMapping = namedtuple("RoomAliasMapping", ("room_id", "room_alias", "servers")) -class DirectoryWorkerStore(SQLBaseStore): +class DirectoryWorkerStore(CacheInvalidationWorkerStore): async def get_association_from_room_alias( self, room_alias: RoomAlias ) -> Optional[RoomAliasMapping]: @@ -92,7 +92,7 @@ class DirectoryWorkerStore(SQLBaseStore): creator: Optional user_id of creator. """ - def alias_txn(txn): + def alias_txn(txn: LoggingTransaction) -> None: self.db_pool.simple_insert_txn( txn, "room_aliases", @@ -176,9 +176,9 @@ class DirectoryStore(DirectoryWorkerStore): If None, the creator will be left unchanged. """ - def _update_aliases_for_room_txn(txn): + def _update_aliases_for_room_txn(txn: LoggingTransaction) -> None: update_creator_sql = "" - sql_params = (new_room_id, old_room_id) + sql_params: Tuple[str, ...] = (new_room_id, old_room_id) if creator: update_creator_sql = ", creator = ?" sql_params = (new_room_id, creator, old_room_id) -- cgit 1.5.1 From e605e4b8f2447f0b6afa9acc104ae1882a732090 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Mon, 15 Nov 2021 12:59:33 +0000 Subject: Database storage profile passes mypy (#11342) It already seems to pass mypy. I wonder what changed, given that it was on the exclusion list. So this commit consists of me ensuring `--disallow-untyped-defs` passes and a minor fixup to a function that returned either `True` or `None`. --- changelog.d/11342.misc | 1 + mypy.ini | 7 ++++++- synapse/storage/databases/main/profile.py | 12 ++++++++---- tests/storage/test_profile.py | 9 ++++++--- 4 files changed, 21 insertions(+), 8 deletions(-) create mode 100644 changelog.d/11342.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/11342.misc b/changelog.d/11342.misc new file mode 100644 index 0000000000..86594a332d --- /dev/null +++ b/changelog.d/11342.misc @@ -0,0 +1 @@ +Add type hints to storage classes. diff --git a/mypy.ini b/mypy.ini index 710b1f3a4b..b2953974ea 100644 --- a/mypy.ini +++ b/mypy.ini @@ -38,7 +38,6 @@ exclude = (?x) |synapse/storage/databases/main/metrics.py |synapse/storage/databases/main/monthly_active_users.py |synapse/storage/databases/main/presence.py - |synapse/storage/databases/main/profile.py |synapse/storage/databases/main/purge_events.py |synapse/storage/databases/main/push_rule.py |synapse/storage/databases/main/receipts.py @@ -182,6 +181,9 @@ disallow_untyped_defs = True [mypy-synapse.storage.databases.main.room_batch] disallow_untyped_defs = True +[mypy-synapse.storage.databases.main.profile] +disallow_untyped_defs = True + [mypy-synapse.storage.databases.main.state_deltas] disallow_untyped_defs = True @@ -284,6 +286,9 @@ disallow_untyped_defs = True [mypy-tests.handlers.test_user_directory] disallow_untyped_defs = True +[mypy-tests.storage.test_profile] +disallow_untyped_defs = True + [mypy-tests.storage.test_user_directory] disallow_untyped_defs = True diff --git a/synapse/storage/databases/main/profile.py b/synapse/storage/databases/main/profile.py index dd8e27e226..e197b7203e 100644 --- a/synapse/storage/databases/main/profile.py +++ b/synapse/storage/databases/main/profile.py @@ -15,6 +15,7 @@ from typing import Any, Dict, List, Optional from synapse.api.errors import StoreError from synapse.storage._base import SQLBaseStore +from synapse.storage.database import LoggingTransaction from synapse.storage.databases.main.roommember import ProfileInfo @@ -104,7 +105,7 @@ class ProfileWorkerStore(SQLBaseStore): desc="update_remote_profile_cache", ) - async def maybe_delete_remote_profile_cache(self, user_id): + async def maybe_delete_remote_profile_cache(self, user_id: str) -> None: """Check if we still care about the remote user's profile, and if we don't then remove their profile from the cache """ @@ -116,9 +117,9 @@ class ProfileWorkerStore(SQLBaseStore): desc="delete_remote_profile_cache", ) - async def is_subscribed_remote_profile_for_user(self, user_id): + async def is_subscribed_remote_profile_for_user(self, user_id: str) -> bool: """Check whether we are interested in a remote user's profile.""" - res = await self.db_pool.simple_select_one_onecol( + res: Optional[str] = await self.db_pool.simple_select_one_onecol( table="group_users", keyvalues={"user_id": user_id}, retcol="user_id", @@ -139,13 +140,16 @@ class ProfileWorkerStore(SQLBaseStore): if res: return True + return False async def get_remote_profile_cache_entries_that_expire( self, last_checked: int ) -> List[Dict[str, str]]: """Get all users who haven't been checked since `last_checked`""" - def _get_remote_profile_cache_entries_that_expire_txn(txn): + def _get_remote_profile_cache_entries_that_expire_txn( + txn: LoggingTransaction, + ) -> List[Dict[str, str]]: sql = """ SELECT user_id, displayname, avatar_url FROM remote_profile_cache diff --git a/tests/storage/test_profile.py b/tests/storage/test_profile.py index a1ba99ff14..d37736edf8 100644 --- a/tests/storage/test_profile.py +++ b/tests/storage/test_profile.py @@ -11,19 +11,22 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from twisted.test.proto_helpers import MemoryReactor +from synapse.server import HomeServer from synapse.types import UserID +from synapse.util import Clock from tests import unittest class ProfileStoreTestCase(unittest.HomeserverTestCase): - def prepare(self, reactor, clock, hs): + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.store = hs.get_datastore() self.u_frank = UserID.from_string("@frank:test") - def test_displayname(self): + def test_displayname(self) -> None: self.get_success(self.store.create_profile(self.u_frank.localpart)) self.get_success( @@ -48,7 +51,7 @@ class ProfileStoreTestCase(unittest.HomeserverTestCase): self.get_success(self.store.get_profile_displayname(self.u_frank.localpart)) ) - def test_avatar_url(self): + def test_avatar_url(self) -> None: self.get_success(self.store.create_profile(self.u_frank.localpart)) self.get_success( -- cgit 1.5.1 From 24b61f379ac1fc740e1b569b85363e2a0411883a Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 16 Nov 2021 07:43:53 -0500 Subject: Add ability to un-shadow-ban via the admin API. (#11347) --- changelog.d/11347.feature | 1 + docs/admin_api/user_admin_api.md | 12 +++++++++--- synapse/rest/admin/users.py | 24 ++++++++++++++++++++++-- synapse/storage/databases/main/registration.py | 2 +- tests/rest/admin/test_user.py | 26 ++++++++++++++++++++------ 5 files changed, 53 insertions(+), 12 deletions(-) create mode 100644 changelog.d/11347.feature (limited to 'synapse/storage/databases') diff --git a/changelog.d/11347.feature b/changelog.d/11347.feature new file mode 100644 index 0000000000..b0cb5345a0 --- /dev/null +++ b/changelog.d/11347.feature @@ -0,0 +1 @@ +Add admin API to un-shadow-ban a user. diff --git a/docs/admin_api/user_admin_api.md b/docs/admin_api/user_admin_api.md index 16ec33b3c1..ba574d795f 100644 --- a/docs/admin_api/user_admin_api.md +++ b/docs/admin_api/user_admin_api.md @@ -948,7 +948,7 @@ The following fields are returned in the JSON response body: See also the [Client-Server API Spec on pushers](https://matrix.org/docs/spec/client_server/latest#get-matrix-client-r0-pushers). -## Shadow-banning users +## Controlling whether a user is shadow-banned Shadow-banning is a useful tool for moderating malicious or egregiously abusive users. A shadow-banned users receives successful responses to their client-server API requests, @@ -961,16 +961,22 @@ or broken behaviour for the client. A shadow-banned user will not receive any notification and it is generally more appropriate to ban or kick abusive users. A shadow-banned user will be unable to contact anyone on the server. -The API is: +To shadow-ban a user the API is: ``` POST /_synapse/admin/v1/users//shadow_ban ``` +To un-shadow-ban a user the API is: + +``` +DELETE /_synapse/admin/v1/users//shadow_ban +``` + To use it, you will need to authenticate by providing an `access_token` for a server admin: [Admin API](../usage/administration/admin_api) -An empty JSON dict is returned. +An empty JSON dict is returned in both cases. **Parameters** diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index d14fafbbc9..23a8bf1fdb 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -909,7 +909,7 @@ class UserTokenRestServlet(RestServlet): class ShadowBanRestServlet(RestServlet): - """An admin API for shadow-banning a user. + """An admin API for controlling whether a user is shadow-banned. A shadow-banned users receives successful responses to their client-server API requests, but the events are not propagated into rooms. @@ -917,11 +917,19 @@ class ShadowBanRestServlet(RestServlet): Shadow-banning a user should be used as a tool of last resort and may lead to confusing or broken behaviour for the client. - Example: + Example of shadow-banning a user: POST /_synapse/admin/v1/users/@test:example.com/shadow_ban {} + 200 OK + {} + + Example of removing a user from being shadow-banned: + + DELETE /_synapse/admin/v1/users/@test:example.com/shadow_ban + {} + 200 OK {} """ @@ -945,6 +953,18 @@ class ShadowBanRestServlet(RestServlet): return 200, {} + async def on_DELETE( + self, request: SynapseRequest, user_id: str + ) -> Tuple[int, JsonDict]: + await assert_requester_is_admin(self.auth, request) + + if not self.hs.is_mine_id(user_id): + raise SynapseError(400, "Only local users can be shadow-banned") + + await self.store.set_shadow_banned(UserID.from_string(user_id), False) + + return 200, {} + class RateLimitRestServlet(RestServlet): """An admin API to override ratelimiting for an user. diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index 6c7d6ba508..5e55440570 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -476,7 +476,7 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): shadow_banned: true iff the user is to be shadow-banned, false otherwise. """ - def set_shadow_banned_txn(txn): + def set_shadow_banned_txn(txn: LoggingTransaction) -> None: user_id = user.to_string() self.db_pool.simple_update_one_txn( txn, diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index 25e8d6cf27..c9fe0f06c2 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -3592,31 +3592,34 @@ class ShadowBanRestTestCase(unittest.HomeserverTestCase): self.other_user ) - def test_no_auth(self): + @parameterized.expand(["POST", "DELETE"]) + def test_no_auth(self, method: str): """ Try to get information of an user without authentication. """ - channel = self.make_request("POST", self.url) + channel = self.make_request(method, self.url) self.assertEqual(401, channel.code, msg=channel.json_body) self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"]) - def test_requester_is_not_admin(self): + @parameterized.expand(["POST", "DELETE"]) + def test_requester_is_not_admin(self, method: str): """ If the user is not a server admin, an error is returned. """ other_user_token = self.login("user", "pass") - channel = self.make_request("POST", self.url, access_token=other_user_token) + channel = self.make_request(method, self.url, access_token=other_user_token) self.assertEqual(403, channel.code, msg=channel.json_body) self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) - def test_user_is_not_local(self): + @parameterized.expand(["POST", "DELETE"]) + def test_user_is_not_local(self, method: str): """ Tests that shadow-banning for a user that is not a local returns a 400 """ url = "/_synapse/admin/v1/whois/@unknown_person:unknown_domain" - channel = self.make_request("POST", url, access_token=self.admin_user_tok) + channel = self.make_request(method, url, access_token=self.admin_user_tok) self.assertEqual(400, channel.code, msg=channel.json_body) def test_success(self): @@ -3636,6 +3639,17 @@ class ShadowBanRestTestCase(unittest.HomeserverTestCase): result = self.get_success(self.store.get_user_by_access_token(other_user_token)) self.assertTrue(result.shadow_banned) + # Un-shadow-ban the user. + channel = self.make_request( + "DELETE", self.url, access_token=self.admin_user_tok + ) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual({}, channel.json_body) + + # Ensure the user is no longer shadow-banned (and the cache was cleared). + result = self.get_success(self.store.get_user_by_access_token(other_user_token)) + self.assertFalse(result.shadow_banned) + class RateLimitTestCase(unittest.HomeserverTestCase): -- cgit 1.5.1 From 4bd54b263ef7e2ac29acdc85e0c6392684c44281 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 18 Nov 2021 08:43:09 -0500 Subject: Do not allow MSC3440 threads to fork threads (#11161) Adds validation to the Client-Server API to ensure that the potential thread head does not relate to another event already. This results in not allowing a thread to "fork" into other threads. If the target event is unknown for some reason (maybe it isn't visible to your homeserver), but is the target of other events it is assumed that the thread can be created from it. Otherwise, it is rejected as an unknown event. --- changelog.d/11161.feature | 1 + synapse/handlers/message.py | 54 ++++++++++++++++++++--- synapse/storage/databases/main/relations.py | 67 ++++++++++++++++++++++++++++- tests/rest/client/test_relations.py | 62 ++++++++++++++++++++++++++ 4 files changed, 176 insertions(+), 8 deletions(-) create mode 100644 changelog.d/11161.feature (limited to 'synapse/storage/databases') diff --git a/changelog.d/11161.feature b/changelog.d/11161.feature new file mode 100644 index 0000000000..76b0d28084 --- /dev/null +++ b/changelog.d/11161.feature @@ -0,0 +1 @@ +Experimental support for the thread relation defined in [MSC3440](https://github.com/matrix-org/matrix-doc/pull/3440). diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index d4c2a6ab7a..22dd4cf5fd 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1001,13 +1001,52 @@ class EventCreationHandler: ) self.validator.validate_new(event, self.config) + await self._validate_event_relation(event) + logger.debug("Created event %s", event.event_id) + + return event, context + + async def _validate_event_relation(self, event: EventBase) -> None: + """ + Ensure the relation data on a new event is not bogus. + + Args: + event: The event being created. + + Raises: + SynapseError if the event is invalid. + """ + + relation = event.content.get("m.relates_to") + if not relation: + return + + relation_type = relation.get("rel_type") + if not relation_type: + return + + # Ensure the parent is real. + relates_to = relation.get("event_id") + if not relates_to: + return + + parent_event = await self.store.get_event(relates_to, allow_none=True) + if parent_event: + # And in the same room. + if parent_event.room_id != event.room_id: + raise SynapseError(400, "Relations must be in the same room") + + else: + # There must be some reason that the client knows the event exists, + # see if there are existing relations. If so, assume everything is fine. + if not await self.store.event_is_target_of_relation(relates_to): + # Otherwise, the client can't know about the parent event! + raise SynapseError(400, "Can't send relation to unknown event") # If this event is an annotation then we check that that the sender # can't annotate the same way twice (e.g. stops users from liking an # event multiple times). - relation = event.content.get("m.relates_to", {}) - if relation.get("rel_type") == RelationTypes.ANNOTATION: - relates_to = relation["event_id"] + if relation_type == RelationTypes.ANNOTATION: aggregation_key = relation["key"] already_exists = await self.store.has_user_annotated_event( @@ -1016,9 +1055,12 @@ class EventCreationHandler: if already_exists: raise SynapseError(400, "Can't send same reaction twice") - logger.debug("Created event %s", event.event_id) - - return event, context + # Don't attempt to start a thread if the parent event is a relation. + elif relation_type == RelationTypes.THREAD: + if await self.store.event_includes_relation(relates_to): + raise SynapseError( + 400, "Cannot start threads from an event with a relation" + ) @measure_func("handle_new_client_event") async def handle_new_client_event( diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py index 907af10995..0a43acda07 100644 --- a/synapse/storage/databases/main/relations.py +++ b/synapse/storage/databases/main/relations.py @@ -132,6 +132,69 @@ class RelationsWorkerStore(SQLBaseStore): "get_recent_references_for_event", _get_recent_references_for_event_txn ) + async def event_includes_relation(self, event_id: str) -> bool: + """Check if the given event relates to another event. + + An event has a relation if it has a valid m.relates_to with a rel_type + and event_id in the content: + + { + "content": { + "m.relates_to": { + "rel_type": "m.replace", + "event_id": "$other_event_id" + } + } + } + + Args: + event_id: The event to check. + + Returns: + True if the event includes a valid relation. + """ + + result = await self.db_pool.simple_select_one_onecol( + table="event_relations", + keyvalues={"event_id": event_id}, + retcol="event_id", + allow_none=True, + desc="event_includes_relation", + ) + return result is not None + + async def event_is_target_of_relation(self, parent_id: str) -> bool: + """Check if the given event is the target of another event's relation. + + An event is the target of an event relation if it has a valid + m.relates_to with a rel_type and event_id pointing to parent_id in the + content: + + { + "content": { + "m.relates_to": { + "rel_type": "m.replace", + "event_id": "$parent_id" + } + } + } + + Args: + parent_id: The event to check. + + Returns: + True if the event is the target of another event's relation. + """ + + result = await self.db_pool.simple_select_one_onecol( + table="event_relations", + keyvalues={"relates_to_id": parent_id}, + retcol="event_id", + allow_none=True, + desc="event_is_target_of_relation", + ) + return result is not None + @cached(tree=True) async def get_aggregation_groups_for_event( self, @@ -362,7 +425,7 @@ class RelationsWorkerStore(SQLBaseStore): %s; """ - def _get_if_event_has_relations(txn) -> List[str]: + def _get_if_events_have_relations(txn) -> List[str]: clauses: List[str] = [] clause, args = make_in_list_sql_clause( txn.database_engine, "relates_to_id", parent_ids @@ -387,7 +450,7 @@ class RelationsWorkerStore(SQLBaseStore): return [row[0] for row in txn] return await self.db_pool.runInteraction( - "get_if_event_has_relations", _get_if_event_has_relations + "get_if_events_have_relations", _get_if_events_have_relations ) async def has_user_annotated_event( diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py index 78c2fb86b9..b8a1b92a89 100644 --- a/tests/rest/client/test_relations.py +++ b/tests/rest/client/test_relations.py @@ -91,6 +91,49 @@ class RelationsTestCase(unittest.HomeserverTestCase): channel = self._send_relation(RelationTypes.ANNOTATION, EventTypes.Member) self.assertEquals(400, channel.code, channel.json_body) + def test_deny_invalid_event(self): + """Test that we deny relations on non-existant events""" + channel = self._send_relation( + RelationTypes.ANNOTATION, + EventTypes.Message, + parent_id="foo", + content={"body": "foo", "msgtype": "m.text"}, + ) + self.assertEquals(400, channel.code, channel.json_body) + + # Unless that event is referenced from another event! + self.get_success( + self.hs.get_datastore().db_pool.simple_insert( + table="event_relations", + values={ + "event_id": "bar", + "relates_to_id": "foo", + "relation_type": RelationTypes.THREAD, + }, + desc="test_deny_invalid_event", + ) + ) + channel = self._send_relation( + RelationTypes.THREAD, + EventTypes.Message, + parent_id="foo", + content={"body": "foo", "msgtype": "m.text"}, + ) + self.assertEquals(200, channel.code, channel.json_body) + + def test_deny_invalid_room(self): + """Test that we deny relations on non-existant events""" + # Create another room and send a message in it. + room2 = self.helper.create_room_as(self.user_id, tok=self.user_token) + res = self.helper.send(room2, body="Hi!", tok=self.user_token) + parent_id = res["event_id"] + + # Attempt to send an annotation to that event. + channel = self._send_relation( + RelationTypes.ANNOTATION, "m.reaction", parent_id=parent_id, key="A" + ) + self.assertEquals(400, channel.code, channel.json_body) + def test_deny_double_react(self): """Test that we deny relations on membership events""" channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", key="a") @@ -99,6 +142,25 @@ class RelationsTestCase(unittest.HomeserverTestCase): channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a") self.assertEquals(400, channel.code, channel.json_body) + def test_deny_forked_thread(self): + """It is invalid to start a thread off a thread.""" + channel = self._send_relation( + RelationTypes.THREAD, + "m.room.message", + content={"msgtype": "m.text", "body": "foo"}, + parent_id=self.parent_id, + ) + self.assertEquals(200, channel.code, channel.json_body) + parent_id = channel.json_body["event_id"] + + channel = self._send_relation( + RelationTypes.THREAD, + "m.room.message", + content={"msgtype": "m.text", "body": "foo"}, + parent_id=parent_id, + ) + self.assertEquals(400, channel.code, channel.json_body) + def test_basic_paginate_relations(self): """Tests that calling pagination API correctly the latest relations.""" channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a") -- cgit 1.5.1 From 539e44139911dc95c34784f3df2b3706c00b7db9 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Thu, 18 Nov 2021 14:40:26 +0000 Subject: Use auto_attribs for RefreshTokenLookupResult (#11386) --- changelog.d/11386.misc | 1 + synapse/storage/databases/main/registration.py | 14 +++++++------- 2 files changed, 8 insertions(+), 7 deletions(-) create mode 100644 changelog.d/11386.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/11386.misc b/changelog.d/11386.misc new file mode 100644 index 0000000000..3178d53f05 --- /dev/null +++ b/changelog.d/11386.misc @@ -0,0 +1 @@ +Use `auto_attribs` on the `attrs` class `RefreshTokenLookupResult`. diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index 5e55440570..7ee699f981 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -84,26 +84,26 @@ class TokenLookupResult: return self.user_id -@attr.s(frozen=True, slots=True) +@attr.s(auto_attribs=True, frozen=True, slots=True) class RefreshTokenLookupResult: """Result of looking up a refresh token.""" - user_id = attr.ib(type=str) + user_id: str """The user this token belongs to.""" - device_id = attr.ib(type=str) + device_id: str """The device associated with this refresh token.""" - token_id = attr.ib(type=int) + token_id: int """The ID of this refresh token.""" - next_token_id = attr.ib(type=Optional[int]) + next_token_id: Optional[int] """The ID of the refresh token which replaced this one.""" - has_next_refresh_token_been_refreshed = attr.ib(type=bool) + has_next_refresh_token_been_refreshed: bool """True if the next refresh token was used for another refresh.""" - has_next_access_token_been_used = attr.ib(type=bool) + has_next_access_token_been_used: bool """True if the next access token was already used at least once.""" -- cgit 1.5.1 From 81b18fe5c060a0532ab64b9575d54b84ddbad278 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Thu, 18 Nov 2021 18:43:49 +0100 Subject: Add dedicated admin API for blocking a room (#11324) --- changelog.d/11324.feature | 1 + docs/admin_api/rooms.md | 78 +++++++++++ synapse/rest/admin/__init__.py | 2 + synapse/rest/admin/rooms.py | 63 +++++++++ synapse/storage/databases/main/room.py | 32 +++++ tests/rest/admin/test_room.py | 228 +++++++++++++++++++++++++++++++++ 6 files changed, 404 insertions(+) create mode 100644 changelog.d/11324.feature (limited to 'synapse/storage/databases') diff --git a/changelog.d/11324.feature b/changelog.d/11324.feature new file mode 100644 index 0000000000..55494358bb --- /dev/null +++ b/changelog.d/11324.feature @@ -0,0 +1 @@ +Add dedicated admin API for blocking a room. \ No newline at end of file diff --git a/docs/admin_api/rooms.md b/docs/admin_api/rooms.md index 6a6ae92d66..0f1a74134f 100644 --- a/docs/admin_api/rooms.md +++ b/docs/admin_api/rooms.md @@ -3,6 +3,7 @@ - [Room Details API](#room-details-api) - [Room Members API](#room-members-api) - [Room State API](#room-state-api) +- [Block Room API](#block-room-api) - [Delete Room API](#delete-room-api) * [Version 1 (old version)](#version-1-old-version) * [Version 2 (new version)](#version-2-new-version) @@ -386,6 +387,83 @@ A response body like the following is returned: } ``` +# Block Room API +The Block Room admin API allows server admins to block and unblock rooms, +and query to see if a given room is blocked. +This API can be used to pre-emptively block a room, even if it's unknown to this +homeserver. Users will be prevented from joining a blocked room. + +## Block or unblock a room + +The API is: + +``` +PUT /_synapse/admin/v1/rooms//block +``` + +with a body of: + +```json +{ + "block": true +} +``` + +A response body like the following is returned: + +```json +{ + "block": true +} +``` + +**Parameters** + +The following parameters should be set in the URL: + +- `room_id` - The ID of the room. + +The following JSON body parameters are available: + +- `block` - If `true` the room will be blocked and if `false` the room will be unblocked. + +**Response** + +The following fields are possible in the JSON response body: + +- `block` - A boolean. `true` if the room is blocked, otherwise `false` + +## Get block status + +The API is: + +``` +GET /_synapse/admin/v1/rooms//block +``` + +A response body like the following is returned: + +```json +{ + "block": true, + "user_id": "" +} +``` + +**Parameters** + +The following parameters should be set in the URL: + +- `room_id` - The ID of the room. + +**Response** + +The following fields are possible in the JSON response body: + +- `block` - A boolean. `true` if the room is blocked, otherwise `false` +- `user_id` - An optional string. If the room is blocked (`block` is `true`) shows + the user who has add the room to blocking list. Otherwise it is not displayed. + # Delete Room API The Delete Room admin API allows server admins to remove rooms from the server diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index d78fe406c4..65b76fa10c 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -46,6 +46,7 @@ from synapse.rest.admin.registration_tokens import ( RegistrationTokenRestServlet, ) from synapse.rest.admin.rooms import ( + BlockRoomRestServlet, DeleteRoomStatusByDeleteIdRestServlet, DeleteRoomStatusByRoomIdRestServlet, ForwardExtremitiesRestServlet, @@ -223,6 +224,7 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: Register all the admin servlets. """ register_servlets_for_client_rest_resource(hs, http_server) + BlockRoomRestServlet(hs).register(http_server) ListRoomRestServlet(hs).register(http_server) RoomStateRestServlet(hs).register(http_server) RoomRestServlet(hs).register(http_server) diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py index 37cb4d0796..5b8ec1e5ca 100644 --- a/synapse/rest/admin/rooms.py +++ b/synapse/rest/admin/rooms.py @@ -782,3 +782,66 @@ class RoomEventContextServlet(RestServlet): ) return 200, results + + +class BlockRoomRestServlet(RestServlet): + """ + Manage blocking of rooms. + On PUT: Add or remove a room from blocking list. + On GET: Get blocking status of room and user who has blocked this room. + """ + + PATTERNS = admin_patterns("/rooms/(?P[^/]+)/block$") + + def __init__(self, hs: "HomeServer"): + self._auth = hs.get_auth() + self._store = hs.get_datastore() + + async def on_GET( + self, request: SynapseRequest, room_id: str + ) -> Tuple[int, JsonDict]: + await assert_requester_is_admin(self._auth, request) + + if not RoomID.is_valid(room_id): + raise SynapseError( + HTTPStatus.BAD_REQUEST, "%s is not a legal room ID" % (room_id,) + ) + + blocked_by = await self._store.room_is_blocked_by(room_id) + # Test `not None` if `user_id` is an empty string + # if someone add manually an entry in database + if blocked_by is not None: + response = {"block": True, "user_id": blocked_by} + else: + response = {"block": False} + + return HTTPStatus.OK, response + + async def on_PUT( + self, request: SynapseRequest, room_id: str + ) -> Tuple[int, JsonDict]: + requester = await self._auth.get_user_by_req(request) + await assert_user_is_admin(self._auth, requester.user) + + content = parse_json_object_from_request(request) + + if not RoomID.is_valid(room_id): + raise SynapseError( + HTTPStatus.BAD_REQUEST, "%s is not a legal room ID" % (room_id,) + ) + + assert_params_in_dict(content, ["block"]) + block = content.get("block") + if not isinstance(block, bool): + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Param 'block' must be a boolean.", + Codes.BAD_JSON, + ) + + if block: + await self._store.block_room(room_id, requester.user.to_string()) + else: + await self._store.unblock_room(room_id) + + return HTTPStatus.OK, {"block": block} diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 17b398bb69..7d694d852d 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -397,6 +397,20 @@ class RoomWorkerStore(SQLBaseStore): desc="is_room_blocked", ) + async def room_is_blocked_by(self, room_id: str) -> Optional[str]: + """ + Function to retrieve user who has blocked the room. + user_id is non-nullable + It returns None if the room is not blocked. + """ + return await self.db_pool.simple_select_one_onecol( + table="blocked_rooms", + keyvalues={"room_id": room_id}, + retcol="user_id", + allow_none=True, + desc="room_is_blocked_by", + ) + async def get_rooms_paginate( self, start: int, @@ -1775,3 +1789,21 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore, SearchStore): self.is_room_blocked, (room_id,), ) + + async def unblock_room(self, room_id: str) -> None: + """Remove the room from blocking list. + + Args: + room_id: Room to unblock + """ + await self.db_pool.simple_delete( + table="blocked_rooms", + keyvalues={"room_id": room_id}, + desc="unblock_room", + ) + await self.db_pool.runInteraction( + "block_room_invalidation", + self._invalidate_cache_and_stream, + self.is_room_blocked, + (room_id,), + ) diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py index b48fc12e5f..07077aff78 100644 --- a/tests/rest/admin/test_room.py +++ b/tests/rest/admin/test_room.py @@ -2226,6 +2226,234 @@ class MakeRoomAdminTestCase(unittest.HomeserverTestCase): ) +class BlockRoomTestCase(unittest.HomeserverTestCase): + servlets = [ + synapse.rest.admin.register_servlets, + room.register_servlets, + login.register_servlets, + ] + + def prepare(self, reactor, clock, hs): + self._store = hs.get_datastore() + + self.admin_user = self.register_user("admin", "pass", admin=True) + self.admin_user_tok = self.login("admin", "pass") + + self.other_user = self.register_user("user", "pass") + self.other_user_tok = self.login("user", "pass") + + self.room_id = self.helper.create_room_as( + self.other_user, tok=self.other_user_tok + ) + self.url = "/_synapse/admin/v1/rooms/%s/block" + + @parameterized.expand([("PUT",), ("GET",)]) + def test_requester_is_no_admin(self, method: str): + """If the user is not a server admin, an error 403 is returned.""" + + channel = self.make_request( + method, + self.url % self.room_id, + content={}, + access_token=self.other_user_tok, + ) + + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.json_body) + self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) + + @parameterized.expand([("PUT",), ("GET",)]) + def test_room_is_not_valid(self, method: str): + """Check that invalid room names, return an error 400.""" + + channel = self.make_request( + method, + self.url % "invalidroom", + content={}, + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code, msg=channel.json_body) + self.assertEqual( + "invalidroom is not a legal room ID", + channel.json_body["error"], + ) + + def test_block_is_not_valid(self): + """If parameter `block` is not valid, return an error.""" + + # `block` is not valid + channel = self.make_request( + "PUT", + self.url % self.room_id, + content={"block": "NotBool"}, + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code, msg=channel.json_body) + self.assertEqual(Codes.BAD_JSON, channel.json_body["errcode"]) + + # `block` is not set + channel = self.make_request( + "PUT", + self.url % self.room_id, + content={}, + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code, msg=channel.json_body) + self.assertEqual(Codes.MISSING_PARAM, channel.json_body["errcode"]) + + # no content is send + channel = self.make_request( + "PUT", + self.url % self.room_id, + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code, msg=channel.json_body) + self.assertEqual(Codes.NOT_JSON, channel.json_body["errcode"]) + + def test_block_room(self): + """Test that block a room is successful.""" + + def _request_and_test_block_room(room_id: str) -> None: + self._is_blocked(room_id, expect=False) + channel = self.make_request( + "PUT", + self.url % room_id, + content={"block": True}, + access_token=self.admin_user_tok, + ) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertTrue(channel.json_body["block"]) + self._is_blocked(room_id, expect=True) + + # known internal room + _request_and_test_block_room(self.room_id) + + # unknown internal room + _request_and_test_block_room("!unknown:test") + + # unknown remote room + _request_and_test_block_room("!unknown:remote") + + def test_block_room_twice(self): + """Test that block a room that is already blocked is successful.""" + + self._is_blocked(self.room_id, expect=False) + for _ in range(2): + channel = self.make_request( + "PUT", + self.url % self.room_id, + content={"block": True}, + access_token=self.admin_user_tok, + ) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertTrue(channel.json_body["block"]) + self._is_blocked(self.room_id, expect=True) + + def test_unblock_room(self): + """Test that unblock a room is successful.""" + + def _request_and_test_unblock_room(room_id: str) -> None: + self._block_room(room_id) + + channel = self.make_request( + "PUT", + self.url % room_id, + content={"block": False}, + access_token=self.admin_user_tok, + ) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertFalse(channel.json_body["block"]) + self._is_blocked(room_id, expect=False) + + # known internal room + _request_and_test_unblock_room(self.room_id) + + # unknown internal room + _request_and_test_unblock_room("!unknown:test") + + # unknown remote room + _request_and_test_unblock_room("!unknown:remote") + + def test_unblock_room_twice(self): + """Test that unblock a room that is not blocked is successful.""" + + self._block_room(self.room_id) + for _ in range(2): + channel = self.make_request( + "PUT", + self.url % self.room_id, + content={"block": False}, + access_token=self.admin_user_tok, + ) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertFalse(channel.json_body["block"]) + self._is_blocked(self.room_id, expect=False) + + def test_get_blocked_room(self): + """Test get status of a blocked room""" + + def _request_blocked_room(room_id: str) -> None: + self._block_room(room_id) + + channel = self.make_request( + "GET", + self.url % room_id, + access_token=self.admin_user_tok, + ) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertTrue(channel.json_body["block"]) + self.assertEqual(self.other_user, channel.json_body["user_id"]) + + # known internal room + _request_blocked_room(self.room_id) + + # unknown internal room + _request_blocked_room("!unknown:test") + + # unknown remote room + _request_blocked_room("!unknown:remote") + + def test_get_unblocked_room(self): + """Test get status of a unblocked room""" + + def _request_unblocked_room(room_id: str) -> None: + self._is_blocked(room_id, expect=False) + + channel = self.make_request( + "GET", + self.url % room_id, + access_token=self.admin_user_tok, + ) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertFalse(channel.json_body["block"]) + self.assertNotIn("user_id", channel.json_body) + + # known internal room + _request_unblocked_room(self.room_id) + + # unknown internal room + _request_unblocked_room("!unknown:test") + + # unknown remote room + _request_unblocked_room("!unknown:remote") + + def _is_blocked(self, room_id: str, expect: bool = True) -> None: + """Assert that the room is blocked or not""" + d = self._store.is_room_blocked(room_id) + if expect: + self.assertTrue(self.get_success(d)) + else: + self.assertIsNone(self.get_success(d)) + + def _block_room(self, room_id: str) -> None: + """Block a room in database""" + self.get_success(self._store.block_room(room_id, self.other_user)) + self._is_blocked(room_id, expect=True) + + PURGE_TABLES = [ "current_state_events", "event_backward_extremities", -- cgit 1.5.1 From eca7cffb73fce77d025a0d7a08badb855b6df133 Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Fri, 19 Nov 2021 06:40:12 -0500 Subject: Keep fallback key marked as used if it's re-uploaded (#11382) --- changelog.d/11382.misc | 1 + synapse/storage/databases/main/end_to_end_keys.py | 51 ++++++++++++++++++----- tests/handlers/test_e2e_keys.py | 32 +++++++++++++- 3 files changed, 72 insertions(+), 12 deletions(-) create mode 100644 changelog.d/11382.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/11382.misc b/changelog.d/11382.misc new file mode 100644 index 0000000000..d812ef309e --- /dev/null +++ b/changelog.d/11382.misc @@ -0,0 +1 @@ +Keep fallback key marked as used if it's re-uploaded. diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index a95ac34f09..b06c1dc45b 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -408,29 +408,58 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore): fallback_keys: the keys to set. This is a map from key ID (which is of the form "algorithm:id") to key data. """ + await self.db_pool.runInteraction( + "set_e2e_fallback_keys_txn", + self._set_e2e_fallback_keys_txn, + user_id, + device_id, + fallback_keys, + ) + + await self.invalidate_cache_and_stream( + "get_e2e_unused_fallback_key_types", (user_id, device_id) + ) + + def _set_e2e_fallback_keys_txn( + self, txn: Connection, user_id: str, device_id: str, fallback_keys: JsonDict + ) -> None: # fallback_keys will usually only have one item in it, so using a for # loop (as opposed to calling simple_upsert_many_txn) won't be too bad # FIXME: make sure that only one key per algorithm is uploaded for key_id, fallback_key in fallback_keys.items(): algorithm, key_id = key_id.split(":", 1) - await self.db_pool.simple_upsert( - "e2e_fallback_keys_json", + old_key_json = self.db_pool.simple_select_one_onecol_txn( + txn, + table="e2e_fallback_keys_json", keyvalues={ "user_id": user_id, "device_id": device_id, "algorithm": algorithm, }, - values={ - "key_id": key_id, - "key_json": json_encoder.encode(fallback_key), - "used": False, - }, - desc="set_e2e_fallback_key", + retcol="key_json", + allow_none=True, ) - await self.invalidate_cache_and_stream( - "get_e2e_unused_fallback_key_types", (user_id, device_id) - ) + new_key_json = encode_canonical_json(fallback_key).decode("utf-8") + + # If the uploaded key is the same as the current fallback key, + # don't do anything. This prevents marking the key as unused if it + # was already used. + if old_key_json != new_key_json: + self.db_pool.simple_upsert_txn( + txn, + table="e2e_fallback_keys_json", + keyvalues={ + "user_id": user_id, + "device_id": device_id, + "algorithm": algorithm, + }, + values={ + "key_id": key_id, + "key_json": json_encoder.encode(fallback_key), + "used": False, + }, + ) @cached(max_entries=10000) async def get_e2e_unused_fallback_key_types( diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py index 0c3b86fda9..f0723892e4 100644 --- a/tests/handlers/test_e2e_keys.py +++ b/tests/handlers/test_e2e_keys.py @@ -162,6 +162,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase): local_user = "@boris:" + self.hs.hostname device_id = "xyz" fallback_key = {"alg1:k1": "key1"} + fallback_key2 = {"alg1:k2": "key2"} otk = {"alg1:k2": "key2"} # we shouldn't have any unused fallback keys yet @@ -213,6 +214,35 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase): {"failures": {}, "one_time_keys": {local_user: {device_id: fallback_key}}}, ) + # re-uploading the same fallback key should still result in no unused fallback + # keys + self.get_success( + self.handler.upload_keys_for_user( + local_user, + device_id, + {"org.matrix.msc2732.fallback_keys": fallback_key}, + ) + ) + + res = self.get_success( + self.store.get_e2e_unused_fallback_key_types(local_user, device_id) + ) + self.assertEqual(res, []) + + # uploading a new fallback key should result in an unused fallback key + self.get_success( + self.handler.upload_keys_for_user( + local_user, + device_id, + {"org.matrix.msc2732.fallback_keys": fallback_key2}, + ) + ) + + res = self.get_success( + self.store.get_e2e_unused_fallback_key_types(local_user, device_id) + ) + self.assertEqual(res, ["alg1"]) + # if the user uploads a one-time key, the next claim should fetch the # one-time key, and then go back to the fallback self.get_success( @@ -238,7 +268,7 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase): ) self.assertEqual( res, - {"failures": {}, "one_time_keys": {local_user: {device_id: fallback_key}}}, + {"failures": {}, "one_time_keys": {local_user: {device_id: fallback_key2}}}, ) def test_replace_master_key(self): -- cgit 1.5.1 From 3d893b8cf2358f947678dfb995b73f426200b099 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 22 Nov 2021 12:01:47 -0500 Subject: Store arbitrary relations from events. (#11391) Instead of only known relation types. This also reworks the background update for thread relations to crawl events and search for any relation type, not just threaded relations. --- changelog.d/11391.feature | 1 + synapse/storage/databases/main/events.py | 29 +++--- .../storage/databases/main/events_bg_updates.py | 88 ++++++++++------ .../schema/main/delta/65/02_thread_relations.sql | 18 ---- .../main/delta/65/07_arbitrary_relations.sql | 18 ++++ tests/rest/client/test_relations.py | 111 +++++++++++++++++++++ tests/unittest.py | 7 +- 7 files changed, 210 insertions(+), 62 deletions(-) create mode 100644 changelog.d/11391.feature delete mode 100644 synapse/storage/schema/main/delta/65/02_thread_relations.sql create mode 100644 synapse/storage/schema/main/delta/65/07_arbitrary_relations.sql (limited to 'synapse/storage/databases') diff --git a/changelog.d/11391.feature b/changelog.d/11391.feature new file mode 100644 index 0000000000..4f696285a7 --- /dev/null +++ b/changelog.d/11391.feature @@ -0,0 +1 @@ +Store and allow querying of arbitrary event relations. diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 120e4807d1..06832221ad 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1,6 +1,6 @@ # Copyright 2014-2016 OpenMarket Ltd # Copyright 2018-2019 New Vector Ltd -# Copyright 2019 The Matrix.org Foundation C.I.C. +# Copyright 2019-2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -1696,34 +1696,33 @@ class PersistEventsStore: }, ) - def _handle_event_relations(self, txn, event): - """Handles inserting relation data during peristence of events + def _handle_event_relations( + self, txn: LoggingTransaction, event: EventBase + ) -> None: + """Handles inserting relation data during persistence of events Args: - txn - event (EventBase) + txn: The current database transaction. + event: The event which might have relations. """ relation = event.content.get("m.relates_to") if not relation: # No relations return + # Relations must have a type and parent event ID. rel_type = relation.get("rel_type") - if rel_type not in ( - RelationTypes.ANNOTATION, - RelationTypes.REFERENCE, - RelationTypes.REPLACE, - RelationTypes.THREAD, - ): - # Unknown relation type + if not isinstance(rel_type, str): return parent_id = relation.get("event_id") - if not parent_id: - # Invalid relation + if not isinstance(parent_id, str): return - aggregation_key = relation.get("key") + # Annotations have a key field. + aggregation_key = None + if rel_type == RelationTypes.ANNOTATION: + aggregation_key = relation.get("key") self.db_pool.simple_insert_txn( txn, diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index ae3a8a63e4..c88fd35e7f 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -1,4 +1,4 @@ -# Copyright 2019 The Matrix.org Foundation C.I.C. +# Copyright 2019-2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -171,8 +171,14 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): self._purged_chain_cover_index, ) + # The event_thread_relation background update was replaced with the + # event_arbitrary_relations one, which handles any relation to avoid + # needed to potentially crawl the entire events table in the future. + self.db_pool.updates.register_noop_background_update("event_thread_relation") + self.db_pool.updates.register_background_update_handler( - "event_thread_relation", self._event_thread_relation + "event_arbitrary_relations", + self._event_arbitrary_relations, ) ################################################################################ @@ -1099,23 +1105,27 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): return result - async def _event_thread_relation(self, progress: JsonDict, batch_size: int) -> int: - """Background update handler which will store thread relations for existing events.""" + async def _event_arbitrary_relations( + self, progress: JsonDict, batch_size: int + ) -> int: + """Background update handler which will store previously unknown relations for existing events.""" last_event_id = progress.get("last_event_id", "") - def _event_thread_relation_txn(txn: LoggingTransaction) -> int: + def _event_arbitrary_relations_txn(txn: LoggingTransaction) -> int: + # Fetch events and then filter based on whether the event has a + # relation or not. txn.execute( """ SELECT event_id, json FROM event_json - LEFT JOIN event_relations USING (event_id) - WHERE event_id > ? AND event_relations.event_id IS NULL + WHERE event_id > ? ORDER BY event_id LIMIT ? """, (last_event_id, batch_size), ) results = list(txn) - missing_thread_relations = [] + # (event_id, parent_id, rel_type) for each relation + relations_to_insert: List[Tuple[str, str, str]] = [] for (event_id, event_json_raw) in results: try: event_json = db_to_json(event_json_raw) @@ -1127,48 +1137,70 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): ) continue - # If there's no relation (or it is not a thread), skip! + # If there's no relation, skip! relates_to = event_json["content"].get("m.relates_to") if not relates_to or not isinstance(relates_to, dict): continue - if relates_to.get("rel_type") != RelationTypes.THREAD: + + # If the relation type or parent event ID is not a string, skip it. + # + # Do not consider relation types that have existed for a long time, + # since they will already be listed in the `event_relations` table. + rel_type = relates_to.get("rel_type") + if not isinstance(rel_type, str) or rel_type in ( + RelationTypes.ANNOTATION, + RelationTypes.REFERENCE, + RelationTypes.REPLACE, + ): continue - # Get the parent ID. parent_id = relates_to.get("event_id") if not isinstance(parent_id, str): continue - missing_thread_relations.append((event_id, parent_id)) + relations_to_insert.append((event_id, parent_id, rel_type)) + + # Insert the missing data, note that we upsert here in case the event + # has already been processed. + if relations_to_insert: + self.db_pool.simple_upsert_many_txn( + txn=txn, + table="event_relations", + key_names=("event_id",), + key_values=[(r[0],) for r in relations_to_insert], + value_names=("relates_to_id", "relation_type"), + value_values=[r[1:] for r in relations_to_insert], + ) - # Insert the missing data. - self.db_pool.simple_insert_many_txn( - txn=txn, - table="event_relations", - values=[ - { - "event_id": event_id, - "relates_to_Id": parent_id, - "relation_type": RelationTypes.THREAD, - } - for event_id, parent_id in missing_thread_relations - ], - ) + # Iterate the parent IDs and invalidate caches. + for parent_id in {r[1] for r in relations_to_insert}: + cache_tuple = (parent_id,) + self._invalidate_cache_and_stream( + txn, self.get_relations_for_event, cache_tuple + ) + self._invalidate_cache_and_stream( + txn, self.get_aggregation_groups_for_event, cache_tuple + ) + self._invalidate_cache_and_stream( + txn, self.get_thread_summary, cache_tuple + ) if results: latest_event_id = results[-1][0] self.db_pool.updates._background_update_progress_txn( - txn, "event_thread_relation", {"last_event_id": latest_event_id} + txn, "event_arbitrary_relations", {"last_event_id": latest_event_id} ) return len(results) num_rows = await self.db_pool.runInteraction( - desc="event_thread_relation", func=_event_thread_relation_txn + desc="event_arbitrary_relations", func=_event_arbitrary_relations_txn ) if not num_rows: - await self.db_pool.updates._end_background_update("event_thread_relation") + await self.db_pool.updates._end_background_update( + "event_arbitrary_relations" + ) return num_rows diff --git a/synapse/storage/schema/main/delta/65/02_thread_relations.sql b/synapse/storage/schema/main/delta/65/02_thread_relations.sql deleted file mode 100644 index d60517f7b4..0000000000 --- a/synapse/storage/schema/main/delta/65/02_thread_relations.sql +++ /dev/null @@ -1,18 +0,0 @@ -/* Copyright 2021 The Matrix.org Foundation C.I.C - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - --- Check old events for thread relations. -INSERT INTO background_updates (ordering, update_name, progress_json) VALUES - (6502, 'event_thread_relation', '{}'); diff --git a/synapse/storage/schema/main/delta/65/07_arbitrary_relations.sql b/synapse/storage/schema/main/delta/65/07_arbitrary_relations.sql new file mode 100644 index 0000000000..267b2cb539 --- /dev/null +++ b/synapse/storage/schema/main/delta/65/07_arbitrary_relations.sql @@ -0,0 +1,18 @@ +/* Copyright 2021 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Check old events for thread relations. +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (6507, 'event_arbitrary_relations', '{}'); diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py index b8a1b92a89..eb10d43217 100644 --- a/tests/rest/client/test_relations.py +++ b/tests/rest/client/test_relations.py @@ -1,4 +1,5 @@ # Copyright 2019 New Vector Ltd +# Copyright 2021 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -46,6 +47,8 @@ class RelationsTestCase(unittest.HomeserverTestCase): return config def prepare(self, reactor, clock, hs): + self.store = hs.get_datastore() + self.user_id, self.user_token = self._create_user("alice") self.user2_id, self.user2_token = self._create_user("bob") @@ -765,6 +768,52 @@ class RelationsTestCase(unittest.HomeserverTestCase): self.assertIn("chunk", channel.json_body) self.assertEquals(channel.json_body["chunk"], []) + def test_unknown_relations(self): + """Unknown relations should be accepted.""" + channel = self._send_relation("m.relation.test", "m.room.test") + self.assertEquals(200, channel.code, channel.json_body) + event_id = channel.json_body["event_id"] + + channel = self.make_request( + "GET", + "/_matrix/client/unstable/rooms/%s/relations/%s?limit=1" + % (self.room, self.parent_id), + access_token=self.user_token, + ) + self.assertEquals(200, channel.code, channel.json_body) + + # We expect to get back a single pagination result, which is the full + # relation event we sent above. + self.assertEquals(len(channel.json_body["chunk"]), 1, channel.json_body) + self.assert_dict( + {"event_id": event_id, "sender": self.user_id, "type": "m.room.test"}, + channel.json_body["chunk"][0], + ) + + # We also expect to get the original event (the id of which is self.parent_id) + self.assertEquals( + channel.json_body["original_event"]["event_id"], self.parent_id + ) + + # When bundling the unknown relation is not included. + channel = self.make_request( + "GET", + "/rooms/%s/event/%s" % (self.room, self.parent_id), + access_token=self.user_token, + ) + self.assertEquals(200, channel.code, channel.json_body) + self.assertNotIn("m.relations", channel.json_body["unsigned"]) + + # But unknown relations can be directly queried. + channel = self.make_request( + "GET", + "/_matrix/client/unstable/rooms/%s/aggregations/%s?limit=1" + % (self.room, self.parent_id), + access_token=self.user_token, + ) + self.assertEquals(200, channel.code, channel.json_body) + self.assertEquals(channel.json_body["chunk"], []) + def _send_relation( self, relation_type: str, @@ -811,3 +860,65 @@ class RelationsTestCase(unittest.HomeserverTestCase): access_token = self.login(localpart, "abc123") return user_id, access_token + + def test_background_update(self): + """Test the event_arbitrary_relations background update.""" + channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", key="👍") + self.assertEquals(200, channel.code, channel.json_body) + annotation_event_id_good = channel.json_body["event_id"] + + channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", key="A") + self.assertEquals(200, channel.code, channel.json_body) + annotation_event_id_bad = channel.json_body["event_id"] + + channel = self._send_relation(RelationTypes.THREAD, "m.room.test") + self.assertEquals(200, channel.code, channel.json_body) + thread_event_id = channel.json_body["event_id"] + + # Clean-up the table as if the inserts did not happen during event creation. + self.get_success( + self.store.db_pool.simple_delete_many( + table="event_relations", + column="event_id", + iterable=(annotation_event_id_bad, thread_event_id), + keyvalues={}, + desc="RelationsTestCase.test_background_update", + ) + ) + + # Only the "good" annotation should be found. + channel = self.make_request( + "GET", + f"/_matrix/client/unstable/rooms/{self.room}/relations/{self.parent_id}?limit=10", + access_token=self.user_token, + ) + self.assertEquals(200, channel.code, channel.json_body) + self.assertEquals( + [ev["event_id"] for ev in channel.json_body["chunk"]], + [annotation_event_id_good], + ) + + # Insert and run the background update. + self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + {"update_name": "event_arbitrary_relations", "progress_json": "{}"}, + ) + ) + + # Ugh, have to reset this flag + self.store.db_pool.updates._all_done = False + self.wait_for_background_updates() + + # The "good" annotation and the thread should be found, but not the "bad" + # annotation. + channel = self.make_request( + "GET", + f"/_matrix/client/unstable/rooms/{self.room}/relations/{self.parent_id}?limit=10", + access_token=self.user_token, + ) + self.assertEquals(200, channel.code, channel.json_body) + self.assertCountEqual( + [ev["event_id"] for ev in channel.json_body["chunk"]], + [annotation_event_id_good, thread_event_id], + ) diff --git a/tests/unittest.py b/tests/unittest.py index c9a08a3420..165aafc574 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -331,7 +331,12 @@ class HomeserverTestCase(TestCase): time.sleep(0.01) def wait_for_background_updates(self) -> None: - """Block until all background database updates have completed.""" + """ + Block until all background database updates have completed. + + Note that callers must ensure that's a store property created on the + testcase. + """ while not self.get_success( self.store.db_pool.updates.has_completed_background_updates() ): -- cgit 1.5.1 From 7cebaf96447a8ff50c4525ba7667f58127876c5e Mon Sep 17 00:00:00 2001 From: Shay Date: Tue, 23 Nov 2021 06:46:40 -0800 Subject: Remove code invalidated by deprecated config flag 'trust_identity_servers_for_password_resets' (#11395) * remove background update code related to deprecated config flag * changelog entry * update changelog * Delete 11394.removal Duplicate, wrong number * add no-op background update and change newfragment so it will be consolidated with associated work * remove unused code * Remove code associated with deprecated flag from legacy docker dynamic config file Co-authored-by: reivilibre --- changelog.d/11395.removal | 1 + docker/conf/homeserver.yaml | 8 ------ synapse/storage/databases/main/registration.py | 35 +++----------------------- tests/utils.py | 1 - 4 files changed, 4 insertions(+), 41 deletions(-) create mode 100644 changelog.d/11395.removal (limited to 'synapse/storage/databases') diff --git a/changelog.d/11395.removal b/changelog.d/11395.removal new file mode 100644 index 0000000000..6c1fd560ad --- /dev/null +++ b/changelog.d/11395.removal @@ -0,0 +1 @@ +Remove deprecated `trust_identity_server_for_password_resets` configuration flag. \ No newline at end of file diff --git a/docker/conf/homeserver.yaml b/docker/conf/homeserver.yaml index 3cba594d02..f10f78a48c 100644 --- a/docker/conf/homeserver.yaml +++ b/docker/conf/homeserver.yaml @@ -148,14 +148,6 @@ bcrypt_rounds: 12 allow_guest_access: {{ "True" if SYNAPSE_ALLOW_GUEST else "False" }} enable_group_creation: true -# The list of identity servers trusted to verify third party -# identifiers by this server. -# -# Also defines the ID server which will be called when an account is -# deactivated (one will be picked arbitrarily). -trusted_third_party_id_servers: - - matrix.org - - vector.im ## Metrics ### diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index 7ee699f981..8478463a2a 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -1728,11 +1728,11 @@ class RegistrationBackgroundUpdateStore(RegistrationWorkerStore): ) self.db_pool.updates.register_background_update_handler( - "user_threepids_grandfather", self._bg_user_threepids_grandfather + "users_set_deactivated_flag", self._background_update_set_deactivated_flag ) - self.db_pool.updates.register_background_update_handler( - "users_set_deactivated_flag", self._background_update_set_deactivated_flag + self.db_pool.updates.register_noop_background_update( + "user_threepids_grandfather" ) self.db_pool.updates.register_background_index_update( @@ -1805,35 +1805,6 @@ class RegistrationBackgroundUpdateStore(RegistrationWorkerStore): return nb_processed - async def _bg_user_threepids_grandfather(self, progress, batch_size): - """We now track which identity servers a user binds their 3PID to, so - we need to handle the case of existing bindings where we didn't track - this. - - We do this by grandfathering in existing user threepids assuming that - they used one of the server configured trusted identity servers. - """ - id_servers = set(self.config.registration.trusted_third_party_id_servers) - - def _bg_user_threepids_grandfather_txn(txn): - sql = """ - INSERT INTO user_threepid_id_server - (user_id, medium, address, id_server) - SELECT user_id, medium, address, ? - FROM user_threepids - """ - - txn.execute_batch(sql, [(id_server,) for id_server in id_servers]) - - if id_servers: - await self.db_pool.runInteraction( - "_bg_user_threepids_grandfather", _bg_user_threepids_grandfather_txn - ) - - await self.db_pool.updates._end_background_update("user_threepids_grandfather") - - return 1 - async def set_user_deactivated_status( self, user_id: str, deactivated: bool ) -> None: diff --git a/tests/utils.py b/tests/utils.py index cf8ba5c5db..983859120f 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -119,7 +119,6 @@ def default_config(name, parse=False): "enable_registration": True, "enable_registration_captcha": False, "macaroon_secret_key": "not even a little secret", - "trusted_third_party_id_servers": [], "password_providers": [], "worker_replication_url": "", "worker_app": None, -- cgit 1.5.1 From 55669bd3de7137553085e9f1c16a686ff657108c Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 23 Nov 2021 10:21:19 -0500 Subject: Add missing type hints to config base classes (#11377) --- changelog.d/11377.bugfix | 1 + changelog.d/11377.misc | 1 + mypy.ini | 3 + synapse/config/_base.py | 157 +++++++++++++++---------- synapse/config/_base.pyi | 87 +++++++++----- synapse/config/cache.py | 4 +- synapse/config/key.py | 3 +- synapse/config/logger.py | 4 +- synapse/config/server.py | 4 +- synapse/config/tls.py | 2 +- synapse/module_api/__init__.py | 2 +- synapse/storage/databases/main/registration.py | 3 +- tests/config/test_load.py | 22 ++-- 13 files changed, 184 insertions(+), 109 deletions(-) create mode 100644 changelog.d/11377.bugfix create mode 100644 changelog.d/11377.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/11377.bugfix b/changelog.d/11377.bugfix new file mode 100644 index 0000000000..9831fb7bbe --- /dev/null +++ b/changelog.d/11377.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in v1.45.0 where the `read_templates` method of the module API would error. diff --git a/changelog.d/11377.misc b/changelog.d/11377.misc new file mode 100644 index 0000000000..3dac625576 --- /dev/null +++ b/changelog.d/11377.misc @@ -0,0 +1 @@ +Add type hints to configuration classes. diff --git a/mypy.ini b/mypy.ini index 308cfd95d8..bc4f59154d 100644 --- a/mypy.ini +++ b/mypy.ini @@ -151,6 +151,9 @@ disallow_untyped_defs = True [mypy-synapse.app.*] disallow_untyped_defs = True +[mypy-synapse.config._base] +disallow_untyped_defs = True + [mypy-synapse.crypto.*] disallow_untyped_defs = True diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 7c4428a138..1265738dc1 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -20,7 +20,18 @@ import os from collections import OrderedDict from hashlib import sha256 from textwrap import dedent -from typing import Any, Iterable, List, MutableMapping, Optional, Union +from typing import ( + Any, + Dict, + Iterable, + List, + MutableMapping, + Optional, + Tuple, + Type, + TypeVar, + Union, +) import attr import jinja2 @@ -78,7 +89,7 @@ CONFIG_FILE_HEADER = """\ """ -def path_exists(file_path): +def path_exists(file_path: str) -> bool: """Check if a file exists Unlike os.path.exists, this throws an exception if there is an error @@ -86,7 +97,7 @@ def path_exists(file_path): the parent dir). Returns: - bool: True if the file exists; False if not. + True if the file exists; False if not. """ try: os.stat(file_path) @@ -102,15 +113,15 @@ class Config: A configuration section, containing configuration keys and values. Attributes: - section (str): The section title of this config object, such as + section: The section title of this config object, such as "tls" or "logger". This is used to refer to it on the root logger (for example, `config.tls.some_option`). Must be defined in subclasses. """ - section = None + section: str - def __init__(self, root_config=None): + def __init__(self, root_config: "RootConfig" = None): self.root = root_config # Get the path to the default Synapse template directory @@ -119,7 +130,7 @@ class Config: ) @staticmethod - def parse_size(value): + def parse_size(value: Union[str, int]) -> int: if isinstance(value, int): return value sizes = {"K": 1024, "M": 1024 * 1024} @@ -162,15 +173,15 @@ class Config: return int(value) * size @staticmethod - def abspath(file_path): + def abspath(file_path: str) -> str: return os.path.abspath(file_path) if file_path else file_path @classmethod - def path_exists(cls, file_path): + def path_exists(cls, file_path: str) -> bool: return path_exists(file_path) @classmethod - def check_file(cls, file_path, config_name): + def check_file(cls, file_path: Optional[str], config_name: str) -> str: if file_path is None: raise ConfigError("Missing config for %s." % (config_name,)) try: @@ -183,7 +194,7 @@ class Config: return cls.abspath(file_path) @classmethod - def ensure_directory(cls, dir_path): + def ensure_directory(cls, dir_path: str) -> str: dir_path = cls.abspath(dir_path) os.makedirs(dir_path, exist_ok=True) if not os.path.isdir(dir_path): @@ -191,7 +202,7 @@ class Config: return dir_path @classmethod - def read_file(cls, file_path, config_name): + def read_file(cls, file_path: Any, config_name: str) -> str: """Deprecated: call read_file directly""" return read_file(file_path, (config_name,)) @@ -284,6 +295,9 @@ class Config: return [env.get_template(filename) for filename in filenames] +TRootConfig = TypeVar("TRootConfig", bound="RootConfig") + + class RootConfig: """ Holder of an application's configuration. @@ -308,7 +322,9 @@ class RootConfig: raise Exception("Failed making %s: %r" % (config_class.section, e)) setattr(self, config_class.section, conf) - def invoke_all(self, func_name: str, *args, **kwargs) -> MutableMapping[str, Any]: + def invoke_all( + self, func_name: str, *args: Any, **kwargs: Any + ) -> MutableMapping[str, Any]: """ Invoke a function on all instantiated config objects this RootConfig is configured to use. @@ -317,6 +333,7 @@ class RootConfig: func_name: Name of function to invoke *args **kwargs + Returns: ordered dictionary of config section name and the result of the function from it. @@ -332,7 +349,7 @@ class RootConfig: return res @classmethod - def invoke_all_static(cls, func_name: str, *args, **kwargs): + def invoke_all_static(cls, func_name: str, *args: Any, **kwargs: any) -> None: """ Invoke a static function on config objects this RootConfig is configured to use. @@ -341,6 +358,7 @@ class RootConfig: func_name: Name of function to invoke *args **kwargs + Returns: ordered dictionary of config section name and the result of the function from it. @@ -351,16 +369,16 @@ class RootConfig: def generate_config( self, - config_dir_path, - data_dir_path, - server_name, - generate_secrets=False, - report_stats=None, - open_private_ports=False, - listeners=None, - tls_certificate_path=None, - tls_private_key_path=None, - ): + config_dir_path: str, + data_dir_path: str, + server_name: str, + generate_secrets: bool = False, + report_stats: Optional[bool] = None, + open_private_ports: bool = False, + listeners: Optional[List[dict]] = None, + tls_certificate_path: Optional[str] = None, + tls_private_key_path: Optional[str] = None, + ) -> str: """ Build a default configuration file @@ -368,27 +386,27 @@ class RootConfig: (eg with --generate_config). Args: - config_dir_path (str): The path where the config files are kept. Used to + config_dir_path: The path where the config files are kept. Used to create filenames for things like the log config and the signing key. - data_dir_path (str): The path where the data files are kept. Used to create + data_dir_path: The path where the data files are kept. Used to create filenames for things like the database and media store. - server_name (str): The server name. Used to initialise the server_name + server_name: The server name. Used to initialise the server_name config param, but also used in the names of some of the config files. - generate_secrets (bool): True if we should generate new secrets for things + generate_secrets: True if we should generate new secrets for things like the macaroon_secret_key. If False, these parameters will be left unset. - report_stats (bool|None): Initial setting for the report_stats setting. + report_stats: Initial setting for the report_stats setting. If None, report_stats will be left unset. - open_private_ports (bool): True to leave private ports (such as the non-TLS + open_private_ports: True to leave private ports (such as the non-TLS HTTP listener) open to the internet. - listeners (list(dict)|None): A list of descriptions of the listeners - synapse should start with each of which specifies a port (str), a list of + listeners: A list of descriptions of the listeners synapse should + start with each of which specifies a port (int), a list of resources (list(str)), tls (bool) and type (str). For example: [{ "port": 8448, @@ -403,16 +421,12 @@ class RootConfig: "type": "http", }], + tls_certificate_path: The path to the tls certificate. - database (str|None): The database type to configure, either `psycog2` - or `sqlite3`. - - tls_certificate_path (str|None): The path to the tls certificate. - - tls_private_key_path (str|None): The path to the tls private key. + tls_private_key_path: The path to the tls private key. Returns: - str: the yaml config file + The yaml config file """ return CONFIG_FILE_HEADER + "\n\n".join( @@ -432,12 +446,15 @@ class RootConfig: ) @classmethod - def load_config(cls, description, argv): + def load_config( + cls: Type[TRootConfig], description: str, argv: List[str] + ) -> TRootConfig: """Parse the commandline and config files Doesn't support config-file-generation: used by the worker apps. - Returns: Config object. + Returns: + Config object. """ config_parser = argparse.ArgumentParser(description=description) cls.add_arguments_to_parser(config_parser) @@ -446,7 +463,7 @@ class RootConfig: return obj @classmethod - def add_arguments_to_parser(cls, config_parser): + def add_arguments_to_parser(cls, config_parser: argparse.ArgumentParser) -> None: """Adds all the config flags to an ArgumentParser. Doesn't support config-file-generation: used by the worker apps. @@ -454,7 +471,7 @@ class RootConfig: Used for workers where we want to add extra flags/subcommands. Args: - config_parser (ArgumentParser): App description + config_parser: App description """ config_parser.add_argument( @@ -477,7 +494,9 @@ class RootConfig: cls.invoke_all_static("add_arguments", config_parser) @classmethod - def load_config_with_parser(cls, parser, argv): + def load_config_with_parser( + cls: Type[TRootConfig], parser: argparse.ArgumentParser, argv: List[str] + ) -> Tuple[TRootConfig, argparse.Namespace]: """Parse the commandline and config files with the given parser Doesn't support config-file-generation: used by the worker apps. @@ -485,13 +504,12 @@ class RootConfig: Used for workers where we want to add extra flags/subcommands. Args: - parser (ArgumentParser) - argv (list[str]) + parser + argv Returns: - tuple[HomeServerConfig, argparse.Namespace]: Returns the parsed - config object and the parsed argparse.Namespace object from - `parser.parse_args(..)` + Returns the parsed config object and the parsed argparse.Namespace + object from parser.parse_args(..)` """ obj = cls() @@ -520,12 +538,15 @@ class RootConfig: return obj, config_args @classmethod - def load_or_generate_config(cls, description, argv): + def load_or_generate_config( + cls: Type[TRootConfig], description: str, argv: List[str] + ) -> Optional[TRootConfig]: """Parse the commandline and config files Supports generation of config files, so is used for the main homeserver app. - Returns: Config object, or None if --generate-config or --generate-keys was set + Returns: + Config object, or None if --generate-config or --generate-keys was set """ parser = argparse.ArgumentParser(description=description) parser.add_argument( @@ -680,16 +701,21 @@ class RootConfig: return obj - def parse_config_dict(self, config_dict, config_dir_path=None, data_dir_path=None): + def parse_config_dict( + self, + config_dict: Dict[str, Any], + config_dir_path: Optional[str] = None, + data_dir_path: Optional[str] = None, + ) -> None: """Read the information from the config dict into this Config object. Args: - config_dict (dict): Configuration data, as read from the yaml + config_dict: Configuration data, as read from the yaml - config_dir_path (str): The path where the config files are kept. Used to + config_dir_path: The path where the config files are kept. Used to create filenames for things like the log config and the signing key. - data_dir_path (str): The path where the data files are kept. Used to create + data_dir_path: The path where the data files are kept. Used to create filenames for things like the database and media store. """ self.invoke_all( @@ -699,17 +725,20 @@ class RootConfig: data_dir_path=data_dir_path, ) - def generate_missing_files(self, config_dict, config_dir_path): + def generate_missing_files( + self, config_dict: Dict[str, Any], config_dir_path: str + ) -> None: self.invoke_all("generate_files", config_dict, config_dir_path) -def read_config_files(config_files): +def read_config_files(config_files: Iterable[str]) -> Dict[str, Any]: """Read the config files into a dict Args: - config_files (iterable[str]): A list of the config files to read + config_files: A list of the config files to read - Returns: dict + Returns: + The configuration dictionary. """ specified_config = {} for config_file in config_files: @@ -733,17 +762,17 @@ def read_config_files(config_files): return specified_config -def find_config_files(search_paths): +def find_config_files(search_paths: List[str]) -> List[str]: """Finds config files using a list of search paths. If a path is a file then that file path is added to the list. If a search path is a directory then all the "*.yaml" files in that directory are added to the list in sorted order. Args: - search_paths(list(str)): A list of paths to search. + search_paths: A list of paths to search. Returns: - list(str): A list of file paths. + A list of file paths. """ config_files = [] @@ -777,7 +806,7 @@ def find_config_files(search_paths): return config_files -@attr.s +@attr.s(auto_attribs=True) class ShardedWorkerHandlingConfig: """Algorithm for choosing which instance is responsible for handling some sharded work. @@ -787,7 +816,7 @@ class ShardedWorkerHandlingConfig: below). """ - instances = attr.ib(type=List[str]) + instances: List[str] def should_handle(self, instance_name: str, key: str) -> bool: """Whether this instance is responsible for handling the given key.""" diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi index c1d9069798..1eb5f5a68c 100644 --- a/synapse/config/_base.pyi +++ b/synapse/config/_base.pyi @@ -1,4 +1,18 @@ -from typing import Any, Iterable, List, Optional +import argparse +from typing import ( + Any, + Dict, + Iterable, + List, + MutableMapping, + Optional, + Tuple, + Type, + TypeVar, + Union, +) + +import jinja2 from synapse.config import ( account_validity, @@ -19,6 +33,7 @@ from synapse.config import ( logger, metrics, modules, + oembed, oidc, password_auth_providers, push, @@ -27,6 +42,7 @@ from synapse.config import ( registration, repository, retention, + room, room_directory, saml2, server, @@ -51,7 +67,9 @@ MISSING_REPORT_STATS_CONFIG_INSTRUCTIONS: str MISSING_REPORT_STATS_SPIEL: str MISSING_SERVER_NAME: str -def path_exists(file_path: str): ... +def path_exists(file_path: str) -> bool: ... + +TRootConfig = TypeVar("TRootConfig", bound="RootConfig") class RootConfig: server: server.ServerConfig @@ -61,6 +79,7 @@ class RootConfig: logging: logger.LoggingConfig ratelimiting: ratelimiting.RatelimitConfig media: repository.ContentRepositoryConfig + oembed: oembed.OembedConfig captcha: captcha.CaptchaConfig voip: voip.VoipConfig registration: registration.RegistrationConfig @@ -80,6 +99,7 @@ class RootConfig: authproviders: password_auth_providers.PasswordAuthProviderConfig push: push.PushConfig spamchecker: spam_checker.SpamCheckerConfig + room: room.RoomConfig groups: groups.GroupsConfig userdirectory: user_directory.UserDirectoryConfig consent: consent.ConsentConfig @@ -87,72 +107,85 @@ class RootConfig: servernotices: server_notices.ServerNoticesConfig roomdirectory: room_directory.RoomDirectoryConfig thirdpartyrules: third_party_event_rules.ThirdPartyRulesConfig - tracer: tracer.TracerConfig + tracing: tracer.TracerConfig redis: redis.RedisConfig modules: modules.ModulesConfig caches: cache.CacheConfig federation: federation.FederationConfig retention: retention.RetentionConfig - config_classes: List = ... + config_classes: List[Type["Config"]] = ... def __init__(self) -> None: ... - def invoke_all(self, func_name: str, *args: Any, **kwargs: Any): ... + def invoke_all( + self, func_name: str, *args: Any, **kwargs: Any + ) -> MutableMapping[str, Any]: ... @classmethod def invoke_all_static(cls, func_name: str, *args: Any, **kwargs: Any) -> None: ... - def __getattr__(self, item: str): ... def parse_config_dict( self, - config_dict: Any, - config_dir_path: Optional[Any] = ..., - data_dir_path: Optional[Any] = ..., + config_dict: Dict[str, Any], + config_dir_path: Optional[str] = ..., + data_dir_path: Optional[str] = ..., ) -> None: ... - read_config: Any = ... def generate_config( self, config_dir_path: str, data_dir_path: str, server_name: str, generate_secrets: bool = ..., - report_stats: Optional[str] = ..., + report_stats: Optional[bool] = ..., open_private_ports: bool = ..., listeners: Optional[Any] = ..., - database_conf: Optional[Any] = ..., tls_certificate_path: Optional[str] = ..., tls_private_key_path: Optional[str] = ..., - ): ... + ) -> str: ... @classmethod - def load_or_generate_config(cls, description: Any, argv: Any): ... + def load_or_generate_config( + cls: Type[TRootConfig], description: str, argv: List[str] + ) -> Optional[TRootConfig]: ... @classmethod - def load_config(cls, description: Any, argv: Any): ... + def load_config( + cls: Type[TRootConfig], description: str, argv: List[str] + ) -> TRootConfig: ... @classmethod - def add_arguments_to_parser(cls, config_parser: Any) -> None: ... + def add_arguments_to_parser( + cls, config_parser: argparse.ArgumentParser + ) -> None: ... @classmethod - def load_config_with_parser(cls, parser: Any, argv: Any): ... + def load_config_with_parser( + cls: Type[TRootConfig], parser: argparse.ArgumentParser, argv: List[str] + ) -> Tuple[TRootConfig, argparse.Namespace]: ... def generate_missing_files( self, config_dict: dict, config_dir_path: str ) -> None: ... class Config: root: RootConfig + default_template_dir: str def __init__(self, root_config: Optional[RootConfig] = ...) -> None: ... - def __getattr__(self, item: str, from_root: bool = ...): ... @staticmethod - def parse_size(value: Any): ... + def parse_size(value: Union[str, int]) -> int: ... @staticmethod - def parse_duration(value: Any): ... + def parse_duration(value: Union[str, int]) -> int: ... @staticmethod - def abspath(file_path: Optional[str]): ... + def abspath(file_path: Optional[str]) -> str: ... @classmethod - def path_exists(cls, file_path: str): ... + def path_exists(cls, file_path: str) -> bool: ... @classmethod - def check_file(cls, file_path: str, config_name: str): ... + def check_file(cls, file_path: str, config_name: str) -> str: ... @classmethod - def ensure_directory(cls, dir_path: str): ... + def ensure_directory(cls, dir_path: str) -> str: ... @classmethod - def read_file(cls, file_path: str, config_name: str): ... + def read_file(cls, file_path: str, config_name: str) -> str: ... + def read_template(self, filenames: str) -> jinja2.Template: ... + def read_templates( + self, + filenames: List[str], + custom_template_directories: Optional[Iterable[str]] = None, + ) -> List[jinja2.Template]: ... -def read_config_files(config_files: List[str]): ... -def find_config_files(search_paths: List[str]): ... +def read_config_files(config_files: Iterable[str]) -> Dict[str, Any]: ... +def find_config_files(search_paths: List[str]) -> List[str]: ... class ShardedWorkerHandlingConfig: instances: List[str] diff --git a/synapse/config/cache.py b/synapse/config/cache.py index d119427ad8..f054455534 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -15,7 +15,7 @@ import os import re import threading -from typing import Callable, Dict +from typing import Callable, Dict, Optional from synapse.python_dependencies import DependencyException, check_requirements @@ -217,7 +217,7 @@ class CacheConfig(Config): expiry_time = cache_config.get("expiry_time") if expiry_time: - self.expiry_time_msec = self.parse_duration(expiry_time) + self.expiry_time_msec: Optional[int] = self.parse_duration(expiry_time) else: self.expiry_time_msec = None diff --git a/synapse/config/key.py b/synapse/config/key.py index 015dbb8a67..035ee2416b 100644 --- a/synapse/config/key.py +++ b/synapse/config/key.py @@ -16,6 +16,7 @@ import hashlib import logging import os +from typing import Any, Dict import attr import jsonschema @@ -312,7 +313,7 @@ class KeyConfig(Config): ) return keys - def generate_files(self, config, config_dir_path): + def generate_files(self, config: Dict[str, Any], config_dir_path: str) -> None: if "signing_key" in config: return diff --git a/synapse/config/logger.py b/synapse/config/logger.py index 5252e61a99..63aab0babe 100644 --- a/synapse/config/logger.py +++ b/synapse/config/logger.py @@ -18,7 +18,7 @@ import os import sys import threading from string import Template -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Dict import yaml from zope.interface import implementer @@ -185,7 +185,7 @@ class LoggingConfig(Config): help=argparse.SUPPRESS, ) - def generate_files(self, config, config_dir_path): + def generate_files(self, config: Dict[str, Any], config_dir_path: str) -> None: log_config = config.get("log_config") if log_config and not os.path.exists(log_config): log_file = self.abspath("homeserver.log") diff --git a/synapse/config/server.py b/synapse/config/server.py index 7bc0030a9e..8445e9dd05 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -421,7 +421,7 @@ class ServerConfig(Config): # before redacting them. redaction_retention_period = config.get("redaction_retention_period", "7d") if redaction_retention_period is not None: - self.redaction_retention_period = self.parse_duration( + self.redaction_retention_period: Optional[int] = self.parse_duration( redaction_retention_period ) else: @@ -430,7 +430,7 @@ class ServerConfig(Config): # How long to keep entries in the `users_ips` table. user_ips_max_age = config.get("user_ips_max_age", "28d") if user_ips_max_age is not None: - self.user_ips_max_age = self.parse_duration(user_ips_max_age) + self.user_ips_max_age: Optional[int] = self.parse_duration(user_ips_max_age) else: self.user_ips_max_age = None diff --git a/synapse/config/tls.py b/synapse/config/tls.py index 21e5ddd15f..4ca111618f 100644 --- a/synapse/config/tls.py +++ b/synapse/config/tls.py @@ -245,7 +245,7 @@ class TlsConfig(Config): cert_path = self.tls_certificate_file logger.info("Loading TLS certificate from %s", cert_path) cert_pem = self.read_file(cert_path, "tls_certificate_path") - cert = crypto.load_certificate(crypto.FILETYPE_PEM, cert_pem) + cert = crypto.load_certificate(crypto.FILETYPE_PEM, cert_pem.encode()) return cert diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index ac8e8142f1..96d7a8f2a9 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -1014,7 +1014,7 @@ class ModuleApi: A list containing the loaded templates, with the orders matching the one of the filenames parameter. """ - return self._hs.config.read_templates( + return self._hs.config.server.read_templates( filenames, (td for td in (self.custom_template_dir, custom_template_directory) if td), ) diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index 8478463a2a..0e8c168667 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -1198,8 +1198,9 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): expiration_ts = now_ms + self._account_validity_period if use_delta: + assert self._account_validity_startup_job_max_delta is not None expiration_ts = random.randrange( - expiration_ts - self._account_validity_startup_job_max_delta, + int(expiration_ts - self._account_validity_startup_job_max_delta), expiration_ts, ) diff --git a/tests/config/test_load.py b/tests/config/test_load.py index d8668d56b2..69a4e9413b 100644 --- a/tests/config/test_load.py +++ b/tests/config/test_load.py @@ -46,15 +46,16 @@ class ConfigLoadingFileTestCase(ConfigFileTestCase): "was: %r" % (config.key.macaroon_secret_key,) ) - config = HomeServerConfig.load_or_generate_config("", ["-c", self.config_file]) + config2 = HomeServerConfig.load_or_generate_config("", ["-c", self.config_file]) + assert config2 is not None self.assertTrue( - hasattr(config.key, "macaroon_secret_key"), + hasattr(config2.key, "macaroon_secret_key"), "Want config to have attr macaroon_secret_key", ) - if len(config.key.macaroon_secret_key) < 5: + if len(config2.key.macaroon_secret_key) < 5: self.fail( "Want macaroon secret key to be string of at least length 5," - "was: %r" % (config.key.macaroon_secret_key,) + "was: %r" % (config2.key.macaroon_secret_key,) ) def test_load_succeeds_if_macaroon_secret_key_missing(self): @@ -62,6 +63,9 @@ class ConfigLoadingFileTestCase(ConfigFileTestCase): config1 = HomeServerConfig.load_config("", ["-c", self.config_file]) config2 = HomeServerConfig.load_config("", ["-c", self.config_file]) config3 = HomeServerConfig.load_or_generate_config("", ["-c", self.config_file]) + assert config1 is not None + assert config2 is not None + assert config3 is not None self.assertEqual( config1.key.macaroon_secret_key, config2.key.macaroon_secret_key ) @@ -78,14 +82,16 @@ class ConfigLoadingFileTestCase(ConfigFileTestCase): config = HomeServerConfig.load_config("", ["-c", self.config_file]) self.assertFalse(config.registration.enable_registration) - config = HomeServerConfig.load_or_generate_config("", ["-c", self.config_file]) - self.assertFalse(config.registration.enable_registration) + config2 = HomeServerConfig.load_or_generate_config("", ["-c", self.config_file]) + assert config2 is not None + self.assertFalse(config2.registration.enable_registration) # Check that either config value is clobbered by the command line. - config = HomeServerConfig.load_or_generate_config( + config3 = HomeServerConfig.load_or_generate_config( "", ["-c", self.config_file, "--enable-registration"] ) - self.assertTrue(config.registration.enable_registration) + assert config3 is not None + self.assertTrue(config3.registration.enable_registration) def test_stats_enabled(self): self.generate_config_and_remove_lines_containing("enable_metrics") -- cgit 1.5.1 From 0d88c4f9030c50bb9e016d9a28f6db27f7913d0b Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Thu, 25 Nov 2021 16:14:54 +0100 Subject: Improve performance of `remove_{hidden,deleted}_devices_from_device_inbox` (#11421) Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- changelog.d/11421.bugfix | 1 + synapse/storage/databases/main/deviceinbox.py | 209 +++++++-------------- .../65/08_device_inbox_background_updates.sql | 18 ++ tests/storage/databases/main/test_deviceinbox.py | 4 +- 4 files changed, 84 insertions(+), 148 deletions(-) create mode 100644 changelog.d/11421.bugfix create mode 100644 synapse/storage/schema/main/delta/65/08_device_inbox_background_updates.sql (limited to 'synapse/storage/databases') diff --git a/changelog.d/11421.bugfix b/changelog.d/11421.bugfix new file mode 100644 index 0000000000..28ac65ea7c --- /dev/null +++ b/changelog.d/11421.bugfix @@ -0,0 +1 @@ +Improve performance of various background database schema updates. diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py index 7c0f953365..ab8766c75b 100644 --- a/synapse/storage/databases/main/deviceinbox.py +++ b/synapse/storage/databases/main/deviceinbox.py @@ -599,6 +599,7 @@ class DeviceInboxBackgroundUpdateStore(SQLBaseStore): DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop" REMOVE_DELETED_DEVICES = "remove_deleted_devices_from_device_inbox" REMOVE_HIDDEN_DEVICES = "remove_hidden_devices_from_device_inbox" + REMOVE_DEAD_DEVICES_FROM_INBOX = "remove_dead_devices_from_device_inbox" def __init__(self, database: DatabasePool, db_conn, hs: "HomeServer"): super().__init__(database, db_conn, hs) @@ -614,14 +615,18 @@ class DeviceInboxBackgroundUpdateStore(SQLBaseStore): self.DEVICE_INBOX_STREAM_ID, self._background_drop_index_device_inbox ) - self.db_pool.updates.register_background_update_handler( - self.REMOVE_DELETED_DEVICES, - self._remove_deleted_devices_from_device_inbox, + # Used to be a background update that deletes all device_inboxes for deleted + # devices. + self.db_pool.updates.register_noop_background_update( + self.REMOVE_DELETED_DEVICES ) + # Used to be a background update that deletes all device_inboxes for hidden + # devices. + self.db_pool.updates.register_noop_background_update(self.REMOVE_HIDDEN_DEVICES) self.db_pool.updates.register_background_update_handler( - self.REMOVE_HIDDEN_DEVICES, - self._remove_hidden_devices_from_device_inbox, + self.REMOVE_DEAD_DEVICES_FROM_INBOX, + self._remove_dead_devices_from_device_inbox, ) async def _background_drop_index_device_inbox(self, progress, batch_size): @@ -636,171 +641,83 @@ class DeviceInboxBackgroundUpdateStore(SQLBaseStore): return 1 - async def _remove_deleted_devices_from_device_inbox( - self, progress: JsonDict, batch_size: int + async def _remove_dead_devices_from_device_inbox( + self, + progress: JsonDict, + batch_size: int, ) -> int: - """A background update that deletes all device_inboxes for deleted devices. - - This should only need to be run once (when users upgrade to v1.47.0) + """A background update to remove devices that were either deleted or hidden from + the device_inbox table. Args: - progress: JsonDict used to store progress of this background update - batch_size: the maximum number of rows to retrieve in a single select query + progress: The update's progress dict. + batch_size: The batch size for this update. Returns: - The number of deleted rows + The number of rows deleted. """ - def _remove_deleted_devices_from_device_inbox_txn( + def _remove_dead_devices_from_device_inbox_txn( txn: LoggingTransaction, - ) -> int: - """stream_id is not unique - we need to use an inclusive `stream_id >= ?` clause, - since we might not have deleted all dead device messages for the stream_id - returned from the previous query + ) -> Tuple[int, bool]: - Then delete only rows matching the `(user_id, device_id, stream_id)` tuple, - to avoid problems of deleting a large number of rows all at once - due to a single device having lots of device messages. - """ + if "max_stream_id" in progress: + max_stream_id = progress["max_stream_id"] + else: + txn.execute("SELECT max(stream_id) FROM device_inbox") + # There's a type mismatch here between how we want to type the row and + # what fetchone says it returns, but we silence it because we know that + # res can't be None. + res: Tuple[Optional[int]] = txn.fetchone() # type: ignore[assignment] + if res[0] is None: + # this can only happen if the `device_inbox` table is empty, in which + # case we have no work to do. + return 0, True + else: + max_stream_id = res[0] - last_stream_id = progress.get("stream_id", 0) + start = progress.get("stream_id", 0) + stop = start + batch_size + # delete rows in `device_inbox` which do *not* correspond to a known, + # unhidden device. sql = """ - SELECT device_id, user_id, stream_id - FROM device_inbox + DELETE FROM device_inbox WHERE - stream_id >= ? - AND (device_id, user_id) NOT IN ( - SELECT device_id, user_id FROM devices + stream_id >= ? AND stream_id < ? + AND NOT EXISTS ( + SELECT * FROM devices d + WHERE + d.device_id=device_inbox.device_id + AND d.user_id=device_inbox.user_id + AND NOT hidden ) - ORDER BY stream_id - LIMIT ? - """ - - txn.execute(sql, (last_stream_id, batch_size)) - rows = txn.fetchall() + """ - num_deleted = 0 - for row in rows: - num_deleted += self.db_pool.simple_delete_txn( - txn, - "device_inbox", - {"device_id": row[0], "user_id": row[1], "stream_id": row[2]}, - ) + txn.execute(sql, (start, stop)) - if rows: - # send more than stream_id to progress - # otherwise it can happen in large deployments that - # no change of status is visible in the log file - # it may be that the stream_id does not change in several runs - self.db_pool.updates._background_update_progress_txn( - txn, - self.REMOVE_DELETED_DEVICES, - { - "device_id": rows[-1][0], - "user_id": rows[-1][1], - "stream_id": rows[-1][2], - }, - ) - - return num_deleted - - number_deleted = await self.db_pool.runInteraction( - "_remove_deleted_devices_from_device_inbox", - _remove_deleted_devices_from_device_inbox_txn, - ) - - # The task is finished when no more lines are deleted. - if not number_deleted: - await self.db_pool.updates._end_background_update( - self.REMOVE_DELETED_DEVICES + self.db_pool.updates._background_update_progress_txn( + txn, + self.REMOVE_DEAD_DEVICES_FROM_INBOX, + { + "stream_id": stop, + "max_stream_id": max_stream_id, + }, ) - return number_deleted - - async def _remove_hidden_devices_from_device_inbox( - self, progress: JsonDict, batch_size: int - ) -> int: - """A background update that deletes all device_inboxes for hidden devices. - - This should only need to be run once (when users upgrade to v1.47.0) - - Args: - progress: JsonDict used to store progress of this background update - batch_size: the maximum number of rows to retrieve in a single select query - - Returns: - The number of deleted rows - """ - - def _remove_hidden_devices_from_device_inbox_txn( - txn: LoggingTransaction, - ) -> int: - """stream_id is not unique - we need to use an inclusive `stream_id >= ?` clause, - since we might not have deleted all hidden device messages for the stream_id - returned from the previous query - - Then delete only rows matching the `(user_id, device_id, stream_id)` tuple, - to avoid problems of deleting a large number of rows all at once - due to a single device having lots of device messages. - """ - - last_stream_id = progress.get("stream_id", 0) - - sql = """ - SELECT device_id, user_id, stream_id - FROM device_inbox - WHERE - stream_id >= ? - AND (device_id, user_id) IN ( - SELECT device_id, user_id FROM devices WHERE hidden = ? - ) - ORDER BY stream_id - LIMIT ? - """ - - txn.execute(sql, (last_stream_id, True, batch_size)) - rows = txn.fetchall() - - num_deleted = 0 - for row in rows: - num_deleted += self.db_pool.simple_delete_txn( - txn, - "device_inbox", - {"device_id": row[0], "user_id": row[1], "stream_id": row[2]}, - ) - - if rows: - # We don't just save the `stream_id` in progress as - # otherwise it can happen in large deployments that - # no change of status is visible in the log file, as - # it may be that the stream_id does not change in several runs - self.db_pool.updates._background_update_progress_txn( - txn, - self.REMOVE_HIDDEN_DEVICES, - { - "device_id": rows[-1][0], - "user_id": rows[-1][1], - "stream_id": rows[-1][2], - }, - ) - - return num_deleted + return stop > max_stream_id - number_deleted = await self.db_pool.runInteraction( - "_remove_hidden_devices_from_device_inbox", - _remove_hidden_devices_from_device_inbox_txn, + finished = await self.db_pool.runInteraction( + "_remove_devices_from_device_inbox_txn", + _remove_dead_devices_from_device_inbox_txn, ) - # The task is finished when no more lines are deleted. - if not number_deleted: + if finished: await self.db_pool.updates._end_background_update( - self.REMOVE_HIDDEN_DEVICES + self.REMOVE_DEAD_DEVICES_FROM_INBOX, ) - return number_deleted + return batch_size class DeviceInboxStore(DeviceInboxWorkerStore, DeviceInboxBackgroundUpdateStore): diff --git a/synapse/storage/schema/main/delta/65/08_device_inbox_background_updates.sql b/synapse/storage/schema/main/delta/65/08_device_inbox_background_updates.sql new file mode 100644 index 0000000000..d79455c2ce --- /dev/null +++ b/synapse/storage/schema/main/delta/65/08_device_inbox_background_updates.sql @@ -0,0 +1,18 @@ +/* Copyright 2021 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Background update to clear the inboxes of hidden and deleted devices. +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (6508, 'remove_dead_devices_from_device_inbox', '{}'); diff --git a/tests/storage/databases/main/test_deviceinbox.py b/tests/storage/databases/main/test_deviceinbox.py index 4b67bd15b7..36c933b9e9 100644 --- a/tests/storage/databases/main/test_deviceinbox.py +++ b/tests/storage/databases/main/test_deviceinbox.py @@ -66,7 +66,7 @@ class DeviceInboxBackgroundUpdateStoreTestCase(HomeserverTestCase): self.store.db_pool.simple_insert( "background_updates", { - "update_name": "remove_deleted_devices_from_device_inbox", + "update_name": "remove_dead_devices_from_device_inbox", "progress_json": "{}", }, ) @@ -140,7 +140,7 @@ class DeviceInboxBackgroundUpdateStoreTestCase(HomeserverTestCase): self.store.db_pool.simple_insert( "background_updates", { - "update_name": "remove_hidden_devices_from_device_inbox", + "update_name": "remove_dead_devices_from_device_inbox", "progress_json": "{}", }, ) -- cgit 1.5.1 From c675a18071ea86cb20bdc426a4c4c34bc629b6af Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Fri, 26 Nov 2021 13:47:24 +0000 Subject: Track ongoing event fetches correctly (again) (#11376) The previous fix for the ongoing event fetches counter (8eec25a1d9d656905db18a2c62a5552e63db2667) was both insufficient and incorrect. When the database is unreachable, `_do_fetch` never gets run and so `_event_fetch_ongoing` is never decremented. The previous fix also moved the `_event_fetch_ongoing` decrement outside of the `_event_fetch_lock` which allowed race conditions to corrupt the counter. --- changelog.d/11376.bugfix | 1 + synapse/storage/databases/main/events_worker.py | 154 +++++++++++++++------ tests/storage/databases/main/test_events_worker.py | 139 ++++++++++++++++++- 3 files changed, 251 insertions(+), 43 deletions(-) create mode 100644 changelog.d/11376.bugfix (limited to 'synapse/storage/databases') diff --git a/changelog.d/11376.bugfix b/changelog.d/11376.bugfix new file mode 100644 index 0000000000..639e48b59b --- /dev/null +++ b/changelog.d/11376.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug where all requests that read events from the database could get stuck as a result of losing the database connection, for real this time. Also fix a race condition introduced in the previous insufficient fix in 1.47.0. diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index c6bf316d5b..c6bcfe1c32 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -72,7 +72,7 @@ from synapse.util.metrics import Measure logger = logging.getLogger(__name__) -# These values are used in the `enqueus_event` and `_do_fetch` methods to +# These values are used in the `enqueue_event` and `_fetch_loop` methods to # control how we batch/bulk fetch events from the database. # The values are plucked out of thing air to make initial sync run faster # on jki.re @@ -602,7 +602,7 @@ class EventsWorkerStore(SQLBaseStore): # already due to `_get_events_from_db`). fetching_deferred: ObservableDeferred[ Dict[str, _EventCacheEntry] - ] = ObservableDeferred(defer.Deferred()) + ] = ObservableDeferred(defer.Deferred(), consumeErrors=True) for event_id in missing_events_ids: self._current_event_fetches[event_id] = fetching_deferred @@ -736,35 +736,118 @@ class EventsWorkerStore(SQLBaseStore): for e in state_to_include.values() ] - def _do_fetch(self, conn: Connection) -> None: + def _maybe_start_fetch_thread(self) -> None: + """Starts an event fetch thread if we are not yet at the maximum number.""" + with self._event_fetch_lock: + if ( + self._event_fetch_list + and self._event_fetch_ongoing < EVENT_QUEUE_THREADS + ): + self._event_fetch_ongoing += 1 + event_fetch_ongoing_gauge.set(self._event_fetch_ongoing) + # `_event_fetch_ongoing` is decremented in `_fetch_thread`. + should_start = True + else: + should_start = False + + if should_start: + run_as_background_process("fetch_events", self._fetch_thread) + + async def _fetch_thread(self) -> None: + """Services requests for events from `_event_fetch_list`.""" + exc = None + try: + await self.db_pool.runWithConnection(self._fetch_loop) + except BaseException as e: + exc = e + raise + finally: + should_restart = False + event_fetches_to_fail = [] + with self._event_fetch_lock: + self._event_fetch_ongoing -= 1 + event_fetch_ongoing_gauge.set(self._event_fetch_ongoing) + + # There may still be work remaining in `_event_fetch_list` if we + # failed, or it was added in between us deciding to exit and + # decrementing `_event_fetch_ongoing`. + if self._event_fetch_list: + if exc is None: + # We decided to exit, but then some more work was added + # before `_event_fetch_ongoing` was decremented. + # If a new event fetch thread was not started, we should + # restart ourselves since the remaining event fetch threads + # may take a while to get around to the new work. + # + # Unfortunately it is not possible to tell whether a new + # event fetch thread was started, so we restart + # unconditionally. If we are unlucky, we will end up with + # an idle fetch thread, but it will time out after + # `EVENT_QUEUE_ITERATIONS * EVENT_QUEUE_TIMEOUT_S` seconds + # in any case. + # + # Note that multiple fetch threads may run down this path at + # the same time. + should_restart = True + elif isinstance(exc, Exception): + if self._event_fetch_ongoing == 0: + # We were the last remaining fetcher and failed. + # Fail any outstanding fetches since no one else will + # handle them. + event_fetches_to_fail = self._event_fetch_list + self._event_fetch_list = [] + else: + # We weren't the last remaining fetcher, so another + # fetcher will pick up the work. This will either happen + # after their existing work, however long that takes, + # or after at most `EVENT_QUEUE_TIMEOUT_S` seconds if + # they are idle. + pass + else: + # The exception is a `SystemExit`, `KeyboardInterrupt` or + # `GeneratorExit`. Don't try to do anything clever here. + pass + + if should_restart: + # We exited cleanly but noticed more work. + self._maybe_start_fetch_thread() + + if event_fetches_to_fail: + # We were the last remaining fetcher and failed. + # Fail any outstanding fetches since no one else will handle them. + assert exc is not None + with PreserveLoggingContext(): + for _, deferred in event_fetches_to_fail: + deferred.errback(exc) + + def _fetch_loop(self, conn: Connection) -> None: """Takes a database connection and waits for requests for events from the _event_fetch_list queue. """ - try: - i = 0 - while True: - with self._event_fetch_lock: - event_list = self._event_fetch_list - self._event_fetch_list = [] - - if not event_list: - single_threaded = self.database_engine.single_threaded - if ( - not self.USE_DEDICATED_DB_THREADS_FOR_EVENT_FETCHING - or single_threaded - or i > EVENT_QUEUE_ITERATIONS - ): - break - else: - self._event_fetch_lock.wait(EVENT_QUEUE_TIMEOUT_S) - i += 1 - continue - i = 0 + i = 0 + while True: + with self._event_fetch_lock: + event_list = self._event_fetch_list + self._event_fetch_list = [] + + if not event_list: + # There are no requests waiting. If we haven't yet reached the + # maximum iteration limit, wait for some more requests to turn up. + # Otherwise, bail out. + single_threaded = self.database_engine.single_threaded + if ( + not self.USE_DEDICATED_DB_THREADS_FOR_EVENT_FETCHING + or single_threaded + or i > EVENT_QUEUE_ITERATIONS + ): + return + + self._event_fetch_lock.wait(EVENT_QUEUE_TIMEOUT_S) + i += 1 + continue + i = 0 - self._fetch_event_list(conn, event_list) - finally: - self._event_fetch_ongoing -= 1 - event_fetch_ongoing_gauge.set(self._event_fetch_ongoing) + self._fetch_event_list(conn, event_list) def _fetch_event_list( self, conn: Connection, event_list: List[Tuple[List[str], defer.Deferred]] @@ -806,9 +889,7 @@ class EventsWorkerStore(SQLBaseStore): # We only want to resolve deferreds from the main thread def fire(evs, exc): for _, d in evs: - if not d.called: - with PreserveLoggingContext(): - d.errback(exc) + d.errback(exc) with PreserveLoggingContext(): self.hs.get_reactor().callFromThread(fire, event_list, e) @@ -983,20 +1064,9 @@ class EventsWorkerStore(SQLBaseStore): events_d = defer.Deferred() with self._event_fetch_lock: self._event_fetch_list.append((events, events_d)) - self._event_fetch_lock.notify() - if self._event_fetch_ongoing < EVENT_QUEUE_THREADS: - self._event_fetch_ongoing += 1 - event_fetch_ongoing_gauge.set(self._event_fetch_ongoing) - should_start = True - else: - should_start = False - - if should_start: - run_as_background_process( - "fetch_events", self.db_pool.runWithConnection, self._do_fetch - ) + self._maybe_start_fetch_thread() logger.debug("Loading %d events: %s", len(events), events) with PreserveLoggingContext(): diff --git a/tests/storage/databases/main/test_events_worker.py b/tests/storage/databases/main/test_events_worker.py index a649e8c618..5ae491ff5a 100644 --- a/tests/storage/databases/main/test_events_worker.py +++ b/tests/storage/databases/main/test_events_worker.py @@ -12,11 +12,24 @@ # See the License for the specific language governing permissions and # limitations under the License. import json +from contextlib import contextmanager +from typing import Generator +from twisted.enterprise.adbapi import ConnectionPool +from twisted.internet.defer import ensureDeferred +from twisted.test.proto_helpers import MemoryReactor + +from synapse.api.room_versions import EventFormatVersions, RoomVersions from synapse.logging.context import LoggingContext from synapse.rest import admin from synapse.rest.client import login, room -from synapse.storage.databases.main.events_worker import EventsWorkerStore +from synapse.server import HomeServer +from synapse.storage.databases.main.events_worker import ( + EVENT_QUEUE_THREADS, + EventsWorkerStore, +) +from synapse.storage.types import Connection +from synapse.util import Clock from synapse.util.async_helpers import yieldable_gather_results from tests import unittest @@ -144,3 +157,127 @@ class EventCacheTestCase(unittest.HomeserverTestCase): # We should have fetched the event from the DB self.assertEqual(ctx.get_resource_usage().evt_db_fetch_count, 1) + + +class DatabaseOutageTestCase(unittest.HomeserverTestCase): + """Test event fetching during a database outage.""" + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer): + self.store: EventsWorkerStore = hs.get_datastore() + + self.room_id = f"!room:{hs.hostname}" + self.event_ids = [f"event{i}" for i in range(20)] + + self._populate_events() + + def _populate_events(self) -> None: + """Ensure that there are test events in the database. + + When testing with the in-memory SQLite database, all the events are lost during + the simulated outage. + + To ensure consistency between `room_id`s and `event_id`s before and after the + outage, rows are built and inserted manually. + + Upserts are used to handle the non-SQLite case where events are not lost. + """ + self.get_success( + self.store.db_pool.simple_upsert( + "rooms", + {"room_id": self.room_id}, + {"room_version": RoomVersions.V4.identifier}, + ) + ) + + self.event_ids = [f"event{i}" for i in range(20)] + for idx, event_id in enumerate(self.event_ids): + self.get_success( + self.store.db_pool.simple_upsert( + "events", + {"event_id": event_id}, + { + "event_id": event_id, + "room_id": self.room_id, + "topological_ordering": idx, + "stream_ordering": idx, + "type": "test", + "processed": True, + "outlier": False, + }, + ) + ) + self.get_success( + self.store.db_pool.simple_upsert( + "event_json", + {"event_id": event_id}, + { + "room_id": self.room_id, + "json": json.dumps({"type": "test", "room_id": self.room_id}), + "internal_metadata": "{}", + "format_version": EventFormatVersions.V3, + }, + ) + ) + + @contextmanager + def _outage(self) -> Generator[None, None, None]: + """Simulate a database outage. + + Returns: + A context manager. While the context is active, any attempts to connect to + the database will fail. + """ + connection_pool = self.store.db_pool._db_pool + + # Close all connections and shut down the database `ThreadPool`. + connection_pool.close() + + # Restart the database `ThreadPool`. + connection_pool.start() + + original_connection_factory = connection_pool.connectionFactory + + def connection_factory(_pool: ConnectionPool) -> Connection: + raise Exception("Could not connect to the database.") + + connection_pool.connectionFactory = connection_factory # type: ignore[assignment] + try: + yield + finally: + connection_pool.connectionFactory = original_connection_factory + + # If the in-memory SQLite database is being used, all the events are gone. + # Restore the test data. + self._populate_events() + + def test_failure(self) -> None: + """Test that event fetches do not get stuck during a database outage.""" + with self._outage(): + failure = self.get_failure( + self.store.get_event(self.event_ids[0]), Exception + ) + self.assertEqual(str(failure.value), "Could not connect to the database.") + + def test_recovery(self) -> None: + """Test that event fetchers recover after a database outage.""" + with self._outage(): + # Kick off a bunch of event fetches but do not pump the reactor + event_deferreds = [] + for event_id in self.event_ids: + event_deferreds.append(ensureDeferred(self.store.get_event(event_id))) + + # We should have maxed out on event fetcher threads + self.assertEqual(self.store._event_fetch_ongoing, EVENT_QUEUE_THREADS) + + # All the event fetchers will fail + self.pump() + self.assertEqual(self.store._event_fetch_ongoing, 0) + + for event_deferred in event_deferreds: + failure = self.get_failure(event_deferred, Exception) + self.assertEqual( + str(failure.value), "Could not connect to the database." + ) + + # This next event fetch should succeed + self.get_success(self.store.get_event(self.event_ids[0])) -- cgit 1.5.1 From 1d8b80b3346b31a297668e093fb813d9ce7a1b48 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Fri, 26 Nov 2021 14:27:14 +0000 Subject: Support expiry of refresh tokens and expiry of the overall session when refresh tokens are in use. (#11425) --- changelog.d/11425.feature | 1 + synapse/config/registration.py | 24 ++-- synapse/handlers/auth.py | 90 +++++++++++++-- synapse/handlers/register.py | 44 ++++++-- synapse/rest/client/login.py | 52 ++++++--- synapse/storage/databases/main/registration.py | 28 ++++- .../main/delta/65/10_expirable_refresh_tokens.sql | 28 +++++ tests/rest/client/test_auth.py | 125 ++++++++++++++++++++- 8 files changed, 338 insertions(+), 54 deletions(-) create mode 100644 changelog.d/11425.feature create mode 100644 synapse/storage/schema/main/delta/65/10_expirable_refresh_tokens.sql (limited to 'synapse/storage/databases') diff --git a/changelog.d/11425.feature b/changelog.d/11425.feature new file mode 100644 index 0000000000..806dd5d91c --- /dev/null +++ b/changelog.d/11425.feature @@ -0,0 +1 @@ +Support expiry of refresh tokens and expiry of the overall session when refresh tokens are in use. \ No newline at end of file diff --git a/synapse/config/registration.py b/synapse/config/registration.py index 61e569d412..5e21548060 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -113,14 +113,11 @@ class RegistrationConfig(Config): self.session_lifetime = session_lifetime # The `refreshable_access_token_lifetime` applies for tokens that can be renewed - # using a refresh token, as per MSC2918. If it is `None`, the refresh - # token mechanism is disabled. - # - # Since it is incompatible with the `session_lifetime` mechanism, it is set to - # `None` by default if a `session_lifetime` is set. + # using a refresh token, as per MSC2918. + # If it is `None`, the refresh token mechanism is disabled. refreshable_access_token_lifetime = config.get( "refreshable_access_token_lifetime", - "5m" if session_lifetime is None else None, + "5m", ) if refreshable_access_token_lifetime is not None: refreshable_access_token_lifetime = self.parse_duration( @@ -128,17 +125,10 @@ class RegistrationConfig(Config): ) self.refreshable_access_token_lifetime = refreshable_access_token_lifetime - if ( - session_lifetime is not None - and refreshable_access_token_lifetime is not None - ): - raise ConfigError( - "The refresh token mechanism is incompatible with the " - "`session_lifetime` option. Consider disabling the " - "`session_lifetime` option or disabling the refresh token " - "mechanism by removing the `refreshable_access_token_lifetime` " - "option." - ) + refresh_token_lifetime = config.get("refresh_token_lifetime") + if refresh_token_lifetime is not None: + refresh_token_lifetime = self.parse_duration(refresh_token_lifetime) + self.refresh_token_lifetime = refresh_token_lifetime # The fallback template used for authenticating using a registration token self.registration_token_template = self.read_template("registration_token.html") diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 4b66a9862f..4d9c4e5834 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -18,6 +18,7 @@ import time import unicodedata import urllib.parse from binascii import crc32 +from http import HTTPStatus from typing import ( TYPE_CHECKING, Any, @@ -756,53 +757,109 @@ class AuthHandler: async def refresh_token( self, refresh_token: str, - valid_until_ms: Optional[int], - ) -> Tuple[str, str]: + access_token_valid_until_ms: Optional[int], + refresh_token_valid_until_ms: Optional[int], + ) -> Tuple[str, str, Optional[int]]: """ Consumes a refresh token and generate both a new access token and a new refresh token from it. The consumed refresh token is considered invalid after the first use of the new access token or the new refresh token. + The lifetime of both the access token and refresh token will be capped so that they + do not exceed the session's ultimate expiry time, if applicable. + Args: refresh_token: The token to consume. - valid_until_ms: The expiration timestamp of the new access token. - + access_token_valid_until_ms: The expiration timestamp of the new access token. + None if the access token does not expire. + refresh_token_valid_until_ms: The expiration timestamp of the new refresh token. + None if the refresh token does not expire. Returns: - A tuple containing the new access token and refresh token + A tuple containing: + - the new access token + - the new refresh token + - the actual expiry time of the access token, which may be earlier than + `access_token_valid_until_ms`. """ # Verify the token signature first before looking up the token if not self._verify_refresh_token(refresh_token): - raise SynapseError(401, "invalid refresh token", Codes.UNKNOWN_TOKEN) + raise SynapseError( + HTTPStatus.UNAUTHORIZED, "invalid refresh token", Codes.UNKNOWN_TOKEN + ) existing_token = await self.store.lookup_refresh_token(refresh_token) if existing_token is None: - raise SynapseError(401, "refresh token does not exist", Codes.UNKNOWN_TOKEN) + raise SynapseError( + HTTPStatus.UNAUTHORIZED, + "refresh token does not exist", + Codes.UNKNOWN_TOKEN, + ) if ( existing_token.has_next_access_token_been_used or existing_token.has_next_refresh_token_been_refreshed ): raise SynapseError( - 403, "refresh token isn't valid anymore", Codes.FORBIDDEN + HTTPStatus.FORBIDDEN, + "refresh token isn't valid anymore", + Codes.FORBIDDEN, + ) + + now_ms = self._clock.time_msec() + + if existing_token.expiry_ts is not None and existing_token.expiry_ts < now_ms: + + raise SynapseError( + HTTPStatus.FORBIDDEN, + "The supplied refresh token has expired", + Codes.FORBIDDEN, ) + if existing_token.ultimate_session_expiry_ts is not None: + # This session has a bounded lifetime, even across refreshes. + + if access_token_valid_until_ms is not None: + access_token_valid_until_ms = min( + access_token_valid_until_ms, + existing_token.ultimate_session_expiry_ts, + ) + else: + access_token_valid_until_ms = existing_token.ultimate_session_expiry_ts + + if refresh_token_valid_until_ms is not None: + refresh_token_valid_until_ms = min( + refresh_token_valid_until_ms, + existing_token.ultimate_session_expiry_ts, + ) + else: + refresh_token_valid_until_ms = existing_token.ultimate_session_expiry_ts + if existing_token.ultimate_session_expiry_ts < now_ms: + raise SynapseError( + HTTPStatus.FORBIDDEN, + "The session has expired and can no longer be refreshed", + Codes.FORBIDDEN, + ) + ( new_refresh_token, new_refresh_token_id, ) = await self.create_refresh_token_for_user_id( - user_id=existing_token.user_id, device_id=existing_token.device_id + user_id=existing_token.user_id, + device_id=existing_token.device_id, + expiry_ts=refresh_token_valid_until_ms, + ultimate_session_expiry_ts=existing_token.ultimate_session_expiry_ts, ) access_token = await self.create_access_token_for_user_id( user_id=existing_token.user_id, device_id=existing_token.device_id, - valid_until_ms=valid_until_ms, + valid_until_ms=access_token_valid_until_ms, refresh_token_id=new_refresh_token_id, ) await self.store.replace_refresh_token( existing_token.token_id, new_refresh_token_id ) - return access_token, new_refresh_token + return access_token, new_refresh_token, access_token_valid_until_ms def _verify_refresh_token(self, token: str) -> bool: """ @@ -836,6 +893,8 @@ class AuthHandler: self, user_id: str, device_id: str, + expiry_ts: Optional[int], + ultimate_session_expiry_ts: Optional[int], ) -> Tuple[str, int]: """ Creates a new refresh token for the user with the given user ID. @@ -843,6 +902,13 @@ class AuthHandler: Args: user_id: canonical user ID device_id: the device ID to associate with the token. + expiry_ts (milliseconds since the epoch): Time after which the + refresh token cannot be used. + If None, the refresh token never expires until it has been used. + ultimate_session_expiry_ts (milliseconds since the epoch): + Time at which the session will end and can not be extended any + further. + If None, the session can be refreshed indefinitely. Returns: The newly created refresh token and its ID in the database @@ -852,6 +918,8 @@ class AuthHandler: user_id=user_id, token=refresh_token, device_id=device_id, + expiry_ts=expiry_ts, + ultimate_session_expiry_ts=ultimate_session_expiry_ts, ) return refresh_token, refresh_token_id diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 448a36108e..8136ae264d 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -119,6 +119,7 @@ class RegistrationHandler: self.refreshable_access_token_lifetime = ( hs.config.registration.refreshable_access_token_lifetime ) + self.refresh_token_lifetime = hs.config.registration.refresh_token_lifetime init_counters_for_auth_provider("") @@ -793,13 +794,13 @@ class RegistrationHandler: class and RegisterDeviceReplicationServlet. """ assert not self.hs.config.worker.worker_app - valid_until_ms = None + access_token_expiry = None if self.session_lifetime is not None: if is_guest: raise Exception( "session_lifetime is not currently implemented for guest access" ) - valid_until_ms = self.clock.time_msec() + self.session_lifetime + access_token_expiry = self.clock.time_msec() + self.session_lifetime refresh_token = None refresh_token_id = None @@ -808,25 +809,52 @@ class RegistrationHandler: user_id, device_id, initial_display_name ) if is_guest: - assert valid_until_ms is None + assert access_token_expiry is None access_token = self.macaroon_gen.generate_guest_access_token(user_id) else: if should_issue_refresh_token: + now_ms = self.clock.time_msec() + + # Set the expiry time of the refreshable access token + access_token_expiry = now_ms + self.refreshable_access_token_lifetime + + # Set the refresh token expiry time (if configured) + refresh_token_expiry = None + if self.refresh_token_lifetime is not None: + refresh_token_expiry = now_ms + self.refresh_token_lifetime + + # Set an ultimate session expiry time (if configured) + ultimate_session_expiry_ts = None + if self.session_lifetime is not None: + ultimate_session_expiry_ts = now_ms + self.session_lifetime + + # Also ensure that the issued tokens don't outlive the + # session. + # (It would be weird to configure a homeserver with a shorter + # session lifetime than token lifetime, but may as well handle + # it.) + access_token_expiry = min( + access_token_expiry, ultimate_session_expiry_ts + ) + if refresh_token_expiry is not None: + refresh_token_expiry = min( + refresh_token_expiry, ultimate_session_expiry_ts + ) + ( refresh_token, refresh_token_id, ) = await self._auth_handler.create_refresh_token_for_user_id( user_id, device_id=registered_device_id, - ) - valid_until_ms = ( - self.clock.time_msec() + self.refreshable_access_token_lifetime + expiry_ts=refresh_token_expiry, + ultimate_session_expiry_ts=ultimate_session_expiry_ts, ) access_token = await self._auth_handler.create_access_token_for_user_id( user_id, device_id=registered_device_id, - valid_until_ms=valid_until_ms, + valid_until_ms=access_token_expiry, is_appservice_ghost=is_appservice_ghost, refresh_token_id=refresh_token_id, ) @@ -834,7 +862,7 @@ class RegistrationHandler: return { "device_id": registered_device_id, "access_token": access_token, - "valid_until_ms": valid_until_ms, + "valid_until_ms": access_token_expiry, "refresh_token": refresh_token, } diff --git a/synapse/rest/client/login.py b/synapse/rest/client/login.py index 67e03dca04..c982e54156 100644 --- a/synapse/rest/client/login.py +++ b/synapse/rest/client/login.py @@ -14,7 +14,17 @@ import logging import re -from typing import TYPE_CHECKING, Any, Awaitable, Callable, Dict, List, Optional, Tuple +from typing import ( + TYPE_CHECKING, + Any, + Awaitable, + Callable, + Dict, + List, + Optional, + Tuple, + Union, +) from typing_extensions import TypedDict @@ -458,6 +468,7 @@ class RefreshTokenServlet(RestServlet): self.refreshable_access_token_lifetime = ( hs.config.registration.refreshable_access_token_lifetime ) + self.refresh_token_lifetime = hs.config.registration.refresh_token_lifetime async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: refresh_submission = parse_json_object_from_request(request) @@ -467,22 +478,33 @@ class RefreshTokenServlet(RestServlet): if not isinstance(token, str): raise SynapseError(400, "Invalid param: refresh_token", Codes.INVALID_PARAM) - valid_until_ms = ( - self._clock.time_msec() + self.refreshable_access_token_lifetime - ) - access_token, refresh_token = await self._auth_handler.refresh_token( - token, valid_until_ms - ) - expires_in_ms = valid_until_ms - self._clock.time_msec() - return ( - 200, - { - "access_token": access_token, - "refresh_token": refresh_token, - "expires_in_ms": expires_in_ms, - }, + now = self._clock.time_msec() + access_valid_until_ms = None + if self.refreshable_access_token_lifetime is not None: + access_valid_until_ms = now + self.refreshable_access_token_lifetime + refresh_valid_until_ms = None + if self.refresh_token_lifetime is not None: + refresh_valid_until_ms = now + self.refresh_token_lifetime + + ( + access_token, + refresh_token, + actual_access_token_expiry, + ) = await self._auth_handler.refresh_token( + token, access_valid_until_ms, refresh_valid_until_ms ) + response: Dict[str, Union[str, int]] = { + "access_token": access_token, + "refresh_token": refresh_token, + } + + # expires_in_ms is only present if the token expires + if actual_access_token_expiry is not None: + response["expires_in_ms"] = actual_access_token_expiry - now + + return 200, response + class SsoRedirectServlet(RestServlet): PATTERNS = list(client_patterns("/login/(cas|sso)/redirect$", v1=True)) + [ diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index 0e8c168667..e1ddf06916 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -106,6 +106,15 @@ class RefreshTokenLookupResult: has_next_access_token_been_used: bool """True if the next access token was already used at least once.""" + expiry_ts: Optional[int] + """The time at which the refresh token expires and can not be used. + If None, the refresh token doesn't expire.""" + + ultimate_session_expiry_ts: Optional[int] + """The time at which the session comes to an end and can no longer be + refreshed. + If None, the session can be refreshed indefinitely.""" + class RegistrationWorkerStore(CacheInvalidationWorkerStore): def __init__( @@ -1626,8 +1635,10 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): rt.user_id, rt.device_id, rt.next_token_id, - (nrt.next_token_id IS NOT NULL) has_next_refresh_token_been_refreshed, - at.used has_next_access_token_been_used + (nrt.next_token_id IS NOT NULL) AS has_next_refresh_token_been_refreshed, + at.used AS has_next_access_token_been_used, + rt.expiry_ts, + rt.ultimate_session_expiry_ts FROM refresh_tokens rt LEFT JOIN refresh_tokens nrt ON rt.next_token_id = nrt.id LEFT JOIN access_tokens at ON at.refresh_token_id = nrt.id @@ -1648,6 +1659,8 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): has_next_refresh_token_been_refreshed=row[4], # This column is nullable, ensure it's a boolean has_next_access_token_been_used=(row[5] or False), + expiry_ts=row[6], + ultimate_session_expiry_ts=row[7], ) return await self.db_pool.runInteraction( @@ -1915,6 +1928,8 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore): user_id: str, token: str, device_id: Optional[str], + expiry_ts: Optional[int], + ultimate_session_expiry_ts: Optional[int], ) -> int: """Adds a refresh token for the given user. @@ -1922,6 +1937,13 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore): user_id: The user ID. token: The new access token to add. device_id: ID of the device to associate with the refresh token. + expiry_ts (milliseconds since the epoch): Time after which the + refresh token cannot be used. + If None, the refresh token never expires until it has been used. + ultimate_session_expiry_ts (milliseconds since the epoch): + Time at which the session will end and can not be extended any + further. + If None, the session can be refreshed indefinitely. Raises: StoreError if there was a problem adding this. Returns: @@ -1937,6 +1959,8 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore): "device_id": device_id, "token": token, "next_token_id": None, + "expiry_ts": expiry_ts, + "ultimate_session_expiry_ts": ultimate_session_expiry_ts, }, desc="add_refresh_token_to_user", ) diff --git a/synapse/storage/schema/main/delta/65/10_expirable_refresh_tokens.sql b/synapse/storage/schema/main/delta/65/10_expirable_refresh_tokens.sql new file mode 100644 index 0000000000..bdc491c817 --- /dev/null +++ b/synapse/storage/schema/main/delta/65/10_expirable_refresh_tokens.sql @@ -0,0 +1,28 @@ +/* Copyright 2021 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +ALTER TABLE refresh_tokens + -- We add an expiry_ts column (in milliseconds since the Epoch) to refresh tokens. + -- They may not be used after they have expired. + -- If null, then the refresh token's lifetime is unlimited. + ADD COLUMN expiry_ts BIGINT DEFAULT NULL; + +ALTER TABLE refresh_tokens + -- We also add an ultimate session expiry time (in milliseconds since the Epoch). + -- No matter how much the access and refresh tokens are refreshed, they cannot + -- be extended past this time. + -- If null, then the session length is unlimited. + ADD COLUMN ultimate_session_expiry_ts BIGINT DEFAULT NULL; diff --git a/tests/rest/client/test_auth.py b/tests/rest/client/test_auth.py index 8552671431..8045b7f76a 100644 --- a/tests/rest/client/test_auth.py +++ b/tests/rest/client/test_auth.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from http import HTTPStatus from typing import Optional, Union from twisted.internet.defer import succeed @@ -513,6 +514,16 @@ class RefreshAuthTests(unittest.HomeserverTestCase): self.user_pass = "pass" self.user = self.register_user("test", self.user_pass) + def use_refresh_token(self, refresh_token: str) -> FakeChannel: + """ + Helper that makes a request to use a refresh token. + """ + return self.make_request( + "POST", + "/_matrix/client/unstable/org.matrix.msc2918.refresh_token/refresh", + {"refresh_token": refresh_token}, + ) + def test_login_issue_refresh_token(self): """ A login response should include a refresh_token only if asked. @@ -599,7 +610,7 @@ class RefreshAuthTests(unittest.HomeserverTestCase): ) @override_config({"refreshable_access_token_lifetime": "1m"}) - def test_refresh_token_expiration(self): + def test_refreshable_access_token_expiration(self): """ The access token should have some time as specified in the config. """ @@ -623,6 +634,118 @@ class RefreshAuthTests(unittest.HomeserverTestCase): self.assertApproximates( refresh_response.json_body["expires_in_ms"], 60 * 1000, 100 ) + access_token = refresh_response.json_body["access_token"] + + # Advance 59 seconds in the future (just shy of 1 minute, the time of expiry) + self.reactor.advance(59.0) + # Check that our token is valid + self.assertEqual( + self.make_request( + "GET", "/_matrix/client/v3/account/whoami", access_token=access_token + ).code, + HTTPStatus.OK, + ) + + # Advance 2 more seconds (just past the time of expiry) + self.reactor.advance(2.0) + # Check that our token is invalid + self.assertEqual( + self.make_request( + "GET", "/_matrix/client/v3/account/whoami", access_token=access_token + ).code, + HTTPStatus.UNAUTHORIZED, + ) + + @override_config( + {"refreshable_access_token_lifetime": "1m", "refresh_token_lifetime": "2m"} + ) + def test_refresh_token_expiry(self): + """ + The refresh token can be configured to have a limited lifetime. + When that lifetime has ended, the refresh token can no longer be used to + refresh the session. + """ + + body = {"type": "m.login.password", "user": "test", "password": self.user_pass} + login_response = self.make_request( + "POST", + "/_matrix/client/r0/login?org.matrix.msc2918.refresh_token=true", + body, + ) + self.assertEqual(login_response.code, HTTPStatus.OK, login_response.result) + refresh_token1 = login_response.json_body["refresh_token"] + + # Advance 119 seconds in the future (just shy of 2 minutes) + self.reactor.advance(119.0) + + # Refresh our session. The refresh token should still JUST be valid right now. + # By doing so, we get a new access token and a new refresh token. + refresh_response = self.use_refresh_token(refresh_token1) + self.assertEqual(refresh_response.code, HTTPStatus.OK, refresh_response.result) + self.assertIn( + "refresh_token", + refresh_response.json_body, + "No new refresh token returned after refresh.", + ) + refresh_token2 = refresh_response.json_body["refresh_token"] + + # Advance 121 seconds in the future (just a bit more than 2 minutes) + self.reactor.advance(121.0) + + # Try to refresh our session, but instead notice that the refresh token is + # not valid (it just expired). + refresh_response = self.use_refresh_token(refresh_token2) + self.assertEqual( + refresh_response.code, HTTPStatus.FORBIDDEN, refresh_response.result + ) + + @override_config( + { + "refreshable_access_token_lifetime": "2m", + "refresh_token_lifetime": "2m", + "session_lifetime": "3m", + } + ) + def test_ultimate_session_expiry(self): + """ + The session can be configured to have an ultimate, limited lifetime. + """ + + body = {"type": "m.login.password", "user": "test", "password": self.user_pass} + login_response = self.make_request( + "POST", + "/_matrix/client/r0/login?org.matrix.msc2918.refresh_token=true", + body, + ) + self.assertEqual(login_response.code, 200, login_response.result) + refresh_token = login_response.json_body["refresh_token"] + + # Advance shy of 2 minutes into the future + self.reactor.advance(119.0) + + # Refresh our session. The refresh token should still be valid right now. + refresh_response = self.use_refresh_token(refresh_token) + self.assertEqual(refresh_response.code, 200, refresh_response.result) + self.assertIn( + "refresh_token", + refresh_response.json_body, + "No new refresh token returned after refresh.", + ) + # Notice that our access token lifetime has been diminished to match the + # session lifetime. + # 3 minutes - 119 seconds = 61 seconds. + self.assertEqual(refresh_response.json_body["expires_in_ms"], 61_000) + refresh_token = refresh_response.json_body["refresh_token"] + + # Advance 61 seconds into the future. Our session should have expired + # now, because we've had our 3 minutes. + self.reactor.advance(61.0) + + # Try to issue a new, refreshed, access token. + # This should fail because the refresh token's lifetime has also been + # diminished as our session expired. + refresh_response = self.use_refresh_token(refresh_token) + self.assertEqual(refresh_response.code, 403, refresh_response.result) def test_refresh_token_invalidation(self): """Refresh tokens are invalidated after first use of the next token. -- cgit 1.5.1 From ffd858aa68239aeaf06591d94c0ab1b3c185440f Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Fri, 26 Nov 2021 18:41:31 +0000 Subject: Add type hints to `synapse/storage/databases/main/events_worker.py` (#11411) Also refactor the stream ID trackers/generators a bit and try to document them better. --- changelog.d/11411.misc | 1 + mypy.ini | 4 +- .../slave/storage/_slaved_id_tracker.py | 22 +-- synapse/replication/slave/storage/push_rule.py | 4 - synapse/replication/tcp/streams/events.py | 6 +- synapse/state/__init__.py | 2 +- synapse/state/v1.py | 3 +- synapse/storage/_base.py | 4 +- synapse/storage/databases/main/events.py | 29 +-- synapse/storage/databases/main/events_worker.py | 218 ++++++++++++++------- synapse/storage/databases/main/push_rule.py | 11 +- synapse/storage/util/id_generators.py | 116 ++++++----- tests/replication/test_sharded_event_persister.py | 6 +- 13 files changed, 255 insertions(+), 171 deletions(-) create mode 100644 changelog.d/11411.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/11411.misc b/changelog.d/11411.misc new file mode 100644 index 0000000000..86594a332d --- /dev/null +++ b/changelog.d/11411.misc @@ -0,0 +1 @@ +Add type hints to storage classes. diff --git a/mypy.ini b/mypy.ini index bc4f59154d..eb3976e74c 100644 --- a/mypy.ini +++ b/mypy.ini @@ -33,7 +33,6 @@ exclude = (?x) |synapse/storage/databases/main/event_federation.py |synapse/storage/databases/main/event_push_actions.py |synapse/storage/databases/main/events_bg_updates.py - |synapse/storage/databases/main/events_worker.py |synapse/storage/databases/main/group_server.py |synapse/storage/databases/main/metrics.py |synapse/storage/databases/main/monthly_active_users.py @@ -184,6 +183,9 @@ disallow_untyped_defs = True [mypy-synapse.storage.databases.main.directory] disallow_untyped_defs = True +[mypy-synapse.storage.databases.main.events_worker] +disallow_untyped_defs = True + [mypy-synapse.storage.databases.main.room_batch] disallow_untyped_defs = True diff --git a/synapse/replication/slave/storage/_slaved_id_tracker.py b/synapse/replication/slave/storage/_slaved_id_tracker.py index 8c1bf9227a..fa132d10b4 100644 --- a/synapse/replication/slave/storage/_slaved_id_tracker.py +++ b/synapse/replication/slave/storage/_slaved_id_tracker.py @@ -14,10 +14,18 @@ from typing import List, Optional, Tuple from synapse.storage.database import LoggingDatabaseConnection -from synapse.storage.util.id_generators import _load_current_id +from synapse.storage.util.id_generators import AbstractStreamIdTracker, _load_current_id -class SlavedIdTracker: +class SlavedIdTracker(AbstractStreamIdTracker): + """Tracks the "current" stream ID of a stream with a single writer. + + See `AbstractStreamIdTracker` for more details. + + Note that this class does not work correctly when there are multiple + writers. + """ + def __init__( self, db_conn: LoggingDatabaseConnection, @@ -36,17 +44,7 @@ class SlavedIdTracker: self._current = (max if self.step > 0 else min)(self._current, new_id) def get_current_token(self) -> int: - """ - - Returns: - int - """ return self._current def get_current_token_for_writer(self, instance_name: str) -> int: - """Returns the position of the given writer. - - For streams with single writers this is equivalent to - `get_current_token`. - """ return self.get_current_token() diff --git a/synapse/replication/slave/storage/push_rule.py b/synapse/replication/slave/storage/push_rule.py index 4d5f862862..7541e21de9 100644 --- a/synapse/replication/slave/storage/push_rule.py +++ b/synapse/replication/slave/storage/push_rule.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker from synapse.replication.tcp.streams import PushRulesStream from synapse.storage.databases.main.push_rule import PushRulesWorkerStore @@ -25,9 +24,6 @@ class SlavedPushRuleStore(SlavedEventStore, PushRulesWorkerStore): return self._push_rules_stream_id_gen.get_current_token() def process_replication_rows(self, stream_name, instance_name, token, rows): - # We assert this for the benefit of mypy - assert isinstance(self._push_rules_stream_id_gen, SlavedIdTracker) - if stream_name == PushRulesStream.NAME: self._push_rules_stream_id_gen.advance(instance_name, token) for row in rows: diff --git a/synapse/replication/tcp/streams/events.py b/synapse/replication/tcp/streams/events.py index a030e9299e..a390cfcb74 100644 --- a/synapse/replication/tcp/streams/events.py +++ b/synapse/replication/tcp/streams/events.py @@ -14,7 +14,7 @@ # limitations under the License. import heapq from collections.abc import Iterable -from typing import TYPE_CHECKING, List, Optional, Tuple, Type +from typing import TYPE_CHECKING, Optional, Tuple, Type import attr @@ -157,7 +157,7 @@ class EventsStream(Stream): # now we fetch up to that many rows from the events table - event_rows: List[Tuple] = await self._store.get_all_new_forward_event_rows( + event_rows = await self._store.get_all_new_forward_event_rows( instance_name, from_token, current_token, target_row_count ) @@ -191,7 +191,7 @@ class EventsStream(Stream): # finally, fetch the ex-outliers rows. We assume there are few enough of these # not to bother with the limit. - ex_outliers_rows: List[Tuple] = await self._store.get_ex_outlier_stream_rows( + ex_outliers_rows = await self._store.get_ex_outlier_stream_rows( instance_name, from_token, upper_limit ) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 1605411b00..446204dbe5 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -764,7 +764,7 @@ class StateResolutionStore: store: "DataStore" def get_events( - self, event_ids: Iterable[str], allow_rejected: bool = False + self, event_ids: Collection[str], allow_rejected: bool = False ) -> Awaitable[Dict[str, EventBase]]: """Get events from the database diff --git a/synapse/state/v1.py b/synapse/state/v1.py index 6edadea550..499a328201 100644 --- a/synapse/state/v1.py +++ b/synapse/state/v1.py @@ -17,6 +17,7 @@ import logging from typing import ( Awaitable, Callable, + Collection, Dict, Iterable, List, @@ -44,7 +45,7 @@ async def resolve_events_with_store( room_version: RoomVersion, state_sets: Sequence[StateMap[str]], event_map: Optional[Dict[str, EventBase]], - state_map_factory: Callable[[Iterable[str]], Awaitable[Dict[str, EventBase]]], + state_map_factory: Callable[[Collection[str]], Awaitable[Dict[str, EventBase]]], ) -> StateMap[str]: """ Args: diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 0623da9aa1..3056e64ff5 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -21,7 +21,7 @@ from synapse.storage.database import LoggingTransaction # noqa: F401 from synapse.storage.database import make_in_list_sql_clause # noqa: F401 from synapse.storage.database import DatabasePool from synapse.storage.types import Connection -from synapse.types import StreamToken, get_domain_from_id +from synapse.types import get_domain_from_id from synapse.util import json_decoder if TYPE_CHECKING: @@ -48,7 +48,7 @@ class SQLBaseStore(metaclass=ABCMeta): self, stream_name: str, instance_name: str, - token: StreamToken, + token: int, rows: Iterable[Any], ) -> None: pass diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 06832221ad..c3440de2cb 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -15,7 +15,7 @@ # limitations under the License. import itertools import logging -from collections import OrderedDict, namedtuple +from collections import OrderedDict from typing import ( TYPE_CHECKING, Any, @@ -41,9 +41,10 @@ from synapse.events.snapshot import EventContext # noqa: F401 from synapse.logging.utils import log_function from synapse.storage._base import db_to_json, make_in_list_sql_clause from synapse.storage.database import DatabasePool, LoggingTransaction +from synapse.storage.databases.main.events_worker import EventCacheEntry from synapse.storage.databases.main.search import SearchEntry from synapse.storage.types import Connection -from synapse.storage.util.id_generators import MultiWriterIdGenerator +from synapse.storage.util.id_generators import AbstractStreamIdGenerator from synapse.storage.util.sequence import SequenceGenerator from synapse.types import StateMap, get_domain_from_id from synapse.util import json_encoder @@ -64,9 +65,6 @@ event_counter = Counter( ) -_EventCacheEntry = namedtuple("_EventCacheEntry", ("event", "redacted_event")) - - @attr.s(slots=True) class DeltaState: """Deltas to use to update the `current_state_events` table. @@ -108,16 +106,21 @@ class PersistEventsStore: self._ephemeral_messages_enabled = hs.config.server.enable_ephemeral_messages self.is_mine_id = hs.is_mine_id - # Ideally we'd move these ID gens here, unfortunately some other ID - # generators are chained off them so doing so is a bit of a PITA. - self._backfill_id_gen: MultiWriterIdGenerator = self.store._backfill_id_gen - self._stream_id_gen: MultiWriterIdGenerator = self.store._stream_id_gen - # This should only exist on instances that are configured to write assert ( hs.get_instance_name() in hs.config.worker.writers.events ), "Can only instantiate EventsStore on master" + # Since we have been configured to write, we ought to have id generators, + # rather than id trackers. + assert isinstance(self.store._backfill_id_gen, AbstractStreamIdGenerator) + assert isinstance(self.store._stream_id_gen, AbstractStreamIdGenerator) + + # Ideally we'd move these ID gens here, unfortunately some other ID + # generators are chained off them so doing so is a bit of a PITA. + self._backfill_id_gen: AbstractStreamIdGenerator = self.store._backfill_id_gen + self._stream_id_gen: AbstractStreamIdGenerator = self.store._stream_id_gen + async def _persist_events_and_state_updates( self, events_and_contexts: List[Tuple[EventBase, EventContext]], @@ -1553,11 +1556,13 @@ class PersistEventsStore: for row in rows: event = ev_map[row["event_id"]] if not row["rejects"] and not row["redacts"]: - to_prefill.append(_EventCacheEntry(event=event, redacted_event=None)) + to_prefill.append(EventCacheEntry(event=event, redacted_event=None)) def prefill(): for cache_entry in to_prefill: - self.store._get_event_cache.set((cache_entry[0].event_id,), cache_entry) + self.store._get_event_cache.set( + (cache_entry.event.event_id,), cache_entry + ) txn.call_after(prefill) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index c6bcfe1c32..4cefc0a07e 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -15,14 +15,18 @@ import logging import threading from typing import ( + TYPE_CHECKING, + Any, Collection, Container, Dict, Iterable, List, + NoReturn, Optional, Set, Tuple, + cast, overload, ) @@ -38,6 +42,7 @@ from synapse.api.errors import NotFoundError, SynapseError from synapse.api.room_versions import ( KNOWN_ROOM_VERSIONS, EventFormatVersions, + RoomVersion, RoomVersions, ) from synapse.events import EventBase, make_event_from_dict @@ -56,10 +61,18 @@ from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker from synapse.replication.tcp.streams import BackfillStream from synapse.replication.tcp.streams.events import EventsStream from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause -from synapse.storage.database import DatabasePool, LoggingTransaction +from synapse.storage.database import ( + DatabasePool, + LoggingDatabaseConnection, + LoggingTransaction, +) from synapse.storage.engines import PostgresEngine -from synapse.storage.types import Connection -from synapse.storage.util.id_generators import MultiWriterIdGenerator, StreamIdGenerator +from synapse.storage.types import Cursor +from synapse.storage.util.id_generators import ( + AbstractStreamIdTracker, + MultiWriterIdGenerator, + StreamIdGenerator, +) from synapse.storage.util.sequence import build_sequence_generator from synapse.types import JsonDict, get_domain_from_id from synapse.util import unwrapFirstError @@ -69,6 +82,9 @@ from synapse.util.caches.lrucache import LruCache from synapse.util.iterutils import batch_iter from synapse.util.metrics import Measure +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) @@ -89,7 +105,7 @@ event_fetch_ongoing_gauge = Gauge( @attr.s(slots=True, auto_attribs=True) -class _EventCacheEntry: +class EventCacheEntry: event: EventBase redacted_event: Optional[EventBase] @@ -129,7 +145,7 @@ class _EventRow: json: str internal_metadata: str format_version: Optional[int] - room_version_id: Optional[int] + room_version_id: Optional[str] rejected_reason: Optional[str] redactions: List[str] outlier: bool @@ -153,9 +169,16 @@ class EventsWorkerStore(SQLBaseStore): # options controlling this. USE_DEDICATED_DB_THREADS_FOR_EVENT_FETCHING = True - def __init__(self, database: DatabasePool, db_conn, hs): + def __init__( + self, + database: DatabasePool, + db_conn: LoggingDatabaseConnection, + hs: "HomeServer", + ): super().__init__(database, db_conn, hs) + self._stream_id_gen: AbstractStreamIdTracker + self._backfill_id_gen: AbstractStreamIdTracker if isinstance(database.engine, PostgresEngine): # If we're using Postgres than we can use `MultiWriterIdGenerator` # regardless of whether this process writes to the streams or not. @@ -214,7 +237,7 @@ class EventsWorkerStore(SQLBaseStore): 5 * 60 * 1000, ) - self._get_event_cache = LruCache( + self._get_event_cache: LruCache[Tuple[str], EventCacheEntry] = LruCache( cache_name="*getEvent*", max_size=hs.config.caches.event_cache_size, ) @@ -223,19 +246,21 @@ class EventsWorkerStore(SQLBaseStore): # ID to cache entry. Note that the returned dict may not have the # requested event in it if the event isn't in the DB. self._current_event_fetches: Dict[ - str, ObservableDeferred[Dict[str, _EventCacheEntry]] + str, ObservableDeferred[Dict[str, EventCacheEntry]] ] = {} self._event_fetch_lock = threading.Condition() - self._event_fetch_list = [] + self._event_fetch_list: List[ + Tuple[Iterable[str], "defer.Deferred[Dict[str, _EventRow]]"] + ] = [] self._event_fetch_ongoing = 0 event_fetch_ongoing_gauge.set(self._event_fetch_ongoing) # We define this sequence here so that it can be referenced from both # the DataStore and PersistEventStore. - def get_chain_id_txn(txn): + def get_chain_id_txn(txn: Cursor) -> int: txn.execute("SELECT COALESCE(max(chain_id), 0) FROM event_auth_chains") - return txn.fetchone()[0] + return cast(Tuple[int], txn.fetchone())[0] self.event_chain_id_gen = build_sequence_generator( db_conn, @@ -246,7 +271,13 @@ class EventsWorkerStore(SQLBaseStore): id_column="chain_id", ) - def process_replication_rows(self, stream_name, instance_name, token, rows): + def process_replication_rows( + self, + stream_name: str, + instance_name: str, + token: int, + rows: Iterable[Any], + ) -> None: if stream_name == EventsStream.NAME: self._stream_id_gen.advance(instance_name, token) elif stream_name == BackfillStream.NAME: @@ -280,10 +311,10 @@ class EventsWorkerStore(SQLBaseStore): self, event_id: str, redact_behaviour: EventRedactBehaviour = EventRedactBehaviour.REDACT, - get_prev_content: bool = False, - allow_rejected: bool = False, - allow_none: Literal[False] = False, - check_room_id: Optional[str] = None, + get_prev_content: bool = ..., + allow_rejected: bool = ..., + allow_none: Literal[False] = ..., + check_room_id: Optional[str] = ..., ) -> EventBase: ... @@ -292,10 +323,10 @@ class EventsWorkerStore(SQLBaseStore): self, event_id: str, redact_behaviour: EventRedactBehaviour = EventRedactBehaviour.REDACT, - get_prev_content: bool = False, - allow_rejected: bool = False, - allow_none: Literal[True] = False, - check_room_id: Optional[str] = None, + get_prev_content: bool = ..., + allow_rejected: bool = ..., + allow_none: Literal[True] = ..., + check_room_id: Optional[str] = ..., ) -> Optional[EventBase]: ... @@ -357,7 +388,7 @@ class EventsWorkerStore(SQLBaseStore): async def get_events( self, - event_ids: Iterable[str], + event_ids: Collection[str], redact_behaviour: EventRedactBehaviour = EventRedactBehaviour.REDACT, get_prev_content: bool = False, allow_rejected: bool = False, @@ -544,7 +575,7 @@ class EventsWorkerStore(SQLBaseStore): async def _get_events_from_cache_or_db( self, event_ids: Iterable[str], allow_rejected: bool = False - ) -> Dict[str, _EventCacheEntry]: + ) -> Dict[str, EventCacheEntry]: """Fetch a bunch of events from the cache or the database. If events are pulled from the database, they will be cached for future lookups. @@ -578,7 +609,7 @@ class EventsWorkerStore(SQLBaseStore): # same dict into itself N times). already_fetching_ids: Set[str] = set() already_fetching_deferreds: Set[ - ObservableDeferred[Dict[str, _EventCacheEntry]] + ObservableDeferred[Dict[str, EventCacheEntry]] ] = set() for event_id in missing_events_ids: @@ -601,7 +632,7 @@ class EventsWorkerStore(SQLBaseStore): # function returning more events than requested, but that can happen # already due to `_get_events_from_db`). fetching_deferred: ObservableDeferred[ - Dict[str, _EventCacheEntry] + Dict[str, EventCacheEntry] ] = ObservableDeferred(defer.Deferred(), consumeErrors=True) for event_id in missing_events_ids: self._current_event_fetches[event_id] = fetching_deferred @@ -658,12 +689,12 @@ class EventsWorkerStore(SQLBaseStore): return event_entry_map - def _invalidate_get_event_cache(self, event_id): + def _invalidate_get_event_cache(self, event_id: str) -> None: self._get_event_cache.invalidate((event_id,)) def _get_events_from_cache( self, events: Iterable[str], update_metrics: bool = True - ) -> Dict[str, _EventCacheEntry]: + ) -> Dict[str, EventCacheEntry]: """Fetch events from the caches. May return rejected events. @@ -820,7 +851,7 @@ class EventsWorkerStore(SQLBaseStore): for _, deferred in event_fetches_to_fail: deferred.errback(exc) - def _fetch_loop(self, conn: Connection) -> None: + def _fetch_loop(self, conn: LoggingDatabaseConnection) -> None: """Takes a database connection and waits for requests for events from the _event_fetch_list queue. """ @@ -850,7 +881,9 @@ class EventsWorkerStore(SQLBaseStore): self._fetch_event_list(conn, event_list) def _fetch_event_list( - self, conn: Connection, event_list: List[Tuple[List[str], defer.Deferred]] + self, + conn: LoggingDatabaseConnection, + event_list: List[Tuple[Iterable[str], "defer.Deferred[Dict[str, _EventRow]]"]], ) -> None: """Handle a load of requests from the _event_fetch_list queue @@ -877,7 +910,7 @@ class EventsWorkerStore(SQLBaseStore): ) # We only want to resolve deferreds from the main thread - def fire(): + def fire() -> None: for _, d in event_list: d.callback(row_dict) @@ -887,16 +920,16 @@ class EventsWorkerStore(SQLBaseStore): logger.exception("do_fetch") # We only want to resolve deferreds from the main thread - def fire(evs, exc): - for _, d in evs: + def fire_errback(exc: Exception) -> None: + for _, d in event_list: d.errback(exc) with PreserveLoggingContext(): - self.hs.get_reactor().callFromThread(fire, event_list, e) + self.hs.get_reactor().callFromThread(fire_errback, e) async def _get_events_from_db( - self, event_ids: Iterable[str] - ) -> Dict[str, _EventCacheEntry]: + self, event_ids: Collection[str] + ) -> Dict[str, EventCacheEntry]: """Fetch a bunch of events from the database. May return rejected events. @@ -912,29 +945,29 @@ class EventsWorkerStore(SQLBaseStore): map from event id to result. May return extra events which weren't asked for. """ - fetched_events = {} + fetched_event_ids: Set[str] = set() + fetched_events: Dict[str, _EventRow] = {} events_to_fetch = event_ids while events_to_fetch: row_map = await self._enqueue_events(events_to_fetch) # we need to recursively fetch any redactions of those events - redaction_ids = set() + redaction_ids: Set[str] = set() for event_id in events_to_fetch: row = row_map.get(event_id) - fetched_events[event_id] = row + fetched_event_ids.add(event_id) if row: + fetched_events[event_id] = row redaction_ids.update(row.redactions) - events_to_fetch = redaction_ids.difference(fetched_events.keys()) + events_to_fetch = redaction_ids.difference(fetched_event_ids) if events_to_fetch: logger.debug("Also fetching redaction events %s", events_to_fetch) # build a map from event_id to EventBase - event_map = {} + event_map: Dict[str, EventBase] = {} for event_id, row in fetched_events.items(): - if not row: - continue assert row.event_id == event_id rejected_reason = row.rejected_reason @@ -962,6 +995,7 @@ class EventsWorkerStore(SQLBaseStore): room_version_id = row.room_version_id + room_version: Optional[RoomVersion] if not room_version_id: # this should only happen for out-of-band membership events which # arrived before #6983 landed. For all other events, we should have @@ -1032,14 +1066,14 @@ class EventsWorkerStore(SQLBaseStore): # finally, we can decide whether each one needs redacting, and build # the cache entries. - result_map = {} + result_map: Dict[str, EventCacheEntry] = {} for event_id, original_ev in event_map.items(): redactions = fetched_events[event_id].redactions redacted_event = self._maybe_redact_event_row( original_ev, redactions, event_map ) - cache_entry = _EventCacheEntry( + cache_entry = EventCacheEntry( event=original_ev, redacted_event=redacted_event ) @@ -1048,7 +1082,7 @@ class EventsWorkerStore(SQLBaseStore): return result_map - async def _enqueue_events(self, events: Iterable[str]) -> Dict[str, _EventRow]: + async def _enqueue_events(self, events: Collection[str]) -> Dict[str, _EventRow]: """Fetches events from the database using the _event_fetch_list. This allows batch and bulk fetching of events - it allows us to fetch events without having to create a new transaction for each request for events. @@ -1061,7 +1095,7 @@ class EventsWorkerStore(SQLBaseStore): that weren't requested. """ - events_d = defer.Deferred() + events_d: "defer.Deferred[Dict[str, _EventRow]]" = defer.Deferred() with self._event_fetch_lock: self._event_fetch_list.append((events, events_d)) self._event_fetch_lock.notify() @@ -1216,7 +1250,7 @@ class EventsWorkerStore(SQLBaseStore): # no valid redaction found for this event return None - async def have_events_in_timeline(self, event_ids): + async def have_events_in_timeline(self, event_ids: Iterable[str]) -> Set[str]: """Given a list of event ids, check if we have already processed and stored them as non outliers. """ @@ -1245,7 +1279,7 @@ class EventsWorkerStore(SQLBaseStore): event_ids: events we are looking for Returns: - set[str]: The events we have already seen. + The set of events we have already seen. """ res = await self._have_seen_events_dict( (room_id, event_id) for event_id in event_ids @@ -1268,7 +1302,9 @@ class EventsWorkerStore(SQLBaseStore): } results = {x: True for x in cache_results} - def have_seen_events_txn(txn, chunk: Tuple[Tuple[str, str], ...]): + def have_seen_events_txn( + txn: LoggingTransaction, chunk: Tuple[Tuple[str, str], ...] + ) -> None: # we deliberately do *not* query the database for room_id, to make the # query an index-only lookup on `events_event_id_key`. # @@ -1294,12 +1330,14 @@ class EventsWorkerStore(SQLBaseStore): return results @cached(max_entries=100000, tree=True) - async def have_seen_event(self, room_id: str, event_id: str): + async def have_seen_event(self, room_id: str, event_id: str) -> NoReturn: # this only exists for the benefit of the @cachedList descriptor on # _have_seen_events_dict raise NotImplementedError() - def _get_current_state_event_counts_txn(self, txn, room_id): + def _get_current_state_event_counts_txn( + self, txn: LoggingTransaction, room_id: str + ) -> int: """ See get_current_state_event_counts. """ @@ -1324,7 +1362,7 @@ class EventsWorkerStore(SQLBaseStore): room_id, ) - async def get_room_complexity(self, room_id): + async def get_room_complexity(self, room_id: str) -> Dict[str, float]: """ Get a rough approximation of the complexity of the room. This is used by remote servers to decide whether they wish to join the room or not. @@ -1332,10 +1370,10 @@ class EventsWorkerStore(SQLBaseStore): more resources. Args: - room_id (str) + room_id: The room ID to query. Returns: - dict[str:int] of complexity version to complexity. + dict[str:float] of complexity version to complexity. """ state_events = await self.get_current_state_event_counts(room_id) @@ -1345,13 +1383,13 @@ class EventsWorkerStore(SQLBaseStore): return {"v1": complexity_v1} - def get_current_events_token(self): + def get_current_events_token(self) -> int: """The current maximum token that events have reached""" return self._stream_id_gen.get_current_token() async def get_all_new_forward_event_rows( self, instance_name: str, last_id: int, current_id: int, limit: int - ) -> List[Tuple]: + ) -> List[Tuple[int, str, str, str, str, str, str, str, str]]: """Returns new events, for the Events replication stream Args: @@ -1365,7 +1403,9 @@ class EventsWorkerStore(SQLBaseStore): EventsStreamRow. """ - def get_all_new_forward_event_rows(txn): + def get_all_new_forward_event_rows( + txn: LoggingTransaction, + ) -> List[Tuple[int, str, str, str, str, str, str, str, str]]: sql = ( "SELECT e.stream_ordering, e.event_id, e.room_id, e.type," " state_key, redacts, relates_to_id, membership, rejections.reason IS NOT NULL" @@ -1381,7 +1421,9 @@ class EventsWorkerStore(SQLBaseStore): " LIMIT ?" ) txn.execute(sql, (last_id, current_id, instance_name, limit)) - return txn.fetchall() + return cast( + List[Tuple[int, str, str, str, str, str, str, str, str]], txn.fetchall() + ) return await self.db_pool.runInteraction( "get_all_new_forward_event_rows", get_all_new_forward_event_rows @@ -1389,7 +1431,7 @@ class EventsWorkerStore(SQLBaseStore): async def get_ex_outlier_stream_rows( self, instance_name: str, last_id: int, current_id: int - ) -> List[Tuple]: + ) -> List[Tuple[int, str, str, str, str, str, str, str, str]]: """Returns de-outliered events, for the Events replication stream Args: @@ -1402,7 +1444,9 @@ class EventsWorkerStore(SQLBaseStore): EventsStreamRow. """ - def get_ex_outlier_stream_rows_txn(txn): + def get_ex_outlier_stream_rows_txn( + txn: LoggingTransaction, + ) -> List[Tuple[int, str, str, str, str, str, str, str, str]]: sql = ( "SELECT event_stream_ordering, e.event_id, e.room_id, e.type," " state_key, redacts, relates_to_id, membership, rejections.reason IS NOT NULL" @@ -1420,7 +1464,9 @@ class EventsWorkerStore(SQLBaseStore): ) txn.execute(sql, (last_id, current_id, instance_name)) - return txn.fetchall() + return cast( + List[Tuple[int, str, str, str, str, str, str, str, str]], txn.fetchall() + ) return await self.db_pool.runInteraction( "get_ex_outlier_stream_rows", get_ex_outlier_stream_rows_txn @@ -1428,7 +1474,7 @@ class EventsWorkerStore(SQLBaseStore): async def get_all_new_backfill_event_rows( self, instance_name: str, last_id: int, current_id: int, limit: int - ) -> Tuple[List[Tuple[int, list]], int, bool]: + ) -> Tuple[List[Tuple[int, Tuple[str, str, str, str, str, str]]], int, bool]: """Get updates for backfill replication stream, including all new backfilled events and events that have gone from being outliers to not. @@ -1456,7 +1502,9 @@ class EventsWorkerStore(SQLBaseStore): if last_id == current_id: return [], current_id, False - def get_all_new_backfill_event_rows(txn): + def get_all_new_backfill_event_rows( + txn: LoggingTransaction, + ) -> Tuple[List[Tuple[int, Tuple[str, str, str, str, str, str]]], int, bool]: sql = ( "SELECT -e.stream_ordering, e.event_id, e.room_id, e.type," " state_key, redacts, relates_to_id" @@ -1470,7 +1518,15 @@ class EventsWorkerStore(SQLBaseStore): " LIMIT ?" ) txn.execute(sql, (-last_id, -current_id, instance_name, limit)) - new_event_updates = [(row[0], row[1:]) for row in txn] + new_event_updates: List[ + Tuple[int, Tuple[str, str, str, str, str, str]] + ] = [] + row: Tuple[int, str, str, str, str, str, str] + # Type safety: iterating over `txn` yields `Tuple`, i.e. + # `Tuple[Any, ...]` of arbitrary length. Mypy detects assigning a + # variadic tuple to a fixed length tuple and flags it up as an error. + for row in txn: # type: ignore[assignment] + new_event_updates.append((row[0], row[1:])) limited = False if len(new_event_updates) == limit: @@ -1493,7 +1549,11 @@ class EventsWorkerStore(SQLBaseStore): " ORDER BY event_stream_ordering DESC" ) txn.execute(sql, (-last_id, -upper_bound, instance_name)) - new_event_updates.extend((row[0], row[1:]) for row in txn) + # Type safety: iterating over `txn` yields `Tuple`, i.e. + # `Tuple[Any, ...]` of arbitrary length. Mypy detects assigning a + # variadic tuple to a fixed length tuple and flags it up as an error. + for row in txn: # type: ignore[assignment] + new_event_updates.append((row[0], row[1:])) if len(new_event_updates) >= limit: upper_bound = new_event_updates[-1][0] @@ -1507,7 +1567,7 @@ class EventsWorkerStore(SQLBaseStore): async def get_all_updated_current_state_deltas( self, instance_name: str, from_token: int, to_token: int, target_row_count: int - ) -> Tuple[List[Tuple], int, bool]: + ) -> Tuple[List[Tuple[int, str, str, str, str]], int, bool]: """Fetch updates from current_state_delta_stream Args: @@ -1527,7 +1587,9 @@ class EventsWorkerStore(SQLBaseStore): * `limited` is whether there are more updates to fetch. """ - def get_all_updated_current_state_deltas_txn(txn): + def get_all_updated_current_state_deltas_txn( + txn: LoggingTransaction, + ) -> List[Tuple[int, str, str, str, str]]: sql = """ SELECT stream_id, room_id, type, state_key, event_id FROM current_state_delta_stream @@ -1536,21 +1598,23 @@ class EventsWorkerStore(SQLBaseStore): ORDER BY stream_id ASC LIMIT ? """ txn.execute(sql, (from_token, to_token, instance_name, target_row_count)) - return txn.fetchall() + return cast(List[Tuple[int, str, str, str, str]], txn.fetchall()) - def get_deltas_for_stream_id_txn(txn, stream_id): + def get_deltas_for_stream_id_txn( + txn: LoggingTransaction, stream_id: int + ) -> List[Tuple[int, str, str, str, str]]: sql = """ SELECT stream_id, room_id, type, state_key, event_id FROM current_state_delta_stream WHERE stream_id = ? """ txn.execute(sql, [stream_id]) - return txn.fetchall() + return cast(List[Tuple[int, str, str, str, str]], txn.fetchall()) # we need to make sure that, for every stream id in the results, we get *all* # the rows with that stream id. - rows: List[Tuple] = await self.db_pool.runInteraction( + rows: List[Tuple[int, str, str, str, str]] = await self.db_pool.runInteraction( "get_all_updated_current_state_deltas", get_all_updated_current_state_deltas_txn, ) @@ -1579,14 +1643,14 @@ class EventsWorkerStore(SQLBaseStore): return rows, to_token, True - async def is_event_after(self, event_id1, event_id2): + async def is_event_after(self, event_id1: str, event_id2: str) -> bool: """Returns True if event_id1 is after event_id2 in the stream""" to_1, so_1 = await self.get_event_ordering(event_id1) to_2, so_2 = await self.get_event_ordering(event_id2) return (to_1, so_1) > (to_2, so_2) @cached(max_entries=5000) - async def get_event_ordering(self, event_id): + async def get_event_ordering(self, event_id: str) -> Tuple[int, int]: res = await self.db_pool.simple_select_one( table="events", retcols=["topological_ordering", "stream_ordering"], @@ -1609,7 +1673,9 @@ class EventsWorkerStore(SQLBaseStore): None otherwise. """ - def get_next_event_to_expire_txn(txn): + def get_next_event_to_expire_txn( + txn: LoggingTransaction, + ) -> Optional[Tuple[str, int]]: txn.execute( """ SELECT event_id, expiry_ts FROM event_expiry @@ -1617,7 +1683,7 @@ class EventsWorkerStore(SQLBaseStore): """ ) - return txn.fetchone() + return cast(Optional[Tuple[str, int]], txn.fetchone()) return await self.db_pool.runInteraction( desc="get_next_event_to_expire", func=get_next_event_to_expire_txn @@ -1681,10 +1747,10 @@ class EventsWorkerStore(SQLBaseStore): return mapping @wrap_as_background_process("_cleanup_old_transaction_ids") - async def _cleanup_old_transaction_ids(self): + async def _cleanup_old_transaction_ids(self) -> None: """Cleans out transaction id mappings older than 24hrs.""" - def _cleanup_old_transaction_ids_txn(txn): + def _cleanup_old_transaction_ids_txn(txn: LoggingTransaction) -> None: sql = """ DELETE FROM event_txn_id WHERE inserted_ts < ? diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py index fa782023d4..3b63267395 100644 --- a/synapse/storage/databases/main/push_rule.py +++ b/synapse/storage/databases/main/push_rule.py @@ -28,7 +28,10 @@ from synapse.storage.databases.main.receipts import ReceiptsWorkerStore from synapse.storage.databases.main.roommember import RoomMemberWorkerStore from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.storage.push_rule import InconsistentRuleException, RuleNotFoundException -from synapse.storage.util.id_generators import StreamIdGenerator +from synapse.storage.util.id_generators import ( + AbstractStreamIdTracker, + StreamIdGenerator, +) from synapse.util import json_encoder from synapse.util.caches.descriptors import cached, cachedList from synapse.util.caches.stream_change_cache import StreamChangeCache @@ -82,9 +85,9 @@ class PushRulesWorkerStore( super().__init__(database, db_conn, hs) if hs.config.worker.worker_app is None: - self._push_rules_stream_id_gen: Union[ - StreamIdGenerator, SlavedIdTracker - ] = StreamIdGenerator(db_conn, "push_rules_stream", "stream_id") + self._push_rules_stream_id_gen: AbstractStreamIdTracker = StreamIdGenerator( + db_conn, "push_rules_stream", "stream_id" + ) else: self._push_rules_stream_id_gen = SlavedIdTracker( db_conn, "push_rules_stream", "stream_id" diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index ac56bc9a05..4ff3013908 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -89,31 +89,77 @@ def _load_current_id( return (max if step > 0 else min)(current_id, step) -class AbstractStreamIdGenerator(metaclass=abc.ABCMeta): - @abc.abstractmethod - def get_next(self) -> AsyncContextManager[int]: - raise NotImplementedError() +class AbstractStreamIdTracker(metaclass=abc.ABCMeta): + """Tracks the "current" stream ID of a stream that may have multiple writers. + + Stream IDs are monotonically increasing or decreasing integers representing write + transactions. The "current" stream ID is the stream ID such that all transactions + with equal or smaller stream IDs have completed. Since transactions may complete out + of order, this is not the same as the stream ID of the last completed transaction. + + Completed transactions include both committed transactions and transactions that + have been rolled back. + """ @abc.abstractmethod - def get_next_mult(self, n: int) -> AsyncContextManager[Sequence[int]]: + def advance(self, instance_name: str, new_id: int) -> None: + """Advance the position of the named writer to the given ID, if greater + than existing entry. + """ raise NotImplementedError() @abc.abstractmethod def get_current_token(self) -> int: + """Returns the maximum stream id such that all stream ids less than or + equal to it have been successfully persisted. + + Returns: + The maximum stream id. + """ raise NotImplementedError() @abc.abstractmethod def get_current_token_for_writer(self, instance_name: str) -> int: + """Returns the position of the given writer. + + For streams with single writers this is equivalent to `get_current_token`. + """ + raise NotImplementedError() + + +class AbstractStreamIdGenerator(AbstractStreamIdTracker): + """Generates stream IDs for a stream that may have multiple writers. + + Each stream ID represents a write transaction, whose completion is tracked + so that the "current" stream ID of the stream can be determined. + + See `AbstractStreamIdTracker` for more details. + """ + + @abc.abstractmethod + def get_next(self) -> AsyncContextManager[int]: + """ + Usage: + async with stream_id_gen.get_next() as stream_id: + # ... persist event ... + """ + raise NotImplementedError() + + @abc.abstractmethod + def get_next_mult(self, n: int) -> AsyncContextManager[Sequence[int]]: + """ + Usage: + async with stream_id_gen.get_next(n) as stream_ids: + # ... persist events ... + """ raise NotImplementedError() class StreamIdGenerator(AbstractStreamIdGenerator): - """Used to generate new stream ids when persisting events while keeping - track of which transactions have been completed. + """Generates and tracks stream IDs for a stream with a single writer. - This allows us to get the "current" stream id, i.e. the stream id such that - all ids less than or equal to it have completed. This handles the fact that - persistence of events can complete out of order. + This class must only be used when the current Synapse process is the sole + writer for a stream. Args: db_conn(connection): A database connection to use to fetch the @@ -157,12 +203,12 @@ class StreamIdGenerator(AbstractStreamIdGenerator): # The key and values are the same, but we never look at the values. self._unfinished_ids: OrderedDict[int, int] = OrderedDict() + def advance(self, instance_name: str, new_id: int) -> None: + # `StreamIdGenerator` should only be used when there is a single writer, + # so replication should never happen. + raise Exception("Replication is not supported by StreamIdGenerator") + def get_next(self) -> AsyncContextManager[int]: - """ - Usage: - async with stream_id_gen.get_next() as stream_id: - # ... persist event ... - """ with self._lock: self._current += self._step next_id = self._current @@ -180,11 +226,6 @@ class StreamIdGenerator(AbstractStreamIdGenerator): return _AsyncCtxManagerWrapper(manager()) def get_next_mult(self, n: int) -> AsyncContextManager[Sequence[int]]: - """ - Usage: - async with stream_id_gen.get_next(n) as stream_ids: - # ... persist events ... - """ with self._lock: next_ids = range( self._current + self._step, @@ -208,12 +249,6 @@ class StreamIdGenerator(AbstractStreamIdGenerator): return _AsyncCtxManagerWrapper(manager()) def get_current_token(self) -> int: - """Returns the maximum stream id such that all stream ids less than or - equal to it have been successfully persisted. - - Returns: - The maximum stream id. - """ with self._lock: if self._unfinished_ids: return next(iter(self._unfinished_ids)) - self._step @@ -221,16 +256,11 @@ class StreamIdGenerator(AbstractStreamIdGenerator): return self._current def get_current_token_for_writer(self, instance_name: str) -> int: - """Returns the position of the given writer. - - For streams with single writers this is equivalent to - `get_current_token`. - """ return self.get_current_token() class MultiWriterIdGenerator(AbstractStreamIdGenerator): - """An ID generator that tracks a stream that can have multiple writers. + """Generates and tracks stream IDs for a stream with multiple writers. Uses a Postgres sequence to coordinate ID assignment, but positions of other writers will only get updated when `advance` is called (by replication). @@ -475,12 +505,6 @@ class MultiWriterIdGenerator(AbstractStreamIdGenerator): return stream_ids def get_next(self) -> AsyncContextManager[int]: - """ - Usage: - async with stream_id_gen.get_next() as stream_id: - # ... persist event ... - """ - # If we have a list of instances that are allowed to write to this # stream, make sure we're in it. if self._writers and self._instance_name not in self._writers: @@ -492,12 +516,6 @@ class MultiWriterIdGenerator(AbstractStreamIdGenerator): return cast(AsyncContextManager[int], _MultiWriterCtxManager(self)) def get_next_mult(self, n: int) -> AsyncContextManager[List[int]]: - """ - Usage: - async with stream_id_gen.get_next_mult(5) as stream_ids: - # ... persist events ... - """ - # If we have a list of instances that are allowed to write to this # stream, make sure we're in it. if self._writers and self._instance_name not in self._writers: @@ -597,15 +615,9 @@ class MultiWriterIdGenerator(AbstractStreamIdGenerator): self._add_persisted_position(next_id) def get_current_token(self) -> int: - """Returns the maximum stream id such that all stream ids less than or - equal to it have been successfully persisted. - """ - return self.get_persisted_upto_position() def get_current_token_for_writer(self, instance_name: str) -> int: - """Returns the position of the given writer.""" - # If we don't have an entry for the given instance name, we assume it's a # new writer. # @@ -631,10 +643,6 @@ class MultiWriterIdGenerator(AbstractStreamIdGenerator): } def advance(self, instance_name: str, new_id: int) -> None: - """Advance the position of the named writer to the given ID, if greater - than existing entry. - """ - new_id *= self._return_factor with self._lock: diff --git a/tests/replication/test_sharded_event_persister.py b/tests/replication/test_sharded_event_persister.py index 0a6e4795ee..596ba5a0c9 100644 --- a/tests/replication/test_sharded_event_persister.py +++ b/tests/replication/test_sharded_event_persister.py @@ -17,6 +17,7 @@ from unittest.mock import patch from synapse.api.room_versions import RoomVersion from synapse.rest import admin from synapse.rest.client import login, room, sync +from synapse.storage.util.id_generators import MultiWriterIdGenerator from tests.replication._base import BaseMultiWorkerStreamTestCase from tests.server import make_request @@ -193,7 +194,10 @@ class EventPersisterShardTestCase(BaseMultiWorkerStreamTestCase): # # Worker2's event stream position will not advance until we call # __aexit__ again. - actx = worker_hs2.get_datastore()._stream_id_gen.get_next() + worker_store2 = worker_hs2.get_datastore() + assert isinstance(worker_store2._stream_id_gen, MultiWriterIdGenerator) + + actx = worker_store2._stream_id_gen.get_next() self.get_success(actx.__aenter__()) response = self.helper.send(room_id1, body="Hi!", tok=self.other_access_token) -- cgit 1.5.1 From fb58611d212ea16be2b42d0e2441a6dc09f6f61d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 29 Nov 2021 16:01:54 -0600 Subject: Refactor `backfilled` into specific behavior function arguments (`_persist_events_and_state_updates`) (#11417) Part of https://github.com/matrix-org/synapse/issues/11300 Call stack: - `_persist_events_and_state_updates` (added `use_negative_stream_ordering`) - `_persist_events_txn` - `_update_room_depths_txn` (added `update_room_forward_stream_ordering`) - `_update_metadata_tables_txn` - `_store_room_members_txn` (added `inhibit_local_membership_updates`) Using keyword-only arguments (`*`) to reduce the mistakes from `backfilled` being left as a positional argument somewhere and being interpreted wrong by our new arguments. --- changelog.d/11417.misc | 1 + synapse/storage/databases/main/events.py | 74 +++++++++++++++++++++++--------- synapse/storage/persist_events.py | 3 +- 3 files changed, 57 insertions(+), 21 deletions(-) create mode 100644 changelog.d/11417.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/11417.misc b/changelog.d/11417.misc new file mode 100644 index 0000000000..88dc4722da --- /dev/null +++ b/changelog.d/11417.misc @@ -0,0 +1 @@ +Refactor `backfilled` into specific behavior function arguments (`_persist_events_and_state_updates` and downstream calls). diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index c3440de2cb..4171b904eb 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -124,10 +124,12 @@ class PersistEventsStore: async def _persist_events_and_state_updates( self, events_and_contexts: List[Tuple[EventBase, EventContext]], + *, current_state_for_room: Dict[str, StateMap[str]], state_delta_for_room: Dict[str, DeltaState], new_forward_extremeties: Dict[str, List[str]], - backfilled: bool = False, + use_negative_stream_ordering: bool = False, + inhibit_local_membership_updates: bool = False, ) -> None: """Persist a set of events alongside updates to the current state and forward extremities tables. @@ -140,7 +142,14 @@ class PersistEventsStore: room state new_forward_extremities: Map from room_id to list of event IDs that are the new forward extremities of the room. - backfilled + use_negative_stream_ordering: Whether to start stream_ordering on + the negative side and decrement. This should be set as True + for backfilled events because backfilled events get a negative + stream ordering so they don't come down incremental `/sync`. + inhibit_local_membership_updates: Stop the local_current_membership + from being updated by these events. This should be set to True + for backfilled events because backfilled events in the past do + not affect the current local state. Returns: Resolves when the events have been persisted @@ -162,7 +171,7 @@ class PersistEventsStore: # # Note: Multiple instances of this function cannot be in flight at # the same time for the same room. - if backfilled: + if use_negative_stream_ordering: stream_ordering_manager = self._backfill_id_gen.get_next_mult( len(events_and_contexts) ) @@ -179,13 +188,13 @@ class PersistEventsStore: "persist_events", self._persist_events_txn, events_and_contexts=events_and_contexts, - backfilled=backfilled, + inhibit_local_membership_updates=inhibit_local_membership_updates, state_delta_for_room=state_delta_for_room, new_forward_extremeties=new_forward_extremeties, ) persist_event_counter.inc(len(events_and_contexts)) - if not backfilled: + if stream < 0: # backfilled events have negative stream orderings, so we don't # want to set the event_persisted_position to that. synapse.metrics.event_persisted_position.set( @@ -319,8 +328,9 @@ class PersistEventsStore: def _persist_events_txn( self, txn: LoggingTransaction, + *, events_and_contexts: List[Tuple[EventBase, EventContext]], - backfilled: bool, + inhibit_local_membership_updates: bool = False, state_delta_for_room: Optional[Dict[str, DeltaState]] = None, new_forward_extremeties: Optional[Dict[str, List[str]]] = None, ): @@ -333,7 +343,10 @@ class PersistEventsStore: Args: txn events_and_contexts: events to persist - backfilled: True if the events were backfilled + inhibit_local_membership_updates: Stop the local_current_membership + from being updated by these events. This should be set to True + for backfilled events because backfilled events in the past do + not affect the current local state. delete_existing True to purge existing table rows for the events from the database. This is useful when retrying due to IntegrityError. @@ -366,9 +379,7 @@ class PersistEventsStore: events_and_contexts ) - self._update_room_depths_txn( - txn, events_and_contexts=events_and_contexts, backfilled=backfilled - ) + self._update_room_depths_txn(txn, events_and_contexts=events_and_contexts) # _update_outliers_txn filters out any events which have already been # persisted, and returns the filtered list. @@ -401,7 +412,7 @@ class PersistEventsStore: txn, events_and_contexts=events_and_contexts, all_events_and_contexts=all_events_and_contexts, - backfilled=backfilled, + inhibit_local_membership_updates=inhibit_local_membership_updates, ) # We call this last as it assumes we've inserted the events into @@ -1203,7 +1214,6 @@ class PersistEventsStore: self, txn, events_and_contexts: List[Tuple[EventBase, EventContext]], - backfilled: bool, ): """Update min_depth for each room @@ -1211,13 +1221,18 @@ class PersistEventsStore: txn (twisted.enterprise.adbapi.Connection): db connection events_and_contexts (list[(EventBase, EventContext)]): events we are persisting - backfilled (bool): True if the events were backfilled """ depth_updates: Dict[str, int] = {} for event, context in events_and_contexts: # Remove the any existing cache entries for the event_ids txn.call_after(self.store._invalidate_get_event_cache, event.event_id) - if not backfilled: + # Then update the `stream_ordering` position to mark the latest + # event as the front of the room. This should not be done for + # backfilled events because backfilled events have negative + # stream_ordering and happened in the past so we know that we don't + # need to update the stream_ordering tip/front for the room. + assert event.internal_metadata.stream_ordering is not None + if event.internal_metadata.stream_ordering >= 0: txn.call_after( self.store._events_stream_cache.entity_has_changed, event.room_id, @@ -1430,7 +1445,12 @@ class PersistEventsStore: return [ec for ec in events_and_contexts if ec[0] not in to_remove] def _update_metadata_tables_txn( - self, txn, events_and_contexts, all_events_and_contexts, backfilled + self, + txn, + *, + events_and_contexts, + all_events_and_contexts, + inhibit_local_membership_updates: bool = False, ): """Update all the miscellaneous tables for new events @@ -1442,7 +1462,10 @@ class PersistEventsStore: events that we were going to persist. This includes events we've already persisted, etc, that wouldn't appear in events_and_context. - backfilled (bool): True if the events were backfilled + inhibit_local_membership_updates: Stop the local_current_membership + from being updated by these events. This should be set to True + for backfilled events because backfilled events in the past do + not affect the current local state. """ # Insert all the push actions into the event_push_actions table. @@ -1516,7 +1539,7 @@ class PersistEventsStore: for event, _ in events_and_contexts if event.type == EventTypes.Member ], - backfilled=backfilled, + inhibit_local_membership_updates=inhibit_local_membership_updates, ) # Insert event_reference_hashes table. @@ -1643,8 +1666,19 @@ class PersistEventsStore: txn, table="event_reference_hashes", values=vals ) - def _store_room_members_txn(self, txn, events, backfilled): - """Store a room member in the database.""" + def _store_room_members_txn( + self, txn, events, *, inhibit_local_membership_updates: bool = False + ): + """ + Store a room member in the database. + Args: + txn: The transaction to use. + events: List of events to store. + inhibit_local_membership_updates: Stop the local_current_membership + from being updated by these events. This should be set to True + for backfilled events because backfilled events in the past do + not affect the current local state. + """ def non_null_str_or_none(val: Any) -> Optional[str]: return val if isinstance(val, str) and "\u0000" not in val else None @@ -1687,7 +1721,7 @@ class PersistEventsStore: # band membership", like a remote invite or a rejection of a remote invite. if ( self.is_mine_id(event.state_key) - and not backfilled + and not inhibit_local_membership_updates and event.internal_metadata.is_outlier() and event.internal_metadata.is_out_of_band_membership() ): diff --git a/synapse/storage/persist_events.py b/synapse/storage/persist_events.py index 402f134d89..428d66a617 100644 --- a/synapse/storage/persist_events.py +++ b/synapse/storage/persist_events.py @@ -583,7 +583,8 @@ class EventsPersistenceStorage: current_state_for_room=current_state_for_room, state_delta_for_room=state_delta_for_room, new_forward_extremeties=new_forward_extremeties, - backfilled=backfilled, + use_negative_stream_ordering=backfilled, + inhibit_local_membership_updates=backfilled, ) await self._handle_potentially_left_users(potentially_left_users) -- cgit 1.5.1 From a9481223d1d5a8b3bf0d7ce2140dd3c919481f4f Mon Sep 17 00:00:00 2001 From: Marcus Date: Tue, 30 Nov 2021 12:49:20 +0100 Subject: Improved push typing (#11409) Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> --- changelog.d/11409.misc | 1 + docs/templates.md | 5 + synapse/push/emailpusher.py | 10 +- synapse/push/httppusher.py | 3 +- synapse/push/mailer.py | 72 ++++++----- synapse/push/push_types.py | 136 +++++++++++++++++++++ .../storage/databases/main/event_push_actions.py | 19 ++- 7 files changed, 210 insertions(+), 36 deletions(-) create mode 100644 changelog.d/11409.misc create mode 100644 synapse/push/push_types.py (limited to 'synapse/storage/databases') diff --git a/changelog.d/11409.misc b/changelog.d/11409.misc new file mode 100644 index 0000000000..f9e8ae9e3a --- /dev/null +++ b/changelog.d/11409.misc @@ -0,0 +1 @@ +Improve internal types in push code. diff --git a/docs/templates.md b/docs/templates.md index a240f58b54..2b66e9d862 100644 --- a/docs/templates.md +++ b/docs/templates.md @@ -71,7 +71,12 @@ Below are the templates Synapse will look for when generating the content of an * `sender_avatar_url`: the avatar URL (as a `mxc://` URL) for the event's sender * `sender_hash`: a hash of the user ID of the sender + * `msgtype`: the type of the message + * `body_text_html`: html representation of the message + * `body_text_plain`: plaintext representation of the message + * `image_url`: mxc url of an image, when "msgtype" is "m.image" * `link`: a `matrix.to` link to the room + * `avator_url`: url to the room's avator * `reason`: information on the event that triggered the email to be sent. It's an object with the following attributes: * `room_id`: the ID of the room the event was sent in diff --git a/synapse/push/emailpusher.py b/synapse/push/emailpusher.py index cf5abdfbda..4f13c0418a 100644 --- a/synapse/push/emailpusher.py +++ b/synapse/push/emailpusher.py @@ -21,6 +21,8 @@ from twisted.internet.interfaces import IDelayedCall from synapse.metrics.background_process_metrics import run_as_background_process from synapse.push import Pusher, PusherConfig, PusherConfigException, ThrottleParams from synapse.push.mailer import Mailer +from synapse.push.push_types import EmailReason +from synapse.storage.databases.main.event_push_actions import EmailPushAction from synapse.util.threepids import validate_email if TYPE_CHECKING: @@ -190,7 +192,7 @@ class EmailPusher(Pusher): # we then consider all previously outstanding notifications # to be delivered. - reason = { + reason: EmailReason = { "room_id": push_action["room_id"], "now": self.clock.time_msec(), "received_at": received_at, @@ -275,7 +277,7 @@ class EmailPusher(Pusher): return may_send_at async def sent_notif_update_throttle( - self, room_id: str, notified_push_action: dict + self, room_id: str, notified_push_action: EmailPushAction ) -> None: # We have sent a notification, so update the throttle accordingly. # If the event that triggered the notif happened more than @@ -315,7 +317,9 @@ class EmailPusher(Pusher): self.pusher_id, room_id, self.throttle_params[room_id] ) - async def send_notification(self, push_actions: List[dict], reason: dict) -> None: + async def send_notification( + self, push_actions: List[EmailPushAction], reason: EmailReason + ) -> None: logger.info("Sending notif email for user %r", self.user_id) await self.mailer.send_notification_mail( diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index dbf4ad7f97..3fa603ccb7 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -26,6 +26,7 @@ from synapse.events import EventBase from synapse.logging import opentracing from synapse.metrics.background_process_metrics import run_as_background_process from synapse.push import Pusher, PusherConfig, PusherConfigException +from synapse.storage.databases.main.event_push_actions import HttpPushAction from . import push_rule_evaluator, push_tools @@ -273,7 +274,7 @@ class HttpPusher(Pusher): ) break - async def _process_one(self, push_action: dict) -> bool: + async def _process_one(self, push_action: HttpPushAction) -> bool: if "notify" not in push_action["actions"]: return True diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py index ce299ba3da..ba4f866487 100644 --- a/synapse/push/mailer.py +++ b/synapse/push/mailer.py @@ -14,7 +14,7 @@ import logging import urllib.parse -from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, TypeVar +from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, TypeVar import bleach import jinja2 @@ -28,6 +28,14 @@ from synapse.push.presentable_names import ( descriptor_from_member_events, name_from_member_event, ) +from synapse.push.push_types import ( + EmailReason, + MessageVars, + NotifVars, + RoomVars, + TemplateVars, +) +from synapse.storage.databases.main.event_push_actions import EmailPushAction from synapse.storage.state import StateFilter from synapse.types import StateMap, UserID from synapse.util.async_helpers import concurrently_execute @@ -135,7 +143,7 @@ class Mailer: % urllib.parse.urlencode(params) ) - template_vars = {"link": link} + template_vars: TemplateVars = {"link": link} await self.send_email( email_address, @@ -165,7 +173,7 @@ class Mailer: % urllib.parse.urlencode(params) ) - template_vars = {"link": link} + template_vars: TemplateVars = {"link": link} await self.send_email( email_address, @@ -196,7 +204,7 @@ class Mailer: % urllib.parse.urlencode(params) ) - template_vars = {"link": link} + template_vars: TemplateVars = {"link": link} await self.send_email( email_address, @@ -210,8 +218,8 @@ class Mailer: app_id: str, user_id: str, email_address: str, - push_actions: Iterable[Dict[str, Any]], - reason: Dict[str, Any], + push_actions: Iterable[EmailPushAction], + reason: EmailReason, ) -> None: """ Send email regarding a user's room notifications @@ -230,7 +238,7 @@ class Mailer: [pa["event_id"] for pa in push_actions] ) - notifs_by_room: Dict[str, List[Dict[str, Any]]] = {} + notifs_by_room: Dict[str, List[EmailPushAction]] = {} for pa in push_actions: notifs_by_room.setdefault(pa["room_id"], []).append(pa) @@ -258,7 +266,7 @@ class Mailer: # actually sort our so-called rooms_in_order list, most recent room first rooms_in_order.sort(key=lambda r: -(notifs_by_room[r][-1]["received_ts"] or 0)) - rooms: List[Dict[str, Any]] = [] + rooms: List[RoomVars] = [] for r in rooms_in_order: roomvars = await self._get_room_vars( @@ -289,7 +297,7 @@ class Mailer: notifs_by_room, state_by_room, notif_events, reason ) - template_vars = { + template_vars: TemplateVars = { "user_display_name": user_display_name, "unsubscribe_link": self._make_unsubscribe_link( user_id, app_id, email_address @@ -302,10 +310,10 @@ class Mailer: await self.send_email(email_address, summary_text, template_vars) async def send_email( - self, email_address: str, subject: str, extra_template_vars: Dict[str, Any] + self, email_address: str, subject: str, extra_template_vars: TemplateVars ) -> None: """Send an email with the given information and template text""" - template_vars = { + template_vars: TemplateVars = { "app_name": self.app_name, "server_name": self.hs.config.server.server_name, } @@ -327,10 +335,10 @@ class Mailer: self, room_id: str, user_id: str, - notifs: Iterable[Dict[str, Any]], + notifs: Iterable[EmailPushAction], notif_events: Dict[str, EventBase], room_state_ids: StateMap[str], - ) -> Dict[str, Any]: + ) -> RoomVars: """ Generate the variables for notifications on a per-room basis. @@ -356,7 +364,7 @@ class Mailer: room_name = await calculate_room_name(self.store, room_state_ids, user_id) - room_vars: Dict[str, Any] = { + room_vars: RoomVars = { "title": room_name, "hash": string_ordinal_total(room_id), # See sender avatar hash "notifs": [], @@ -417,11 +425,11 @@ class Mailer: async def _get_notif_vars( self, - notif: Dict[str, Any], + notif: EmailPushAction, user_id: str, notif_event: EventBase, room_state_ids: StateMap[str], - ) -> Dict[str, Any]: + ) -> NotifVars: """ Generate the variables for a single notification. @@ -442,7 +450,7 @@ class Mailer: after_limit=CONTEXT_AFTER, ) - ret = { + ret: NotifVars = { "link": self._make_notif_link(notif), "ts": notif["received_ts"], "messages": [], @@ -461,8 +469,8 @@ class Mailer: return ret async def _get_message_vars( - self, notif: Dict[str, Any], event: EventBase, room_state_ids: StateMap[str] - ) -> Optional[Dict[str, Any]]: + self, notif: EmailPushAction, event: EventBase, room_state_ids: StateMap[str] + ) -> Optional[MessageVars]: """ Generate the variables for a single event, if possible. @@ -494,7 +502,9 @@ class Mailer: if sender_state_event: sender_name = name_from_member_event(sender_state_event) - sender_avatar_url = sender_state_event.content.get("avatar_url") + sender_avatar_url: Optional[str] = sender_state_event.content.get( + "avatar_url" + ) else: # No state could be found, fallback to the MXID. sender_name = event.sender @@ -504,7 +514,7 @@ class Mailer: # sender_hash % the number of default images to choose from sender_hash = string_ordinal_total(event.sender) - ret = { + ret: MessageVars = { "event_type": event.type, "is_historical": event.event_id != notif["event_id"], "id": event.event_id, @@ -519,6 +529,8 @@ class Mailer: return ret msgtype = event.content.get("msgtype") + if not isinstance(msgtype, str): + msgtype = None ret["msgtype"] = msgtype @@ -533,7 +545,7 @@ class Mailer: return ret def _add_text_message_vars( - self, messagevars: Dict[str, Any], event: EventBase + self, messagevars: MessageVars, event: EventBase ) -> None: """ Potentially add a sanitised message body to the message variables. @@ -543,8 +555,8 @@ class Mailer: event: The event under consideration. """ msgformat = event.content.get("format") - - messagevars["format"] = msgformat + if not isinstance(msgformat, str): + msgformat = None formatted_body = event.content.get("formatted_body") body = event.content.get("body") @@ -555,7 +567,7 @@ class Mailer: messagevars["body_text_html"] = safe_text(body) def _add_image_message_vars( - self, messagevars: Dict[str, Any], event: EventBase + self, messagevars: MessageVars, event: EventBase ) -> None: """ Potentially add an image URL to the message variables. @@ -570,7 +582,7 @@ class Mailer: async def _make_summary_text_single_room( self, room_id: str, - notifs: List[Dict[str, Any]], + notifs: List[EmailPushAction], room_state_ids: StateMap[str], notif_events: Dict[str, EventBase], user_id: str, @@ -685,10 +697,10 @@ class Mailer: async def _make_summary_text( self, - notifs_by_room: Dict[str, List[Dict[str, Any]]], + notifs_by_room: Dict[str, List[EmailPushAction]], room_state_ids: Dict[str, StateMap[str]], notif_events: Dict[str, EventBase], - reason: Dict[str, Any], + reason: EmailReason, ) -> str: """ Make a summary text for the email when multiple rooms have notifications. @@ -718,7 +730,7 @@ class Mailer: async def _make_summary_text_from_member_events( self, room_id: str, - notifs: List[Dict[str, Any]], + notifs: List[EmailPushAction], room_state_ids: StateMap[str], notif_events: Dict[str, EventBase], ) -> str: @@ -805,7 +817,7 @@ class Mailer: base_url = "https://matrix.to/#" return "%s/%s" % (base_url, room_id) - def _make_notif_link(self, notif: Dict[str, str]) -> str: + def _make_notif_link(self, notif: EmailPushAction) -> str: """ Generate a link to open an event in the web client. diff --git a/synapse/push/push_types.py b/synapse/push/push_types.py new file mode 100644 index 0000000000..8d16ab62ce --- /dev/null +++ b/synapse/push/push_types.py @@ -0,0 +1,136 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import List, Optional + +from typing_extensions import TypedDict + + +class EmailReason(TypedDict, total=False): + """ + Information on the event that triggered the email to be sent + + room_id: the ID of the room the event was sent in + now: timestamp in ms when the email is being sent out + room_name: a human-readable name for the room the event was sent in + received_at: the time in milliseconds at which the event was received + delay_before_mail_ms: the amount of time in milliseconds Synapse always waits + before ever emailing about a notification (to give the user a chance to respond + to other push or notice the window) + last_sent_ts: the time in milliseconds at which a notification was last sent + for an event in this room + throttle_ms: the minimum amount of time in milliseconds between two + notifications can be sent for this room + """ + + room_id: str + now: int + room_name: Optional[str] + received_at: int + delay_before_mail_ms: int + last_sent_ts: int + throttle_ms: int + + +class MessageVars(TypedDict, total=False): + """ + Details about a specific message to include in a notification + + event_type: the type of the event + is_historical: a boolean, which is `False` if the message is the one + that triggered the notification, `True` otherwise + id: the ID of the event + ts: the time in milliseconds at which the event was sent + sender_name: the display name for the event's sender + sender_avatar_url: the avatar URL (as a `mxc://` URL) for the event's + sender + sender_hash: a hash of the user ID of the sender + msgtype: the type of the message + body_text_html: html representation of the message + body_text_plain: plaintext representation of the message + image_url: mxc url of an image, when "msgtype" is "m.image" + """ + + event_type: str + is_historical: bool + id: str + ts: int + sender_name: str + sender_avatar_url: Optional[str] + sender_hash: int + msgtype: Optional[str] + body_text_html: str + body_text_plain: str + image_url: str + + +class NotifVars(TypedDict): + """ + Details about an event we are about to include in a notification + + link: a `matrix.to` link to the event + ts: the time in milliseconds at which the event was received + messages: a list of messages containing one message before the event, the + message in the event, and one message after the event. + """ + + link: str + ts: Optional[int] + messages: List[MessageVars] + + +class RoomVars(TypedDict): + """ + Represents a room containing events to include in the email. + + title: a human-readable name for the room + hash: a hash of the ID of the room + invite: a boolean, which is `True` if the room is an invite the user hasn't + accepted yet, `False` otherwise + notifs: a list of events, or an empty list if `invite` is `True`. + link: a `matrix.to` link to the room + avator_url: url to the room's avator + """ + + title: Optional[str] + hash: int + invite: bool + notifs: List[NotifVars] + link: str + avatar_url: Optional[str] + + +class TemplateVars(TypedDict, total=False): + """ + Generic structure for passing to the email sender, can hold all the fields used in email templates. + + app_name: name of the app/service this homeserver is associated with + server_name: name of our own homeserver + link: a link to include into the email to be sent + user_display_name: the display name for the user receiving the notification + unsubscribe_link: the link users can click to unsubscribe from email notifications + summary_text: a summary of the notification(s). The text used can be customised + by configuring the various settings in the `email.subjects` section of the + configuration file. + rooms: a list of rooms containing events to include in the email + reason: information on the event that triggered the email to be sent + """ + + app_name: str + server_name: str + link: str + user_display_name: str + unsubscribe_link: str + summary_text: str + rooms: List[RoomVars] + reason: EmailReason diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index d957e770dc..3efdd0c920 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -16,6 +16,7 @@ import logging from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union import attr +from typing_extensions import TypedDict from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.storage._base import SQLBaseStore, db_to_json @@ -37,6 +38,20 @@ DEFAULT_HIGHLIGHT_ACTION = [ ] +class BasePushAction(TypedDict): + event_id: str + actions: List[Union[dict, str]] + + +class HttpPushAction(BasePushAction): + room_id: str + stream_ordering: int + + +class EmailPushAction(HttpPushAction): + received_ts: Optional[int] + + def _serialize_action(actions, is_highlight): """Custom serializer for actions. This allows us to "compress" common actions. @@ -221,7 +236,7 @@ class EventPushActionsWorkerStore(SQLBaseStore): min_stream_ordering: int, max_stream_ordering: int, limit: int = 20, - ) -> List[dict]: + ) -> List[HttpPushAction]: """Get a list of the most recent unread push actions for a given user, within the given stream ordering range. Called by the httppusher. @@ -326,7 +341,7 @@ class EventPushActionsWorkerStore(SQLBaseStore): min_stream_ordering: int, max_stream_ordering: int, limit: int = 20, - ) -> List[dict]: + ) -> List[EmailPushAction]: """Get a list of the most recent unread push actions for a given user, within the given stream ordering range. Called by the emailpusher -- cgit 1.5.1 From a6f1a3abecf8e8fd3e1bff439a06b853df18f194 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 2 Dec 2021 01:02:20 -0600 Subject: Add MSC3030 experimental client and federation API endpoints to get the closest event to a given timestamp (#9445) MSC3030: https://github.com/matrix-org/matrix-doc/pull/3030 Client API endpoint. This will also go and fetch from the federation API endpoint if unable to find an event locally or we found an extremity with possibly a closer event we don't know about. ``` GET /_matrix/client/unstable/org.matrix.msc3030/rooms//timestamp_to_event?ts=&dir= { "event_id": ... "origin_server_ts": ... } ``` Federation API endpoint: ``` GET /_matrix/federation/unstable/org.matrix.msc3030/timestamp_to_event/?ts=&dir= { "event_id": ... "origin_server_ts": ... } ``` Co-authored-by: Erik Johnston --- changelog.d/9445.feature | 1 + synapse/config/experimental.py | 3 + synapse/federation/federation_client.py | 77 +++++++++ synapse/federation/federation_server.py | 43 +++++ synapse/federation/transport/client.py | 36 ++++ synapse/federation/transport/server/__init__.py | 12 +- synapse/federation/transport/server/federation.py | 41 +++++ synapse/handlers/federation.py | 61 +++---- synapse/handlers/room.py | 144 ++++++++++++++++ synapse/http/servlet.py | 29 ++++ synapse/rest/client/room.py | 58 +++++++ synapse/server.py | 5 + synapse/storage/databases/main/events_worker.py | 195 ++++++++++++++++++++++ 13 files changed, 674 insertions(+), 31 deletions(-) create mode 100644 changelog.d/9445.feature (limited to 'synapse/storage/databases') diff --git a/changelog.d/9445.feature b/changelog.d/9445.feature new file mode 100644 index 0000000000..6d12eea71f --- /dev/null +++ b/changelog.d/9445.feature @@ -0,0 +1 @@ +Add [MSC3030](https://github.com/matrix-org/matrix-doc/pull/3030) experimental client and federation API endpoints to get the closest event to a given timestamp. diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 8b098ad48d..d78a15097c 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -46,3 +46,6 @@ class ExperimentalConfig(Config): # MSC3266 (room summary api) self.msc3266_enabled: bool = experimental.get("msc3266_enabled", False) + + # MSC3030 (Jump to date API endpoint) + self.msc3030_enabled: bool = experimental.get("msc3030_enabled", False) diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index bc3f96c1fc..be1423da24 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -1517,6 +1517,83 @@ class FederationClient(FederationBase): self._get_room_hierarchy_cache[(room_id, suggested_only)] = result return result + async def timestamp_to_event( + self, destination: str, room_id: str, timestamp: int, direction: str + ) -> "TimestampToEventResponse": + """ + Calls a remote federating server at `destination` asking for their + closest event to the given timestamp in the given direction. Also + validates the response to always return the expected keys or raises an + error. + + Args: + destination: Domain name of the remote homeserver + room_id: Room to fetch the event from + timestamp: The point in time (inclusive) we should navigate from in + the given direction to find the closest event. + direction: ["f"|"b"] to indicate whether we should navigate forward + or backward from the given timestamp to find the closest event. + + Returns: + A parsed TimestampToEventResponse including the closest event_id + and origin_server_ts + + Raises: + Various exceptions when the request fails + InvalidResponseError when the response does not have the correct + keys or wrong types + """ + remote_response = await self.transport_layer.timestamp_to_event( + destination, room_id, timestamp, direction + ) + + if not isinstance(remote_response, dict): + raise InvalidResponseError( + "Response must be a JSON dictionary but received %r" % remote_response + ) + + try: + return TimestampToEventResponse.from_json_dict(remote_response) + except ValueError as e: + raise InvalidResponseError(str(e)) + + +@attr.s(frozen=True, slots=True, auto_attribs=True) +class TimestampToEventResponse: + """Typed response dictionary for the federation /timestamp_to_event endpoint""" + + event_id: str + origin_server_ts: int + + # the raw data, including the above keys + data: JsonDict + + @classmethod + def from_json_dict(cls, d: JsonDict) -> "TimestampToEventResponse": + """Parsed response from the federation /timestamp_to_event endpoint + + Args: + d: JSON object response to be parsed + + Raises: + ValueError if d does not the correct keys or they are the wrong types + """ + + event_id = d.get("event_id") + if not isinstance(event_id, str): + raise ValueError( + "Invalid response: 'event_id' must be a str but received %r" % event_id + ) + + origin_server_ts = d.get("origin_server_ts") + if not isinstance(origin_server_ts, int): + raise ValueError( + "Invalid response: 'origin_server_ts' must be a int but received %r" + % origin_server_ts + ) + + return cls(event_id, origin_server_ts, d) + @attr.s(frozen=True, slots=True, auto_attribs=True) class FederationSpaceSummaryEventResult: diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 8fbc75aa65..cce85526e7 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -110,6 +110,7 @@ class FederationServer(FederationBase): super().__init__(hs) self.handler = hs.get_federation_handler() + self.storage = hs.get_storage() self._federation_event_handler = hs.get_federation_event_handler() self.state = hs.get_state_handler() self._event_auth_handler = hs.get_event_auth_handler() @@ -200,6 +201,48 @@ class FederationServer(FederationBase): return 200, res + async def on_timestamp_to_event_request( + self, origin: str, room_id: str, timestamp: int, direction: str + ) -> Tuple[int, Dict[str, Any]]: + """When we receive a federated `/timestamp_to_event` request, + handle all of the logic for validating and fetching the event. + + Args: + origin: The server we received the event from + room_id: Room to fetch the event from + timestamp: The point in time (inclusive) we should navigate from in + the given direction to find the closest event. + direction: ["f"|"b"] to indicate whether we should navigate forward + or backward from the given timestamp to find the closest event. + + Returns: + Tuple indicating the response status code and dictionary response + body including `event_id`. + """ + with (await self._server_linearizer.queue((origin, room_id))): + origin_host, _ = parse_server_name(origin) + await self.check_server_matches_acl(origin_host, room_id) + + # We only try to fetch data from the local database + event_id = await self.store.get_event_id_for_timestamp( + room_id, timestamp, direction + ) + if event_id: + event = await self.store.get_event( + event_id, allow_none=False, allow_rejected=False + ) + + return 200, { + "event_id": event_id, + "origin_server_ts": event.origin_server_ts, + } + + raise SynapseError( + 404, + "Unable to find event from %s in direction %s" % (timestamp, direction), + errcode=Codes.NOT_FOUND, + ) + async def on_incoming_transaction( self, origin: str, diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index fe29bcfd4b..d1f4be641d 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -148,6 +148,42 @@ class TransportLayerClient: destination, path=path, args=args, try_trailing_slash_on_400=True ) + @log_function + async def timestamp_to_event( + self, destination: str, room_id: str, timestamp: int, direction: str + ) -> Union[JsonDict, List]: + """ + Calls a remote federating server at `destination` asking for their + closest event to the given timestamp in the given direction. + + Args: + destination: Domain name of the remote homeserver + room_id: Room to fetch the event from + timestamp: The point in time (inclusive) we should navigate from in + the given direction to find the closest event. + direction: ["f"|"b"] to indicate whether we should navigate forward + or backward from the given timestamp to find the closest event. + + Returns: + Response dict received from the remote homeserver. + + Raises: + Various exceptions when the request fails + """ + path = _create_path( + FEDERATION_UNSTABLE_PREFIX, + "/org.matrix.msc3030/timestamp_to_event/%s", + room_id, + ) + + args = {"ts": [str(timestamp)], "dir": [direction]} + + remote_response = await self.client.get_json( + destination, path=path, args=args, try_trailing_slash_on_400=True + ) + + return remote_response + @log_function async def send_transaction( self, diff --git a/synapse/federation/transport/server/__init__.py b/synapse/federation/transport/server/__init__.py index c32539bf5a..abcb8728f5 100644 --- a/synapse/federation/transport/server/__init__.py +++ b/synapse/federation/transport/server/__init__.py @@ -22,7 +22,10 @@ from synapse.federation.transport.server._base import ( Authenticator, BaseFederationServlet, ) -from synapse.federation.transport.server.federation import FEDERATION_SERVLET_CLASSES +from synapse.federation.transport.server.federation import ( + FEDERATION_SERVLET_CLASSES, + FederationTimestampLookupServlet, +) from synapse.federation.transport.server.groups_local import GROUP_LOCAL_SERVLET_CLASSES from synapse.federation.transport.server.groups_server import ( GROUP_SERVER_SERVLET_CLASSES, @@ -324,6 +327,13 @@ def register_servlets( ) for servletclass in DEFAULT_SERVLET_GROUPS[servlet_group]: + # Only allow the `/timestamp_to_event` servlet if msc3030 is enabled + if ( + servletclass == FederationTimestampLookupServlet + and not hs.config.experimental.msc3030_enabled + ): + continue + servletclass( hs=hs, authenticator=authenticator, diff --git a/synapse/federation/transport/server/federation.py b/synapse/federation/transport/server/federation.py index 66e915228c..77bfd88ad0 100644 --- a/synapse/federation/transport/server/federation.py +++ b/synapse/federation/transport/server/federation.py @@ -174,6 +174,46 @@ class FederationBackfillServlet(BaseFederationServerServlet): return await self.handler.on_backfill_request(origin, room_id, versions, limit) +class FederationTimestampLookupServlet(BaseFederationServerServlet): + """ + API endpoint to fetch the `event_id` of the closest event to the given + timestamp (`ts` query parameter) in the given direction (`dir` query + parameter). + + Useful for other homeservers when they're unable to find an event locally. + + `ts` is a timestamp in milliseconds where we will find the closest event in + the given direction. + + `dir` can be `f` or `b` to indicate forwards and backwards in time from the + given timestamp. + + GET /_matrix/federation/unstable/org.matrix.msc3030/timestamp_to_event/?ts=&dir= + { + "event_id": ... + } + """ + + PATH = "/timestamp_to_event/(?P[^/]*)/?" + PREFIX = FEDERATION_UNSTABLE_PREFIX + "/org.matrix.msc3030" + + async def on_GET( + self, + origin: str, + content: Literal[None], + query: Dict[bytes, List[bytes]], + room_id: str, + ) -> Tuple[int, JsonDict]: + timestamp = parse_integer_from_args(query, "ts", required=True) + direction = parse_string_from_args( + query, "dir", default="f", allowed_values=["f", "b"], required=True + ) + + return await self.handler.on_timestamp_to_event_request( + origin, room_id, timestamp, direction + ) + + class FederationQueryServlet(BaseFederationServerServlet): PATH = "/query/(?P[^/]*)" @@ -683,6 +723,7 @@ FEDERATION_SERVLET_CLASSES: Tuple[Type[BaseFederationServlet], ...] = ( FederationStateV1Servlet, FederationStateIdsServlet, FederationBackfillServlet, + FederationTimestampLookupServlet, FederationQueryServlet, FederationMakeJoinServlet, FederationMakeLeaveServlet, diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 3112cc88b1..1ea837d082 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -68,6 +68,37 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) +def get_domains_from_state(state: StateMap[EventBase]) -> List[Tuple[str, int]]: + """Get joined domains from state + + Args: + state: State map from type/state key to event. + + Returns: + Returns a list of servers with the lowest depth of their joins. + Sorted by lowest depth first. + """ + joined_users = [ + (state_key, int(event.depth)) + for (e_type, state_key), event in state.items() + if e_type == EventTypes.Member and event.membership == Membership.JOIN + ] + + joined_domains: Dict[str, int] = {} + for u, d in joined_users: + try: + dom = get_domain_from_id(u) + old_d = joined_domains.get(dom) + if old_d: + joined_domains[dom] = min(d, old_d) + else: + joined_domains[dom] = d + except Exception: + pass + + return sorted(joined_domains.items(), key=lambda d: d[1]) + + class FederationHandler: """Handles general incoming federation requests @@ -268,36 +299,6 @@ class FederationHandler: curr_state = await self.state_handler.get_current_state(room_id) - def get_domains_from_state(state: StateMap[EventBase]) -> List[Tuple[str, int]]: - """Get joined domains from state - - Args: - state: State map from type/state key to event. - - Returns: - Returns a list of servers with the lowest depth of their joins. - Sorted by lowest depth first. - """ - joined_users = [ - (state_key, int(event.depth)) - for (e_type, state_key), event in state.items() - if e_type == EventTypes.Member and event.membership == Membership.JOIN - ] - - joined_domains: Dict[str, int] = {} - for u, d in joined_users: - try: - dom = get_domain_from_id(u) - old_d = joined_domains.get(dom) - if old_d: - joined_domains[dom] = min(d, old_d) - else: - joined_domains[dom] = d - except Exception: - pass - - return sorted(joined_domains.items(), key=lambda d: d[1]) - curr_domains = get_domains_from_state(curr_state) likely_domains = [ diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 88053f9869..2bcdf32dcc 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -46,6 +46,7 @@ from synapse.api.constants import ( from synapse.api.errors import ( AuthError, Codes, + HttpResponseException, LimitExceededError, NotFoundError, StoreError, @@ -56,6 +57,8 @@ from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion from synapse.event_auth import validate_event_for_room_version from synapse.events import EventBase from synapse.events.utils import copy_power_levels_contents +from synapse.federation.federation_client import InvalidResponseError +from synapse.handlers.federation import get_domains_from_state from synapse.rest.admin._base import assert_user_is_admin from synapse.storage.state import StateFilter from synapse.streams import EventSource @@ -1220,6 +1223,147 @@ class RoomContextHandler: return results +class TimestampLookupHandler: + def __init__(self, hs: "HomeServer"): + self.server_name = hs.hostname + self.store = hs.get_datastore() + self.state_handler = hs.get_state_handler() + self.federation_client = hs.get_federation_client() + + async def get_event_for_timestamp( + self, + requester: Requester, + room_id: str, + timestamp: int, + direction: str, + ) -> Tuple[str, int]: + """Find the closest event to the given timestamp in the given direction. + If we can't find an event locally or the event we have locally is next to a gap, + it will ask other federated homeservers for an event. + + Args: + requester: The user making the request according to the access token + room_id: Room to fetch the event from + timestamp: The point in time (inclusive) we should navigate from in + the given direction to find the closest event. + direction: ["f"|"b"] to indicate whether we should navigate forward + or backward from the given timestamp to find the closest event. + + Returns: + A tuple containing the `event_id` closest to the given timestamp in + the given direction and the `origin_server_ts`. + + Raises: + SynapseError if unable to find any event locally in the given direction + """ + + local_event_id = await self.store.get_event_id_for_timestamp( + room_id, timestamp, direction + ) + logger.debug( + "get_event_for_timestamp: locally, we found event_id=%s closest to timestamp=%s", + local_event_id, + timestamp, + ) + + # Check for gaps in the history where events could be hiding in between + # the timestamp given and the event we were able to find locally + is_event_next_to_backward_gap = False + is_event_next_to_forward_gap = False + if local_event_id: + local_event = await self.store.get_event( + local_event_id, allow_none=False, allow_rejected=False + ) + + if direction == "f": + # We only need to check for a backward gap if we're looking forwards + # to ensure there is nothing in between. + is_event_next_to_backward_gap = ( + await self.store.is_event_next_to_backward_gap(local_event) + ) + elif direction == "b": + # We only need to check for a forward gap if we're looking backwards + # to ensure there is nothing in between + is_event_next_to_forward_gap = ( + await self.store.is_event_next_to_forward_gap(local_event) + ) + + # If we found a gap, we should probably ask another homeserver first + # about more history in between + if ( + not local_event_id + or is_event_next_to_backward_gap + or is_event_next_to_forward_gap + ): + logger.debug( + "get_event_for_timestamp: locally, we found event_id=%s closest to timestamp=%s which is next to a gap in event history so we're asking other homeservers first", + local_event_id, + timestamp, + ) + + # Find other homeservers from the given state in the room + curr_state = await self.state_handler.get_current_state(room_id) + curr_domains = get_domains_from_state(curr_state) + likely_domains = [ + domain for domain, depth in curr_domains if domain != self.server_name + ] + + # Loop through each homeserver candidate until we get a succesful response + for domain in likely_domains: + try: + remote_response = await self.federation_client.timestamp_to_event( + domain, room_id, timestamp, direction + ) + logger.debug( + "get_event_for_timestamp: response from domain(%s)=%s", + domain, + remote_response, + ) + + # TODO: Do we want to persist this as an extremity? + # TODO: I think ideally, we would try to backfill from + # this event and run this whole + # `get_event_for_timestamp` function again to make sure + # they didn't give us an event from their gappy history. + remote_event_id = remote_response.event_id + origin_server_ts = remote_response.origin_server_ts + + # Only return the remote event if it's closer than the local event + if not local_event or ( + abs(origin_server_ts - timestamp) + < abs(local_event.origin_server_ts - timestamp) + ): + return remote_event_id, origin_server_ts + except (HttpResponseException, InvalidResponseError) as ex: + # Let's not put a high priority on some other homeserver + # failing to respond or giving a random response + logger.debug( + "Failed to fetch /timestamp_to_event from %s because of exception(%s) %s args=%s", + domain, + type(ex).__name__, + ex, + ex.args, + ) + except Exception as ex: + # But we do want to see some exceptions in our code + logger.warning( + "Failed to fetch /timestamp_to_event from %s because of exception(%s) %s args=%s", + domain, + type(ex).__name__, + ex, + ex.args, + ) + + if not local_event_id: + raise SynapseError( + 404, + "Unable to find event from %s in direction %s" % (timestamp, direction), + errcode=Codes.NOT_FOUND, + ) + + return local_event_id, local_event.origin_server_ts + + class RoomEventSource(EventSource[RoomStreamToken, EventBase]): def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index 91ba93372c..6dd9b9ad03 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -79,6 +79,35 @@ def parse_integer( return parse_integer_from_args(args, name, default, required) +@overload +def parse_integer_from_args( + args: Mapping[bytes, Sequence[bytes]], + name: str, + default: Optional[int] = None, +) -> Optional[int]: + ... + + +@overload +def parse_integer_from_args( + args: Mapping[bytes, Sequence[bytes]], + name: str, + *, + required: Literal[True], +) -> int: + ... + + +@overload +def parse_integer_from_args( + args: Mapping[bytes, Sequence[bytes]], + name: str, + default: Optional[int] = None, + required: bool = False, +) -> Optional[int]: + ... + + def parse_integer_from_args( args: Mapping[bytes, Sequence[bytes]], name: str, diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 99f303c88e..3598967be0 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -1070,6 +1070,62 @@ def register_txn_path( ) +class TimestampLookupRestServlet(RestServlet): + """ + API endpoint to fetch the `event_id` of the closest event to the given + timestamp (`ts` query parameter) in the given direction (`dir` query + parameter). + + Useful for cases like jump to date so you can start paginating messages from + a given date in the archive. + + `ts` is a timestamp in milliseconds where we will find the closest event in + the given direction. + + `dir` can be `f` or `b` to indicate forwards and backwards in time from the + given timestamp. + + GET /_matrix/client/unstable/org.matrix.msc3030/rooms//timestamp_to_event?ts=&dir= + { + "event_id": ... + } + """ + + PATTERNS = ( + re.compile( + "^/_matrix/client/unstable/org.matrix.msc3030" + "/rooms/(?P[^/]*)/timestamp_to_event$" + ), + ) + + def __init__(self, hs: "HomeServer"): + super().__init__() + self._auth = hs.get_auth() + self._store = hs.get_datastore() + self.timestamp_lookup_handler = hs.get_timestamp_lookup_handler() + + async def on_GET( + self, request: SynapseRequest, room_id: str + ) -> Tuple[int, JsonDict]: + requester = await self._auth.get_user_by_req(request) + await self._auth.check_user_in_room(room_id, requester.user.to_string()) + + timestamp = parse_integer(request, "ts", required=True) + direction = parse_string(request, "dir", default="f", allowed_values=["f", "b"]) + + ( + event_id, + origin_server_ts, + ) = await self.timestamp_lookup_handler.get_event_for_timestamp( + requester, room_id, timestamp, direction + ) + + return 200, { + "event_id": event_id, + "origin_server_ts": origin_server_ts, + } + + class RoomSpaceSummaryRestServlet(RestServlet): PATTERNS = ( re.compile( @@ -1239,6 +1295,8 @@ def register_servlets( RoomAliasListServlet(hs).register(http_server) SearchRestServlet(hs).register(http_server) RoomCreateRestServlet(hs).register(http_server) + if hs.config.experimental.msc3030_enabled: + TimestampLookupRestServlet(hs).register(http_server) # Some servlets only get registered for the main process. if not is_worker: diff --git a/synapse/server.py b/synapse/server.py index 877eba6c08..185e40e4da 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -97,6 +97,7 @@ from synapse.handlers.room import ( RoomContextHandler, RoomCreationHandler, RoomShutdownHandler, + TimestampLookupHandler, ) from synapse.handlers.room_batch import RoomBatchHandler from synapse.handlers.room_list import RoomListHandler @@ -728,6 +729,10 @@ class HomeServer(metaclass=abc.ABCMeta): def get_room_context_handler(self) -> RoomContextHandler: return RoomContextHandler(self) + @cache_in_self + def get_timestamp_lookup_handler(self) -> TimestampLookupHandler: + return TimestampLookupHandler(self) + @cache_in_self def get_registration_handler(self) -> RegistrationHandler: return RegistrationHandler(self) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 4cefc0a07e..fd19674f93 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -1762,3 +1762,198 @@ class EventsWorkerStore(SQLBaseStore): "_cleanup_old_transaction_ids", _cleanup_old_transaction_ids_txn, ) + + async def is_event_next_to_backward_gap(self, event: EventBase) -> bool: + """Check if the given event is next to a backward gap of missing events. + A(False)--->B(False)--->C(True)---> + + Args: + room_id: room where the event lives + event_id: event to check + + Returns: + Boolean indicating whether it's an extremity + """ + + def is_event_next_to_backward_gap_txn(txn: LoggingTransaction) -> bool: + # If the event in question has any of its prev_events listed as a + # backward extremity, it's next to a gap. + # + # We can't just check the backward edges in `event_edges` because + # when we persist events, we will also record the prev_events as + # edges to the event in question regardless of whether we have those + # prev_events yet. We need to check whether those prev_events are + # backward extremities, also known as gaps, that need to be + # backfilled. + backward_extremity_query = """ + SELECT 1 FROM event_backward_extremities + WHERE + room_id = ? + AND %s + LIMIT 1 + """ + + # If the event in question is a backward extremity or has any of its + # prev_events listed as a backward extremity, it's next to a + # backward gap. + clause, args = make_in_list_sql_clause( + self.database_engine, + "event_id", + [event.event_id] + list(event.prev_event_ids()), + ) + + txn.execute(backward_extremity_query % (clause,), [event.room_id] + args) + backward_extremities = txn.fetchall() + + # We consider any backward extremity as a backward gap + if len(backward_extremities): + return True + + return False + + return await self.db_pool.runInteraction( + "is_event_next_to_backward_gap_txn", + is_event_next_to_backward_gap_txn, + ) + + async def is_event_next_to_forward_gap(self, event: EventBase) -> bool: + """Check if the given event is next to a forward gap of missing events. + The gap in front of the latest events is not considered a gap. + A(False)--->B(False)--->C(False)---> + A(False)--->B(False)---> --->D(True)--->E(False) + + Args: + room_id: room where the event lives + event_id: event to check + + Returns: + Boolean indicating whether it's an extremity + """ + + def is_event_next_to_gap_txn(txn: LoggingTransaction) -> bool: + # If the event in question is a forward extremity, we will just + # consider any potential forward gap as not a gap since it's one of + # the latest events in the room. + # + # `event_forward_extremities` does not include backfilled or outlier + # events so we can't rely on it to find forward gaps. We can only + # use it to determine whether a message is the latest in the room. + # + # We can't combine this query with the `forward_edge_query` below + # because if the event in question has no forward edges (isn't + # referenced by any other event's prev_events) but is in + # `event_forward_extremities`, we don't want to return 0 rows and + # say it's next to a gap. + forward_extremity_query = """ + SELECT 1 FROM event_forward_extremities + WHERE + room_id = ? + AND event_id = ? + LIMIT 1 + """ + + # Check to see whether the event in question is already referenced + # by another event. If we don't see any edges, we're next to a + # forward gap. + forward_edge_query = """ + SELECT 1 FROM event_edges + /* Check to make sure the event referencing our event in question is not rejected */ + LEFT JOIN rejections ON event_edges.event_id == rejections.event_id + WHERE + event_edges.room_id = ? + AND event_edges.prev_event_id = ? + /* It's not a valid edge if the event referencing our event in + * question is rejected. + */ + AND rejections.event_id IS NULL + LIMIT 1 + """ + + # We consider any forward extremity as the latest in the room and + # not a forward gap. + # + # To expand, even though there is technically a gap at the front of + # the room where the forward extremities are, we consider those the + # latest messages in the room so asking other homeservers for more + # is useless. The new latest messages will just be federated as + # usual. + txn.execute(forward_extremity_query, (event.room_id, event.event_id)) + forward_extremities = txn.fetchall() + if len(forward_extremities): + return False + + # If there are no forward edges to the event in question (another + # event hasn't referenced this event in their prev_events), then we + # assume there is a forward gap in the history. + txn.execute(forward_edge_query, (event.room_id, event.event_id)) + forward_edges = txn.fetchall() + if not len(forward_edges): + return True + + return False + + return await self.db_pool.runInteraction( + "is_event_next_to_gap_txn", + is_event_next_to_gap_txn, + ) + + async def get_event_id_for_timestamp( + self, room_id: str, timestamp: int, direction: str + ) -> Optional[str]: + """Find the closest event to the given timestamp in the given direction. + + Args: + room_id: Room to fetch the event from + timestamp: The point in time (inclusive) we should navigate from in + the given direction to find the closest event. + direction: ["f"|"b"] to indicate whether we should navigate forward + or backward from the given timestamp to find the closest event. + + Returns: + The closest event_id otherwise None if we can't find any event in + the given direction. + """ + + sql_template = """ + SELECT event_id FROM events + LEFT JOIN rejections USING (event_id) + WHERE + origin_server_ts %s ? + AND room_id = ? + /* Make sure event is not rejected */ + AND rejections.event_id IS NULL + ORDER BY origin_server_ts %s + LIMIT 1; + """ + + def get_event_id_for_timestamp_txn(txn: LoggingTransaction) -> Optional[str]: + if direction == "b": + # Find closest event *before* a given timestamp. We use descending + # (which gives values largest to smallest) because we want the + # largest possible timestamp *before* the given timestamp. + comparison_operator = "<=" + order = "DESC" + else: + # Find closest event *after* a given timestamp. We use ascending + # (which gives values smallest to largest) because we want the + # closest possible timestamp *after* the given timestamp. + comparison_operator = ">=" + order = "ASC" + + txn.execute( + sql_template % (comparison_operator, order), (timestamp, room_id) + ) + row = txn.fetchone() + if row: + (event_id,) = row + return event_id + + return None + + if direction not in ("f", "b"): + raise ValueError("Unknown direction: %s" % (direction,)) + + return await self.db_pool.runInteraction( + "get_event_id_for_timestamp_txn", + get_event_id_for_timestamp_txn, + ) -- cgit 1.5.1 From 435f04480728c5d982e1a63c1b2777784bf9cd26 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Thu, 2 Dec 2021 15:30:05 +0000 Subject: Add type annotations to `tests.storage.test_appservice`. (#11488) --- changelog.d/11488.misc | 1 + mypy.ini | 1 - synapse/appservice/__init__.py | 3 +- synapse/storage/databases/main/appservice.py | 6 +- tests/storage/test_appservice.py | 140 ++++++++++++++++++--------- 5 files changed, 98 insertions(+), 53 deletions(-) create mode 100644 changelog.d/11488.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/11488.misc b/changelog.d/11488.misc new file mode 100644 index 0000000000..c14a7d2e98 --- /dev/null +++ b/changelog.d/11488.misc @@ -0,0 +1 @@ +Add type annotations to `tests.storage.test_appservice`. \ No newline at end of file diff --git a/mypy.ini b/mypy.ini index 51056a8f64..99b5c41ad6 100644 --- a/mypy.ini +++ b/mypy.ini @@ -111,7 +111,6 @@ exclude = (?x) |tests/server_notices/test_resource_limits_server_notices.py |tests/state/test_v2.py |tests/storage/test_account_data.py - |tests/storage/test_appservice.py |tests/storage/test_background_update.py |tests/storage/test_base.py |tests/storage/test_client_ips.py diff --git a/synapse/appservice/__init__.py b/synapse/appservice/__init__.py index 6504c6bd3f..f9d3bd337d 100644 --- a/synapse/appservice/__init__.py +++ b/synapse/appservice/__init__.py @@ -13,6 +13,7 @@ # limitations under the License. import logging import re +from enum import Enum from typing import TYPE_CHECKING, Iterable, List, Match, Optional from synapse.api.constants import EventTypes @@ -27,7 +28,7 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -class ApplicationServiceState: +class ApplicationServiceState(Enum): DOWN = "down" UP = "up" diff --git a/synapse/storage/databases/main/appservice.py b/synapse/storage/databases/main/appservice.py index baec35ee27..4a883dc166 100644 --- a/synapse/storage/databases/main/appservice.py +++ b/synapse/storage/databases/main/appservice.py @@ -143,7 +143,7 @@ class ApplicationServiceTransactionWorkerStore( A list of ApplicationServices, which may be empty. """ results = await self.db_pool.simple_select_list( - "application_services_state", {"state": state}, ["as_id"] + "application_services_state", {"state": state.value}, ["as_id"] ) # NB: This assumes this class is linked with ApplicationServiceStore as_list = self.get_app_services() @@ -173,7 +173,7 @@ class ApplicationServiceTransactionWorkerStore( desc="get_appservice_state", ) if result: - return result.get("state") + return ApplicationServiceState(result.get("state")) return None async def set_appservice_state( @@ -186,7 +186,7 @@ class ApplicationServiceTransactionWorkerStore( state: The connectivity state to apply. """ await self.db_pool.simple_upsert( - "application_services_state", {"as_id": service.id}, {"state": state} + "application_services_state", {"as_id": service.id}, {"state": state.value} ) async def create_appservice_txn( diff --git a/tests/storage/test_appservice.py b/tests/storage/test_appservice.py index f26d5acf9c..4b20a28ca2 100644 --- a/tests/storage/test_appservice.py +++ b/tests/storage/test_appservice.py @@ -14,19 +14,25 @@ import json import os import tempfile +from typing import Any, Generator, List, Optional, cast from unittest.mock import Mock import yaml from twisted.internet import defer +from twisted.internet.defer import Deferred +from twisted.test.proto_helpers import MemoryReactor from synapse.appservice import ApplicationService, ApplicationServiceState from synapse.config._base import ConfigError +from synapse.events import EventBase +from synapse.server import HomeServer from synapse.storage.database import DatabasePool, make_conn from synapse.storage.databases.main.appservice import ( ApplicationServiceStore, ApplicationServiceTransactionStore, ) +from synapse.util import Clock from tests import unittest from tests.test_utils import make_awaitable @@ -36,7 +42,7 @@ from tests.utils import setup_test_homeserver class ApplicationServiceStoreTestCase(unittest.TestCase): @defer.inlineCallbacks def setUp(self): - self.as_yaml_files = [] + self.as_yaml_files: List[str] = [] hs = yield setup_test_homeserver( self.addCleanup, federation_sender=Mock(), federation_client=Mock() ) @@ -58,7 +64,7 @@ class ApplicationServiceStoreTestCase(unittest.TestCase): database, make_conn(database._database_config, database.engine, "test"), hs ) - def tearDown(self): + def tearDown(self) -> None: # TODO: suboptimal that we need to create files for tests! for f in self.as_yaml_files: try: @@ -66,7 +72,7 @@ class ApplicationServiceStoreTestCase(unittest.TestCase): except Exception: pass - def _add_appservice(self, as_token, id, url, hs_token, sender): + def _add_appservice(self, as_token, id, url, hs_token, sender) -> None: as_yaml = { "url": url, "as_token": as_token, @@ -80,12 +86,13 @@ class ApplicationServiceStoreTestCase(unittest.TestCase): outfile.write(yaml.dump(as_yaml)) self.as_yaml_files.append(as_token) - def test_retrieve_unknown_service_token(self): + def test_retrieve_unknown_service_token(self) -> None: service = self.store.get_app_service_by_token("invalid_token") self.assertEquals(service, None) - def test_retrieval_of_service(self): + def test_retrieval_of_service(self) -> None: stored_service = self.store.get_app_service_by_token(self.as_token) + assert stored_service is not None self.assertEquals(stored_service.token, self.as_token) self.assertEquals(stored_service.id, self.as_id) self.assertEquals(stored_service.url, self.as_url) @@ -93,7 +100,7 @@ class ApplicationServiceStoreTestCase(unittest.TestCase): self.assertEquals(stored_service.namespaces[ApplicationService.NS_ROOMS], []) self.assertEquals(stored_service.namespaces[ApplicationService.NS_USERS], []) - def test_retrieval_of_all_services(self): + def test_retrieval_of_all_services(self) -> None: services = self.store.get_app_services() self.assertEquals(len(services), 3) @@ -101,7 +108,7 @@ class ApplicationServiceStoreTestCase(unittest.TestCase): class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): @defer.inlineCallbacks def setUp(self): - self.as_yaml_files = [] + self.as_yaml_files: List[str] = [] hs = yield setup_test_homeserver( self.addCleanup, federation_sender=Mock(), federation_client=Mock() @@ -117,7 +124,7 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): {"token": "gamma_tok", "url": "https://gamma.com", "id": "id_gamma"}, ] for s in self.as_list: - yield self._add_service(s["url"], s["token"], s["id"]) + self._add_service(s["url"], s["token"], s["id"]) self.as_yaml_files = [] @@ -131,7 +138,7 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): database, make_conn(db_config, self.engine, "test"), hs ) - def _add_service(self, url, as_token, id): + def _add_service(self, url, as_token, id) -> None: as_yaml = { "url": url, "as_token": as_token, @@ -145,13 +152,15 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): outfile.write(yaml.dump(as_yaml)) self.as_yaml_files.append(as_token) - def _set_state(self, id, state, txn=None): + def _set_state( + self, id: str, state: ApplicationServiceState, txn: Optional[int] = None + ): return self.db_pool.runOperation( self.engine.convert_param_style( "INSERT INTO application_services_state(as_id, state, last_txn) " "VALUES(?,?,?)" ), - (id, state, txn), + (id, state.value, txn), ) def _insert_txn(self, as_id, txn_id, events): @@ -169,24 +178,30 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): "INSERT INTO application_services_state(as_id, last_txn, state) " "VALUES(?,?,?)" ), - (as_id, txn_id, ApplicationServiceState.UP), + (as_id, txn_id, ApplicationServiceState.UP.value), ) @defer.inlineCallbacks - def test_get_appservice_state_none(self): + def test_get_appservice_state_none( + self, + ) -> Generator["Deferred[object]", object, None]: service = Mock(id="999") state = yield defer.ensureDeferred(self.store.get_appservice_state(service)) self.assertEquals(None, state) @defer.inlineCallbacks - def test_get_appservice_state_up(self): + def test_get_appservice_state_up( + self, + ) -> Generator["Deferred[object]", object, None]: yield self._set_state(self.as_list[0]["id"], ApplicationServiceState.UP) service = Mock(id=self.as_list[0]["id"]) state = yield defer.ensureDeferred(self.store.get_appservice_state(service)) self.assertEquals(ApplicationServiceState.UP, state) @defer.inlineCallbacks - def test_get_appservice_state_down(self): + def test_get_appservice_state_down( + self, + ) -> Generator["Deferred[object]", object, None]: yield self._set_state(self.as_list[0]["id"], ApplicationServiceState.UP) yield self._set_state(self.as_list[1]["id"], ApplicationServiceState.DOWN) yield self._set_state(self.as_list[2]["id"], ApplicationServiceState.DOWN) @@ -195,14 +210,18 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): self.assertEquals(ApplicationServiceState.DOWN, state) @defer.inlineCallbacks - def test_get_appservices_by_state_none(self): + def test_get_appservices_by_state_none( + self, + ) -> Generator["Deferred[object]", Any, None]: services = yield defer.ensureDeferred( self.store.get_appservices_by_state(ApplicationServiceState.DOWN) ) self.assertEquals(0, len(services)) @defer.inlineCallbacks - def test_set_appservices_state_down(self): + def test_set_appservices_state_down( + self, + ) -> Generator["Deferred[object]", Any, None]: service = Mock(id=self.as_list[1]["id"]) yield defer.ensureDeferred( self.store.set_appservice_state(service, ApplicationServiceState.DOWN) @@ -211,12 +230,14 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): self.engine.convert_param_style( "SELECT as_id FROM application_services_state WHERE state=?" ), - (ApplicationServiceState.DOWN,), + (ApplicationServiceState.DOWN.value,), ) self.assertEquals(service.id, rows[0][0]) @defer.inlineCallbacks - def test_set_appservices_state_multiple_up(self): + def test_set_appservices_state_multiple_up( + self, + ) -> Generator["Deferred[object]", Any, None]: service = Mock(id=self.as_list[1]["id"]) yield defer.ensureDeferred( self.store.set_appservice_state(service, ApplicationServiceState.UP) @@ -231,14 +252,16 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): self.engine.convert_param_style( "SELECT as_id FROM application_services_state WHERE state=?" ), - (ApplicationServiceState.UP,), + (ApplicationServiceState.UP.value,), ) self.assertEquals(service.id, rows[0][0]) @defer.inlineCallbacks - def test_create_appservice_txn_first(self): + def test_create_appservice_txn_first( + self, + ) -> Generator["Deferred[object]", Any, None]: service = Mock(id=self.as_list[0]["id"]) - events = [Mock(event_id="e1"), Mock(event_id="e2")] + events = cast(List[EventBase], [Mock(event_id="e1"), Mock(event_id="e2")]) txn = yield defer.ensureDeferred( self.store.create_appservice_txn(service, events, []) ) @@ -247,9 +270,11 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): self.assertEquals(txn.service, service) @defer.inlineCallbacks - def test_create_appservice_txn_older_last_txn(self): + def test_create_appservice_txn_older_last_txn( + self, + ) -> Generator["Deferred[object]", Any, None]: service = Mock(id=self.as_list[0]["id"]) - events = [Mock(event_id="e1"), Mock(event_id="e2")] + events = cast(List[EventBase], [Mock(event_id="e1"), Mock(event_id="e2")]) yield self._set_last_txn(service.id, 9643) # AS is falling behind yield self._insert_txn(service.id, 9644, events) yield self._insert_txn(service.id, 9645, events) @@ -261,9 +286,11 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): self.assertEquals(txn.service, service) @defer.inlineCallbacks - def test_create_appservice_txn_up_to_date_last_txn(self): + def test_create_appservice_txn_up_to_date_last_txn( + self, + ) -> Generator["Deferred[object]", Any, None]: service = Mock(id=self.as_list[0]["id"]) - events = [Mock(event_id="e1"), Mock(event_id="e2")] + events = cast(List[EventBase], [Mock(event_id="e1"), Mock(event_id="e2")]) yield self._set_last_txn(service.id, 9643) txn = yield defer.ensureDeferred( self.store.create_appservice_txn(service, events, []) @@ -273,9 +300,11 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): self.assertEquals(txn.service, service) @defer.inlineCallbacks - def test_create_appservice_txn_up_fuzzing(self): + def test_create_appservice_txn_up_fuzzing( + self, + ) -> Generator["Deferred[object]", Any, None]: service = Mock(id=self.as_list[0]["id"]) - events = [Mock(event_id="e1"), Mock(event_id="e2")] + events = cast(List[EventBase], [Mock(event_id="e1"), Mock(event_id="e2")]) yield self._set_last_txn(service.id, 9643) # dump in rows with higher IDs to make sure the queries aren't wrong. @@ -296,7 +325,9 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): self.assertEquals(txn.service, service) @defer.inlineCallbacks - def test_complete_appservice_txn_first_txn(self): + def test_complete_appservice_txn_first_txn( + self, + ) -> Generator["Deferred[object]", Any, None]: service = Mock(id=self.as_list[0]["id"]) events = [Mock(event_id="e1"), Mock(event_id="e2")] txn_id = 1 @@ -324,7 +355,9 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): self.assertEquals(0, len(res)) @defer.inlineCallbacks - def test_complete_appservice_txn_existing_in_state_table(self): + def test_complete_appservice_txn_existing_in_state_table( + self, + ) -> Generator["Deferred[object]", Any, None]: service = Mock(id=self.as_list[0]["id"]) events = [Mock(event_id="e1"), Mock(event_id="e2")] txn_id = 5 @@ -342,7 +375,7 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): ) self.assertEquals(1, len(res)) self.assertEquals(txn_id, res[0][0]) - self.assertEquals(ApplicationServiceState.UP, res[0][1]) + self.assertEquals(ApplicationServiceState.UP.value, res[0][1]) res = yield self.db_pool.runQuery( self.engine.convert_param_style( @@ -353,20 +386,23 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): self.assertEquals(0, len(res)) @defer.inlineCallbacks - def test_get_oldest_unsent_txn_none(self): + def test_get_oldest_unsent_txn_none( + self, + ) -> Generator["Deferred[object]", Any, None]: service = Mock(id=self.as_list[0]["id"]) txn = yield defer.ensureDeferred(self.store.get_oldest_unsent_txn(service)) self.assertEquals(None, txn) @defer.inlineCallbacks - def test_get_oldest_unsent_txn(self): + def test_get_oldest_unsent_txn(self) -> Generator["Deferred[object]", Any, None]: service = Mock(id=self.as_list[0]["id"]) events = [Mock(event_id="e1"), Mock(event_id="e2")] other_events = [Mock(event_id="e5"), Mock(event_id="e6")] # we aren't testing store._base stuff here, so mock this out - self.store.get_events_as_list = Mock(return_value=make_awaitable(events)) + # (ignore needed because Mypy won't allow us to assign to a method otherwise) + self.store.get_events_as_list = Mock(return_value=make_awaitable(events)) # type: ignore[assignment] yield self._insert_txn(self.as_list[1]["id"], 9, other_events) yield self._insert_txn(service.id, 10, events) @@ -379,7 +415,9 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): self.assertEquals(events, txn.events) @defer.inlineCallbacks - def test_get_appservices_by_state_single(self): + def test_get_appservices_by_state_single( + self, + ) -> Generator["Deferred[object]", Any, None]: yield self._set_state(self.as_list[0]["id"], ApplicationServiceState.DOWN) yield self._set_state(self.as_list[1]["id"], ApplicationServiceState.UP) @@ -390,7 +428,9 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): self.assertEquals(self.as_list[0]["id"], services[0].id) @defer.inlineCallbacks - def test_get_appservices_by_state_multiple(self): + def test_get_appservices_by_state_multiple( + self, + ) -> Generator["Deferred[object]", Any, None]: yield self._set_state(self.as_list[0]["id"], ApplicationServiceState.DOWN) yield self._set_state(self.as_list[1]["id"], ApplicationServiceState.UP) yield self._set_state(self.as_list[2]["id"], ApplicationServiceState.DOWN) @@ -407,16 +447,20 @@ class ApplicationServiceTransactionStoreTestCase(unittest.TestCase): class ApplicationServiceStoreTypeStreamIds(unittest.HomeserverTestCase): - def make_homeserver(self, reactor, clock): + def make_homeserver(self, reactor, clock) -> "HomeServer": hs = self.setup_test_homeserver() return hs - def prepare(self, hs, reactor, clock): + def prepare( + self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer + ) -> None: self.service = Mock(id="foo") self.store = self.hs.get_datastore() - self.get_success(self.store.set_appservice_state(self.service, "up")) + self.get_success( + self.store.set_appservice_state(self.service, ApplicationServiceState.UP) + ) - def test_get_type_stream_id_for_appservice_no_value(self): + def test_get_type_stream_id_for_appservice_no_value(self) -> None: value = self.get_success( self.store.get_type_stream_id_for_appservice(self.service, "read_receipt") ) @@ -427,13 +471,13 @@ class ApplicationServiceStoreTypeStreamIds(unittest.HomeserverTestCase): ) self.assertEquals(value, 0) - def test_get_type_stream_id_for_appservice_invalid_type(self): + def test_get_type_stream_id_for_appservice_invalid_type(self) -> None: self.get_failure( self.store.get_type_stream_id_for_appservice(self.service, "foobar"), ValueError, ) - def test_set_type_stream_id_for_appservice(self): + def test_set_type_stream_id_for_appservice(self) -> None: read_receipt_value = 1024 self.get_success( self.store.set_type_stream_id_for_appservice( @@ -455,7 +499,7 @@ class ApplicationServiceStoreTypeStreamIds(unittest.HomeserverTestCase): ) self.assertEqual(result, read_receipt_value) - def test_set_type_stream_id_for_appservice_invalid_type(self): + def test_set_type_stream_id_for_appservice_invalid_type(self) -> None: self.get_failure( self.store.set_type_stream_id_for_appservice(self.service, "foobar", 1024), ValueError, @@ -464,12 +508,12 @@ class ApplicationServiceStoreTypeStreamIds(unittest.HomeserverTestCase): # required for ApplicationServiceTransactionStoreTestCase tests class TestTransactionStore(ApplicationServiceTransactionStore, ApplicationServiceStore): - def __init__(self, database: DatabasePool, db_conn, hs): + def __init__(self, database: DatabasePool, db_conn, hs) -> None: super().__init__(database, db_conn, hs) class ApplicationServiceStoreConfigTestCase(unittest.TestCase): - def _write_config(self, suffix, **kwargs): + def _write_config(self, suffix, **kwargs) -> str: vals = { "id": "id" + suffix, "url": "url" + suffix, @@ -486,7 +530,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase): return path @defer.inlineCallbacks - def test_unique_works(self): + def test_unique_works(self) -> Generator["Deferred[object]", Any, None]: f1 = self._write_config(suffix="1") f2 = self._write_config(suffix="2") @@ -503,7 +547,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase): ) @defer.inlineCallbacks - def test_duplicate_ids(self): + def test_duplicate_ids(self) -> Generator["Deferred[object]", Any, None]: f1 = self._write_config(id="id", suffix="1") f2 = self._write_config(id="id", suffix="2") @@ -528,7 +572,7 @@ class ApplicationServiceStoreConfigTestCase(unittest.TestCase): self.assertIn("id", str(e)) @defer.inlineCallbacks - def test_duplicate_as_tokens(self): + def test_duplicate_as_tokens(self) -> Generator["Deferred[object]", Any, None]: f1 = self._write_config(as_token="as_token", suffix="1") f2 = self._write_config(as_token="as_token", suffix="2") -- cgit 1.5.1 From d26808dd854006bd26a2366c675428ce0737238c Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 2 Dec 2021 20:58:32 +0000 Subject: Comments on the /sync tentacles (#11494) This mainly consists of docstrings and inline comments. There are one or two type annotations and variable renames thrown in while I was here. Co-authored-by: Patrick Cloke --- changelog.d/11494.misc | 1 + synapse/handlers/sync.py | 156 +++++++++++++++++++++++-------- synapse/storage/databases/main/stream.py | 15 ++- 3 files changed, 129 insertions(+), 43 deletions(-) create mode 100644 changelog.d/11494.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/11494.misc b/changelog.d/11494.misc new file mode 100644 index 0000000000..7afd7d3a07 --- /dev/null +++ b/changelog.d/11494.misc @@ -0,0 +1 @@ +Add comments to various parts of the `/sync` handler. \ No newline at end of file diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 891435c14d..53d4627147 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -334,6 +334,19 @@ class SyncHandler: full_state: bool, cache_context: ResponseCacheContext[SyncRequestKey], ) -> SyncResult: + """The start of the machinery that produces a /sync response. + + See https://spec.matrix.org/v1.1/client-server-api/#syncing for full details. + + This method does high-level bookkeeping: + - tracking the kind of sync in the logging context + - deleting any to_device messages whose delivery has been acknowledged. + - deciding if we should dispatch an instant or delayed response + - marking the sync as being lazily loaded, if appropriate + + Computing the body of the response begins in the next method, + `current_sync_for_user`. + """ if since_token is None: sync_type = "initial_sync" elif full_state: @@ -363,7 +376,7 @@ class SyncHandler: sync_config, since_token, full_state=full_state ) else: - + # Otherwise, we wait for something to happen and report it to the user. async def current_sync_callback( before_token: StreamToken, after_token: StreamToken ) -> SyncResult: @@ -402,7 +415,12 @@ class SyncHandler: since_token: Optional[StreamToken] = None, full_state: bool = False, ) -> SyncResult: - """Get the sync for client needed to match what the server has now.""" + """Generates the response body of a sync result, represented as a SyncResult. + + This is a wrapper around `generate_sync_result` which starts an open tracing + span to track the sync. See `generate_sync_result` for the next part of your + indoctrination. + """ with start_active_span("current_sync_for_user"): log_kv({"since_token": since_token}) sync_result = await self.generate_sync_result( @@ -560,7 +578,7 @@ class SyncHandler: # that have happened since `since_key` up to `end_key`, so we # can just use `get_room_events_stream_for_room`. # Otherwise, we want to return the last N events in the room - # in toplogical ordering. + # in topological ordering. if since_key: events, end_key = await self.store.get_room_events_stream_for_room( room_id, @@ -1042,7 +1060,18 @@ class SyncHandler: since_token: Optional[StreamToken] = None, full_state: bool = False, ) -> SyncResult: - """Generates a sync result.""" + """Generates the response body of a sync result. + + This is represented by a `SyncResult` struct, which is built from small pieces + using a `SyncResultBuilder`. See also + https://spec.matrix.org/v1.1/client-server-api/#get_matrixclientv3sync + the `sync_result_builder` is passed as a mutable ("inout") parameter to various + helper functions. These retrieve and process the data which forms the sync body, + often writing to the `sync_result_builder` to store their output. + + At the end, we transfer data from the `sync_result_builder` to a new `SyncResult` + instance to signify that the sync calculation is complete. + """ # NB: The now_token gets changed by some of the generate_sync_* methods, # this is due to some of the underlying streams not supporting the ability # to query up to a given point. @@ -1344,14 +1373,22 @@ class SyncHandler: async def _generate_sync_entry_for_account_data( self, sync_result_builder: "SyncResultBuilder" ) -> Dict[str, Dict[str, JsonDict]]: - """Generates the account data portion of the sync response. Populates - `sync_result_builder` with the result. + """Generates the account data portion of the sync response. + + Account data (called "Client Config" in the spec) can be set either globally + or for a specific room. Account data consists of a list of events which + accumulate state, much like a room. + + This function retrieves global and per-room account data. The former is written + to the given `sync_result_builder`. The latter is returned directly, to be + later written to the `sync_result_builder` on a room-by-room basis. Args: sync_result_builder Returns: - A dictionary containing the per room account data. + A dictionary whose keys (room ids) map to the per room account data for that + room. """ sync_config = sync_result_builder.sync_config user_id = sync_result_builder.sync_config.user.to_string() @@ -1359,7 +1396,7 @@ class SyncHandler: if since_token and not sync_result_builder.full_state: ( - account_data, + global_account_data, account_data_by_room, ) = await self.store.get_updated_account_data_for_user( user_id, since_token.account_data_key @@ -1370,23 +1407,23 @@ class SyncHandler: ) if push_rules_changed: - account_data["m.push_rules"] = await self.push_rules_for_user( + global_account_data["m.push_rules"] = await self.push_rules_for_user( sync_config.user ) else: ( - account_data, + global_account_data, account_data_by_room, ) = await self.store.get_account_data_for_user(sync_config.user.to_string()) - account_data["m.push_rules"] = await self.push_rules_for_user( + global_account_data["m.push_rules"] = await self.push_rules_for_user( sync_config.user ) account_data_for_user = await sync_config.filter_collection.filter_account_data( [ {"type": account_data_type, "content": content} - for account_data_type, content in account_data.items() + for account_data_type, content in global_account_data.items() ] ) @@ -1460,15 +1497,22 @@ class SyncHandler: """Generates the rooms portion of the sync response. Populates the `sync_result_builder` with the result. + In the response that reaches the client, rooms are divided into four categories: + `invite`, `join`, `knock`, `leave`. These aren't the same as the four sets of + room ids returned by this function. + Args: sync_result_builder account_data_by_room: Dictionary of per room account data Returns: - Returns a 4-tuple of - `(newly_joined_rooms, newly_joined_or_invited_users, - newly_left_rooms, newly_left_users)` + Returns a 4-tuple whose entries are: + - newly_joined_rooms + - newly_joined_or_invited_or_knocked_users + - newly_left_rooms + - newly_left_users """ + # Start by fetching all ephemeral events in rooms we've joined (if required). user_id = sync_result_builder.sync_config.user.to_string() block_all_room_ephemeral = ( sync_result_builder.since_token is None @@ -1590,6 +1634,8 @@ class SyncHandler: ) -> bool: """Returns whether there may be any new events that should be sent down the sync. Returns True if there are. + + Does not modify the `sync_result_builder`. """ user_id = sync_result_builder.sync_config.user.to_string() since_token = sync_result_builder.since_token @@ -1597,12 +1643,13 @@ class SyncHandler: assert since_token - # Get a list of membership change events that have happened. - rooms_changed = await self.store.get_membership_changes_for_user( + # Get a list of membership change events that have happened to the user + # requesting the sync. + membership_changes = await self.store.get_membership_changes_for_user( user_id, since_token.room_key, now_token.room_key ) - if rooms_changed: + if membership_changes: return True stream_id = since_token.room_key.stream @@ -1614,7 +1661,25 @@ class SyncHandler: async def _get_rooms_changed( self, sync_result_builder: "SyncResultBuilder", ignored_users: FrozenSet[str] ) -> _RoomChanges: - """Gets the the changes that have happened since the last sync.""" + """Determine the changes in rooms to report to the user. + + Ideally, we want to report all events whose stream ordering `s` lies in the + range `since_token < s <= now_token`, where the two tokens are read from the + sync_result_builder. + + If there are too many events in that range to report, things get complicated. + In this situation we return a truncated list of the most recent events, and + indicate in the response that there is a "gap" of omitted events. Additionally: + + - we include a "state_delta", to describe the changes in state over the gap, + - we include all membership events applying to the user making the request, + even those in the gap. + + See the spec for the rationale: + https://spec.matrix.org/v1.1/client-server-api/#syncing + + The sync_result_builder is not modified by this function. + """ user_id = sync_result_builder.sync_config.user.to_string() since_token = sync_result_builder.since_token now_token = sync_result_builder.now_token @@ -1622,21 +1687,36 @@ class SyncHandler: assert since_token - # Get a list of membership change events that have happened. - rooms_changed = await self.store.get_membership_changes_for_user( + # The spec + # https://spec.matrix.org/v1.1/client-server-api/#get_matrixclientv3sync + # notes that membership events need special consideration: + # + # > When a sync is limited, the server MUST return membership events for events + # > in the gap (between since and the start of the returned timeline), regardless + # > as to whether or not they are redundant. + # + # We fetch such events here, but we only seem to use them for categorising rooms + # as newly joined, newly left, invited or knocked. + # TODO: we've already called this function and ran this query in + # _have_rooms_changed. We could keep the results in memory to avoid a + # second query, at the cost of more complicated source code. + membership_change_events = await self.store.get_membership_changes_for_user( user_id, since_token.room_key, now_token.room_key ) mem_change_events_by_room_id: Dict[str, List[EventBase]] = {} - for event in rooms_changed: + for event in membership_change_events: mem_change_events_by_room_id.setdefault(event.room_id, []).append(event) - newly_joined_rooms = [] - newly_left_rooms = [] - room_entries = [] - invited = [] - knocked = [] + newly_joined_rooms: List[str] = [] + newly_left_rooms: List[str] = [] + room_entries: List[RoomSyncResultBuilder] = [] + invited: List[InvitedSyncResult] = [] + knocked: List[KnockedSyncResult] = [] for room_id, events in mem_change_events_by_room_id.items(): + # The body of this loop will add this room to at least one of the five lists + # above. Things get messy if you've e.g. joined, left, joined then left the + # room all in the same sync period. logger.debug( "Membership changes in %s: [%s]", room_id, @@ -1781,7 +1861,9 @@ class SyncHandler: timeline_limit = sync_config.filter_collection.timeline_limit() - # Get all events for rooms we're currently joined to. + # Get all events since the `from_key` in rooms we're currently joined to. + # If there are too many, we get the most recent events only. This leaves + # a "gap" in the timeline, as described by the spec for /sync. room_to_events = await self.store.get_room_events_stream_for_rooms( room_ids=sync_result_builder.joined_room_ids, from_key=since_token.room_key, @@ -1842,6 +1924,10 @@ class SyncHandler: ) -> _RoomChanges: """Returns entries for all rooms for the user. + Like `_get_rooms_changed`, but assumes the `since_token` is `None`. + + This function does not modify the sync_result_builder. + Args: sync_result_builder ignored_users: Set of users ignored by user. @@ -1853,16 +1939,9 @@ class SyncHandler: now_token = sync_result_builder.now_token sync_config = sync_result_builder.sync_config - membership_list = ( - Membership.INVITE, - Membership.KNOCK, - Membership.JOIN, - Membership.LEAVE, - Membership.BAN, - ) - room_list = await self.store.get_rooms_for_local_user_where_membership_is( - user_id=user_id, membership_list=membership_list + user_id=user_id, + membership_list=Membership.LIST, ) room_entries = [] @@ -2212,8 +2291,7 @@ def _calculate_state( # to only include membership events for the senders in the timeline. # In practice, we can do this by removing them from the p_ids list, # which is the list of relevant state we know we have already sent to the client. - # see https://github.com/matrix-org/synapse/pull/2970 - # /files/efcdacad7d1b7f52f879179701c7e0d9b763511f#r204732809 + # see https://github.com/matrix-org/synapse/pull/2970/files/efcdacad7d1b7f52f879179701c7e0d9b763511f#r204732809 if lazy_load_members: p_ids.difference_update( diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 42dc807d17..57aab55259 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -497,7 +497,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore, metaclass=abc.ABCMeta): oldest `limit` events. Returns: - The list of events (in ascending order) and the token from the start + The list of events (in ascending stream order) and the token from the start of the chunk of events returned. """ if from_key == to_key: @@ -510,7 +510,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore, metaclass=abc.ABCMeta): if not has_changed: return [], from_key - def f(txn): + def f(txn: LoggingTransaction) -> List[_EventDictReturn]: # To handle tokens with a non-empty instance_map we fetch more # results than necessary and then filter down min_from_id = from_key.stream @@ -565,6 +565,13 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore, metaclass=abc.ABCMeta): async def get_membership_changes_for_user( self, user_id: str, from_key: RoomStreamToken, to_key: RoomStreamToken ) -> List[EventBase]: + """Fetch membership events for a given user. + + All such events whose stream ordering `s` lies in the range + `from_key < s <= to_key` are returned. Events are ordered by ascending stream + order. + """ + # Start by ruling out cases where a DB query is not necessary. if from_key == to_key: return [] @@ -575,7 +582,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore, metaclass=abc.ABCMeta): if not has_changed: return [] - def f(txn): + def f(txn: LoggingTransaction) -> List[_EventDictReturn]: # To handle tokens with a non-empty instance_map we fetch more # results than necessary and then filter down min_from_id = from_key.stream @@ -634,7 +641,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore, metaclass=abc.ABCMeta): Returns: A list of events and a token pointing to the start of the returned - events. The events returned are in ascending order. + events. The events returned are in ascending topological order. """ rows, token = await self.get_recent_event_ids_for_room( -- cgit 1.5.1 From 5640992d176a499204a0756b1677c9b1575b0a49 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 2 Dec 2021 22:42:58 +0000 Subject: Disambiguate queries on `state_key` (#11497) We're going to add a `state_key` column to the `events` table, so we need to add some disambiguation to queries which use it. --- changelog.d/11497.misc | 1 + synapse/storage/databases/main/event_federation.py | 4 ++-- synapse/storage/databases/main/events.py | 4 ++-- synapse/storage/databases/main/events_worker.py | 16 ++++++++-------- synapse/storage/databases/main/purge_events.py | 2 +- synapse/storage/databases/main/roommember.py | 4 ++-- synapse/storage/schema/__init__.py | 6 +++++- 7 files changed, 21 insertions(+), 16 deletions(-) create mode 100644 changelog.d/11497.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/11497.misc b/changelog.d/11497.misc new file mode 100644 index 0000000000..c4393f6193 --- /dev/null +++ b/changelog.d/11497.misc @@ -0,0 +1 @@ +Preparation for database schema simplifications: disambiguate queries on `state_key`. diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index ef5d1ef01e..9580a40785 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1552,9 +1552,9 @@ class EventFederationStore(EventFederationWorkerStore): DELETE FROM event_auth WHERE event_id IN ( SELECT event_id FROM events - LEFT JOIN state_events USING (room_id, event_id) + LEFT JOIN state_events AS se USING (room_id, event_id) WHERE ? <= stream_ordering AND stream_ordering < ? - AND state_key IS null + AND se.state_key IS null ) """ diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 4171b904eb..4e528612ea 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -575,9 +575,9 @@ class PersistEventsStore: # fetch their auth event info. while missing_auth_chains: sql = """ - SELECT event_id, events.type, state_key, chain_id, sequence_number + SELECT event_id, events.type, se.state_key, chain_id, sequence_number FROM events - INNER JOIN state_events USING (event_id) + INNER JOIN state_events AS se USING (event_id) LEFT JOIN event_auth_chains USING (event_id) WHERE """ diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index fd19674f93..c7b660ac5a 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -1408,10 +1408,10 @@ class EventsWorkerStore(SQLBaseStore): ) -> List[Tuple[int, str, str, str, str, str, str, str, str]]: sql = ( "SELECT e.stream_ordering, e.event_id, e.room_id, e.type," - " state_key, redacts, relates_to_id, membership, rejections.reason IS NOT NULL" + " se.state_key, redacts, relates_to_id, membership, rejections.reason IS NOT NULL" " FROM events AS e" " LEFT JOIN redactions USING (event_id)" - " LEFT JOIN state_events USING (event_id)" + " LEFT JOIN state_events AS se USING (event_id)" " LEFT JOIN event_relations USING (event_id)" " LEFT JOIN room_memberships USING (event_id)" " LEFT JOIN rejections USING (event_id)" @@ -1449,11 +1449,11 @@ class EventsWorkerStore(SQLBaseStore): ) -> List[Tuple[int, str, str, str, str, str, str, str, str]]: sql = ( "SELECT event_stream_ordering, e.event_id, e.room_id, e.type," - " state_key, redacts, relates_to_id, membership, rejections.reason IS NOT NULL" + " se.state_key, redacts, relates_to_id, membership, rejections.reason IS NOT NULL" " FROM events AS e" " INNER JOIN ex_outlier_stream AS out USING (event_id)" " LEFT JOIN redactions USING (event_id)" - " LEFT JOIN state_events USING (event_id)" + " LEFT JOIN state_events AS se USING (event_id)" " LEFT JOIN event_relations USING (event_id)" " LEFT JOIN room_memberships USING (event_id)" " LEFT JOIN rejections USING (event_id)" @@ -1507,10 +1507,10 @@ class EventsWorkerStore(SQLBaseStore): ) -> Tuple[List[Tuple[int, Tuple[str, str, str, str, str, str]]], int, bool]: sql = ( "SELECT -e.stream_ordering, e.event_id, e.room_id, e.type," - " state_key, redacts, relates_to_id" + " se.state_key, redacts, relates_to_id" " FROM events AS e" " LEFT JOIN redactions USING (event_id)" - " LEFT JOIN state_events USING (event_id)" + " LEFT JOIN state_events AS se USING (event_id)" " LEFT JOIN event_relations USING (event_id)" " WHERE ? > stream_ordering AND stream_ordering >= ?" " AND instance_name = ?" @@ -1537,11 +1537,11 @@ class EventsWorkerStore(SQLBaseStore): sql = ( "SELECT -event_stream_ordering, e.event_id, e.room_id, e.type," - " state_key, redacts, relates_to_id" + " se.state_key, redacts, relates_to_id" " FROM events AS e" " INNER JOIN ex_outlier_stream AS out USING (event_id)" " LEFT JOIN redactions USING (event_id)" - " LEFT JOIN state_events USING (event_id)" + " LEFT JOIN state_events AS se USING (event_id)" " LEFT JOIN event_relations USING (event_id)" " WHERE ? > event_stream_ordering" " AND event_stream_ordering >= ?" diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py index 3eb30944bf..91b0576b85 100644 --- a/synapse/storage/databases/main/purge_events.py +++ b/synapse/storage/databases/main/purge_events.py @@ -118,7 +118,7 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore): logger.info("[purge] looking for events to delete") - should_delete_expr = "state_key IS NULL" + should_delete_expr = "state_events.state_key IS NULL" should_delete_params: Tuple[Any, ...] = () if not delete_local_events: should_delete_expr += " AND event_id NOT LIKE ?" diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 033a9831d6..6b2a8d06a6 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -476,7 +476,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): INNER JOIN events AS e USING (room_id, event_id) WHERE c.type = 'm.room.member' - AND state_key = ? + AND c.state_key = ? AND c.membership = ? """ else: @@ -487,7 +487,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): INNER JOIN events AS e USING (room_id, event_id) WHERE c.type = 'm.room.member' - AND state_key = ? + AND c.state_key = ? AND m.membership = ? """ diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index 3a00ed6835..50d08094d5 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -SCHEMA_VERSION = 65 # remember to update the list below when updating +SCHEMA_VERSION = 66 # remember to update the list below when updating """Represents the expectations made by the codebase about the database schema This should be incremented whenever the codebase changes its requirements on the @@ -46,6 +46,10 @@ Changes in SCHEMA_VERSION = 65: - MSC2716: Remove unique event_id constraint from insertion_event_edges because an insertion event can have multiple edges. - Remove unused tables `user_stats_historical` and `room_stats_historical`. + +Changes in SCHEMA_VERSION = 66: + - Queries on state_key columns are now disambiguated (ie, the codebase can handle + the `events` table having a `state_key` column). """ -- cgit 1.5.1 From 8b4b153c9e86c04c7db8c74fde4b6a04becbc461 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Mon, 6 Dec 2021 17:59:50 +0100 Subject: Add admin API to get some information about federation status (#11407) --- changelog.d/11407.feature | 1 + docs/SUMMARY.md | 1 + docs/usage/administration/admin_api/federation.md | 114 ++++++ synapse/rest/admin/__init__.py | 6 + synapse/rest/admin/federation.py | 135 +++++++ synapse/storage/databases/main/transactions.py | 70 ++++ tests/rest/admin/test_federation.py | 456 ++++++++++++++++++++++ 7 files changed, 783 insertions(+) create mode 100644 changelog.d/11407.feature create mode 100644 docs/usage/administration/admin_api/federation.md create mode 100644 synapse/rest/admin/federation.py create mode 100644 tests/rest/admin/test_federation.py (limited to 'synapse/storage/databases') diff --git a/changelog.d/11407.feature b/changelog.d/11407.feature new file mode 100644 index 0000000000..1d21bde98f --- /dev/null +++ b/changelog.d/11407.feature @@ -0,0 +1 @@ +Add admin API to get some information about federation status with remote servers. \ No newline at end of file diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 41c8f0fbc9..b05af6d690 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -65,6 +65,7 @@ - [Statistics](admin_api/statistics.md) - [Users](admin_api/user_admin_api.md) - [Server Version](admin_api/version_api.md) + - [Federation](usage/administration/admin_api/federation.md) - [Manhole](manhole.md) - [Monitoring](metrics-howto.md) - [Understanding Synapse Through Grafana Graphs](usage/administration/understanding_synapse_through_grafana_graphs.md) diff --git a/docs/usage/administration/admin_api/federation.md b/docs/usage/administration/admin_api/federation.md new file mode 100644 index 0000000000..8f9535f57b --- /dev/null +++ b/docs/usage/administration/admin_api/federation.md @@ -0,0 +1,114 @@ +# Federation API + +This API allows a server administrator to manage Synapse's federation with other homeservers. + +Note: This API is new, experimental and "subject to change". + +## List of destinations + +This API gets the current destination retry timing info for all remote servers. + +The list contains all the servers with which the server federates, +regardless of whether an error occurred or not. +If an error occurs, it may take up to 20 minutes for the error to be displayed here, +as a complete retry must have failed. + +The API is: + +A standard request with no filtering: + +``` +GET /_synapse/admin/v1/federation/destinations +``` + +A response body like the following is returned: + +```json +{ + "destinations":[ + { + "destination": "matrix.org", + "retry_last_ts": 1557332397936, + "retry_interval": 3000000, + "failure_ts": 1557329397936, + "last_successful_stream_ordering": null + } + ], + "total": 1 +} +``` + +To paginate, check for `next_token` and if present, call the endpoint again +with `from` set to the value of `next_token`. This will return a new page. + +If the endpoint does not return a `next_token` then there are no more destinations +to paginate through. + +**Parameters** + +The following query parameters are available: + +- `from` - Offset in the returned list. Defaults to `0`. +- `limit` - Maximum amount of destinations to return. Defaults to `100`. +- `order_by` - The method in which to sort the returned list of destinations. + Valid values are: + - `destination` - Destinations are ordered alphabetically by remote server name. + This is the default. + - `retry_last_ts` - Destinations are ordered by time of last retry attempt in ms. + - `retry_interval` - Destinations are ordered by how long until next retry in ms. + - `failure_ts` - Destinations are ordered by when the server started failing in ms. + - `last_successful_stream_ordering` - Destinations are ordered by the stream ordering + of the most recent successfully-sent PDU. +- `dir` - Direction of room order. Either `f` for forwards or `b` for backwards. Setting + this value to `b` will reverse the above sort order. Defaults to `f`. + +*Caution:* The database only has an index on the column `destination`. +This means that if a different sort order is used, +this can cause a large load on the database, especially for large environments. + +**Response** + +The following fields are returned in the JSON response body: + +- `destinations` - An array of objects, each containing information about a destination. + Destination objects contain the following fields: + - `destination` - string - Name of the remote server to federate. + - `retry_last_ts` - integer - The last time Synapse tried and failed to reach the + remote server, in ms. This is `0` if the last attempt to communicate with the + remote server was successful. + - `retry_interval` - integer - How long since the last time Synapse tried to reach + the remote server before trying again, in ms. This is `0` if no further retrying occuring. + - `failure_ts` - nullable integer - The first time Synapse tried and failed to reach the + remote server, in ms. This is `null` if communication with the remote server has never failed. + - `last_successful_stream_ordering` - nullable integer - The stream ordering of the most + recent successfully-sent [PDU](understanding_synapse_through_grafana_graphs.md#federation) + to this destination, or `null` if this information has not been tracked yet. +- `next_token`: string representing a positive integer - Indication for pagination. See above. +- `total` - integer - Total number of destinations. + +# Destination Details API + +This API gets the retry timing info for a specific remote server. + +The API is: + +``` +GET /_synapse/admin/v1/federation/destinations/ +``` + +A response body like the following is returned: + +```json +{ + "destination": "matrix.org", + "retry_last_ts": 1557332397936, + "retry_interval": 3000000, + "failure_ts": 1557329397936, + "last_successful_stream_ordering": null +} +``` + +**Response** + +The response fields are the same like in the `destinations` array in +[List of destinations](#list-of-destinations) response. diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index c51a029bf3..c499afd4be 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -40,6 +40,10 @@ from synapse.rest.admin.event_reports import ( EventReportDetailRestServlet, EventReportsRestServlet, ) +from synapse.rest.admin.federation import ( + DestinationsRestServlet, + ListDestinationsRestServlet, +) from synapse.rest.admin.groups import DeleteGroupAdminRestServlet from synapse.rest.admin.media import ListMediaInRoom, register_servlets_for_media_repo from synapse.rest.admin.registration_tokens import ( @@ -261,6 +265,8 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: ListRegistrationTokensRestServlet(hs).register(http_server) NewRegistrationTokenRestServlet(hs).register(http_server) RegistrationTokenRestServlet(hs).register(http_server) + DestinationsRestServlet(hs).register(http_server) + ListDestinationsRestServlet(hs).register(http_server) # Some servlets only get registered for the main process. if hs.config.worker.worker_app is None: diff --git a/synapse/rest/admin/federation.py b/synapse/rest/admin/federation.py new file mode 100644 index 0000000000..744687be35 --- /dev/null +++ b/synapse/rest/admin/federation.py @@ -0,0 +1,135 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +from http import HTTPStatus +from typing import TYPE_CHECKING, Tuple + +from synapse.api.errors import Codes, NotFoundError, SynapseError +from synapse.http.servlet import RestServlet, parse_integer, parse_string +from synapse.http.site import SynapseRequest +from synapse.rest.admin._base import admin_patterns, assert_requester_is_admin +from synapse.storage.databases.main.transactions import DestinationSortOrder +from synapse.types import JsonDict + +if TYPE_CHECKING: + from synapse.server import HomeServer + +logger = logging.getLogger(__name__) + + +class ListDestinationsRestServlet(RestServlet): + """Get request to list all destinations. + This needs user to have administrator access in Synapse. + + GET /_synapse/admin/v1/federation/destinations?from=0&limit=10 + + returns: + 200 OK with list of destinations if success otherwise an error. + + The parameters `from` and `limit` are required only for pagination. + By default, a `limit` of 100 is used. + The parameter `destination` can be used to filter by destination. + The parameter `order_by` can be used to order the result. + """ + + PATTERNS = admin_patterns("/federation/destinations$") + + def __init__(self, hs: "HomeServer"): + self._auth = hs.get_auth() + self._store = hs.get_datastore() + + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + await assert_requester_is_admin(self._auth, request) + + start = parse_integer(request, "from", default=0) + limit = parse_integer(request, "limit", default=100) + + if start < 0: + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Query parameter from must be a string representing a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + + if limit < 0: + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Query parameter limit must be a string representing a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + + destination = parse_string(request, "destination") + + order_by = parse_string( + request, + "order_by", + default=DestinationSortOrder.DESTINATION.value, + allowed_values=[dest.value for dest in DestinationSortOrder], + ) + + direction = parse_string(request, "dir", default="f", allowed_values=("f", "b")) + + destinations, total = await self._store.get_destinations_paginate( + start, limit, destination, order_by, direction + ) + response = {"destinations": destinations, "total": total} + if (start + limit) < total: + response["next_token"] = str(start + len(destinations)) + + return HTTPStatus.OK, response + + +class DestinationsRestServlet(RestServlet): + """Get details of a destination. + This needs user to have administrator access in Synapse. + + GET /_synapse/admin/v1/federation/destinations/ + + returns: + 200 OK with details of a destination if success otherwise an error. + """ + + PATTERNS = admin_patterns("/federation/destinations/(?P[^/]+)$") + + def __init__(self, hs: "HomeServer"): + self._auth = hs.get_auth() + self._store = hs.get_datastore() + + async def on_GET( + self, request: SynapseRequest, destination: str + ) -> Tuple[int, JsonDict]: + await assert_requester_is_admin(self._auth, request) + + destination_retry_timings = await self._store.get_destination_retry_timings( + destination + ) + + if not destination_retry_timings: + raise NotFoundError("Unknown destination") + + last_successful_stream_ordering = ( + await self._store.get_destination_last_successful_stream_ordering( + destination + ) + ) + + response = { + "destination": destination, + "failure_ts": destination_retry_timings.failure_ts, + "retry_last_ts": destination_retry_timings.retry_last_ts, + "retry_interval": destination_retry_timings.retry_interval, + "last_successful_stream_ordering": last_successful_stream_ordering, + } + + return HTTPStatus.OK, response diff --git a/synapse/storage/databases/main/transactions.py b/synapse/storage/databases/main/transactions.py index d7dc1f73ac..1622822552 100644 --- a/synapse/storage/databases/main/transactions.py +++ b/synapse/storage/databases/main/transactions.py @@ -14,6 +14,7 @@ import logging from collections import namedtuple +from enum import Enum from typing import TYPE_CHECKING, Iterable, List, Optional, Tuple import attr @@ -44,6 +45,16 @@ _UpdateTransactionRow = namedtuple( ) +class DestinationSortOrder(Enum): + """Enum to define the sorting method used when returning destinations.""" + + DESTINATION = "destination" + RETRY_LAST_TS = "retry_last_ts" + RETTRY_INTERVAL = "retry_interval" + FAILURE_TS = "failure_ts" + LAST_SUCCESSFUL_STREAM_ORDERING = "last_successful_stream_ordering" + + @attr.s(slots=True, frozen=True, auto_attribs=True) class DestinationRetryTimings: """The current destination retry timing info for a remote server.""" @@ -480,3 +491,62 @@ class TransactionWorkerStore(CacheInvalidationWorkerStore): destinations = [row[0] for row in txn] return destinations + + async def get_destinations_paginate( + self, + start: int, + limit: int, + destination: Optional[str] = None, + order_by: str = DestinationSortOrder.DESTINATION.value, + direction: str = "f", + ) -> Tuple[List[JsonDict], int]: + """Function to retrieve a paginated list of destinations. + This will return a json list of destinations and the + total number of destinations matching the filter criteria. + + Args: + start: start number to begin the query from + limit: number of rows to retrieve + destination: search string in destination + order_by: the sort order of the returned list + direction: sort ascending or descending + Returns: + A tuple of a list of mappings from destination to information + and a count of total destinations. + """ + + def get_destinations_paginate_txn( + txn: LoggingTransaction, + ) -> Tuple[List[JsonDict], int]: + order_by_column = DestinationSortOrder(order_by).value + + if direction == "b": + order = "DESC" + else: + order = "ASC" + + args = [] + where_statement = "" + if destination: + args.extend(["%" + destination.lower() + "%"]) + where_statement = "WHERE LOWER(destination) LIKE ?" + + sql_base = f"FROM destinations {where_statement} " + sql = f"SELECT COUNT(*) as total_destinations {sql_base}" + txn.execute(sql, args) + count = txn.fetchone()[0] + + sql = f""" + SELECT destination, retry_last_ts, retry_interval, failure_ts, + last_successful_stream_ordering + {sql_base} + ORDER BY {order_by_column} {order}, destination ASC + LIMIT ? OFFSET ? + """ + txn.execute(sql, args + [limit, start]) + destinations = self.db_pool.cursor_to_dict(txn) + return destinations, count + + return await self.db_pool.runInteraction( + "get_destinations_paginate_txn", get_destinations_paginate_txn + ) diff --git a/tests/rest/admin/test_federation.py b/tests/rest/admin/test_federation.py new file mode 100644 index 0000000000..5188499ef2 --- /dev/null +++ b/tests/rest/admin/test_federation.py @@ -0,0 +1,456 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from http import HTTPStatus +from typing import List, Optional + +from parameterized import parameterized + +import synapse.rest.admin +from synapse.api.errors import Codes +from synapse.rest.client import login +from synapse.server import HomeServer +from synapse.types import JsonDict + +from tests import unittest + + +class FederationTestCase(unittest.HomeserverTestCase): + servlets = [ + synapse.rest.admin.register_servlets, + login.register_servlets, + ] + + def prepare(self, reactor, clock, hs: HomeServer): + self.store = hs.get_datastore() + self.register_user("admin", "pass", admin=True) + self.admin_user_tok = self.login("admin", "pass") + + self.url = "/_synapse/admin/v1/federation/destinations" + + @parameterized.expand( + [ + ("/_synapse/admin/v1/federation/destinations",), + ("/_synapse/admin/v1/federation/destinations/dummy",), + ] + ) + def test_requester_is_no_admin(self, url: str): + """ + If the user is not a server admin, an error 403 is returned. + """ + + self.register_user("user", "pass", admin=False) + other_user_tok = self.login("user", "pass") + + channel = self.make_request( + "GET", + url, + content={}, + access_token=other_user_tok, + ) + + self.assertEqual(HTTPStatus.FORBIDDEN, channel.code, msg=channel.json_body) + self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) + + def test_invalid_parameter(self): + """ + If parameters are invalid, an error is returned. + """ + + # negative limit + channel = self.make_request( + "GET", + self.url + "?limit=-5", + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code, msg=channel.json_body) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + + # negative from + channel = self.make_request( + "GET", + self.url + "?from=-5", + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code, msg=channel.json_body) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + + # unkown order_by + channel = self.make_request( + "GET", + self.url + "?order_by=bar", + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code, msg=channel.json_body) + self.assertEqual(Codes.UNKNOWN, channel.json_body["errcode"]) + + # invalid search order + channel = self.make_request( + "GET", + self.url + "?dir=bar", + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.BAD_REQUEST, channel.code, msg=channel.json_body) + self.assertEqual(Codes.UNKNOWN, channel.json_body["errcode"]) + + # invalid destination + channel = self.make_request( + "GET", + self.url + "/dummy", + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.NOT_FOUND, channel.code, msg=channel.json_body) + self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"]) + + def test_limit(self): + """ + Testing list of destinations with limit + """ + + number_destinations = 20 + self._create_destinations(number_destinations) + + channel = self.make_request( + "GET", + self.url + "?limit=5", + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertEqual(channel.json_body["total"], number_destinations) + self.assertEqual(len(channel.json_body["destinations"]), 5) + self.assertEqual(channel.json_body["next_token"], "5") + self._check_fields(channel.json_body["destinations"]) + + def test_from(self): + """ + Testing list of destinations with a defined starting point (from) + """ + + number_destinations = 20 + self._create_destinations(number_destinations) + + channel = self.make_request( + "GET", + self.url + "?from=5", + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertEqual(channel.json_body["total"], number_destinations) + self.assertEqual(len(channel.json_body["destinations"]), 15) + self.assertNotIn("next_token", channel.json_body) + self._check_fields(channel.json_body["destinations"]) + + def test_limit_and_from(self): + """ + Testing list of destinations with a defined starting point and limit + """ + + number_destinations = 20 + self._create_destinations(number_destinations) + + channel = self.make_request( + "GET", + self.url + "?from=5&limit=10", + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertEqual(channel.json_body["total"], number_destinations) + self.assertEqual(channel.json_body["next_token"], "15") + self.assertEqual(len(channel.json_body["destinations"]), 10) + self._check_fields(channel.json_body["destinations"]) + + def test_next_token(self): + """ + Testing that `next_token` appears at the right place + """ + + number_destinations = 20 + self._create_destinations(number_destinations) + + # `next_token` does not appear + # Number of results is the number of entries + channel = self.make_request( + "GET", + self.url + "?limit=20", + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertEqual(channel.json_body["total"], number_destinations) + self.assertEqual(len(channel.json_body["destinations"]), number_destinations) + self.assertNotIn("next_token", channel.json_body) + + # `next_token` does not appear + # Number of max results is larger than the number of entries + channel = self.make_request( + "GET", + self.url + "?limit=21", + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertEqual(channel.json_body["total"], number_destinations) + self.assertEqual(len(channel.json_body["destinations"]), number_destinations) + self.assertNotIn("next_token", channel.json_body) + + # `next_token` does appear + # Number of max results is smaller than the number of entries + channel = self.make_request( + "GET", + self.url + "?limit=19", + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertEqual(channel.json_body["total"], number_destinations) + self.assertEqual(len(channel.json_body["destinations"]), 19) + self.assertEqual(channel.json_body["next_token"], "19") + + # Check + # Set `from` to value of `next_token` for request remaining entries + # `next_token` does not appear + channel = self.make_request( + "GET", + self.url + "?from=19", + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertEqual(channel.json_body["total"], number_destinations) + self.assertEqual(len(channel.json_body["destinations"]), 1) + self.assertNotIn("next_token", channel.json_body) + + def test_list_all_destinations(self): + """ + List all destinations. + """ + number_destinations = 5 + self._create_destinations(number_destinations) + + channel = self.make_request( + "GET", + self.url, + {}, + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertEqual(number_destinations, len(channel.json_body["destinations"])) + self.assertEqual(number_destinations, channel.json_body["total"]) + + # Check that all fields are available + self._check_fields(channel.json_body["destinations"]) + + def test_order_by(self): + """ + Testing order list with parameter `order_by` + """ + + def _order_test( + expected_destination_list: List[str], + order_by: Optional[str], + dir: Optional[str] = None, + ): + """Request the list of destinations in a certain order. + Assert that order is what we expect + + Args: + expected_destination_list: The list of user_id in the order + we expect to get back from the server + order_by: The type of ordering to give the server + dir: The direction of ordering to give the server + """ + + url = f"{self.url}?" + if order_by is not None: + url += f"order_by={order_by}&" + if dir is not None and dir in ("b", "f"): + url += f"dir={dir}" + channel = self.make_request( + "GET", + url, + access_token=self.admin_user_tok, + ) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertEqual(channel.json_body["total"], len(expected_destination_list)) + + returned_order = [ + row["destination"] for row in channel.json_body["destinations"] + ] + self.assertEqual(expected_destination_list, returned_order) + self._check_fields(channel.json_body["destinations"]) + + # create destinations + dest = [ + ("sub-a.example.com", 100, 300, 200, 300), + ("sub-b.example.com", 200, 200, 100, 100), + ("sub-c.example.com", 300, 100, 300, 200), + ] + for ( + destination, + failure_ts, + retry_last_ts, + retry_interval, + last_successful_stream_ordering, + ) in dest: + self.get_success( + self.store.set_destination_retry_timings( + destination, failure_ts, retry_last_ts, retry_interval + ) + ) + self.get_success( + self.store.set_destination_last_successful_stream_ordering( + destination, last_successful_stream_ordering + ) + ) + + # order by default (destination) + _order_test([dest[0][0], dest[1][0], dest[2][0]], None) + _order_test([dest[0][0], dest[1][0], dest[2][0]], None, "f") + _order_test([dest[2][0], dest[1][0], dest[0][0]], None, "b") + + # order by destination + _order_test([dest[0][0], dest[1][0], dest[2][0]], "destination") + _order_test([dest[0][0], dest[1][0], dest[2][0]], "destination", "f") + _order_test([dest[2][0], dest[1][0], dest[0][0]], "destination", "b") + + # order by failure_ts + _order_test([dest[0][0], dest[1][0], dest[2][0]], "failure_ts") + _order_test([dest[0][0], dest[1][0], dest[2][0]], "failure_ts", "f") + _order_test([dest[2][0], dest[1][0], dest[0][0]], "failure_ts", "b") + + # order by retry_last_ts + _order_test([dest[2][0], dest[1][0], dest[0][0]], "retry_last_ts") + _order_test([dest[2][0], dest[1][0], dest[0][0]], "retry_last_ts", "f") + _order_test([dest[0][0], dest[1][0], dest[2][0]], "retry_last_ts", "b") + + # order by retry_interval + _order_test([dest[1][0], dest[0][0], dest[2][0]], "retry_interval") + _order_test([dest[1][0], dest[0][0], dest[2][0]], "retry_interval", "f") + _order_test([dest[2][0], dest[0][0], dest[1][0]], "retry_interval", "b") + + # order by last_successful_stream_ordering + _order_test( + [dest[1][0], dest[2][0], dest[0][0]], "last_successful_stream_ordering" + ) + _order_test( + [dest[1][0], dest[2][0], dest[0][0]], "last_successful_stream_ordering", "f" + ) + _order_test( + [dest[0][0], dest[2][0], dest[1][0]], "last_successful_stream_ordering", "b" + ) + + def test_search_term(self): + """Test that searching for a destination works correctly""" + + def _search_test( + expected_destination: Optional[str], + search_term: str, + ): + """Search for a destination and check that the returned destinationis a match + + Args: + expected_destination: The room_id expected to be returned by the API. + Set to None to expect zero results for the search + search_term: The term to search for room names with + """ + url = f"{self.url}?destination={search_term}" + channel = self.make_request( + "GET", + url.encode("ascii"), + access_token=self.admin_user_tok, + ) + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + + # Check that destinations were returned + self.assertTrue("destinations" in channel.json_body) + self._check_fields(channel.json_body["destinations"]) + destinations = channel.json_body["destinations"] + + # Check that the expected number of destinations were returned + expected_destination_count = 1 if expected_destination else 0 + self.assertEqual(len(destinations), expected_destination_count) + self.assertEqual(channel.json_body["total"], expected_destination_count) + + if expected_destination: + # Check that the first returned destination is correct + self.assertEqual(expected_destination, destinations[0]["destination"]) + + number_destinations = 3 + self._create_destinations(number_destinations) + + # Test searching + _search_test("sub0.example.com", "0") + _search_test("sub0.example.com", "sub0") + + _search_test("sub1.example.com", "1") + _search_test("sub1.example.com", "1.") + + # Test case insensitive + _search_test("sub0.example.com", "SUB0") + + _search_test(None, "foo") + _search_test(None, "bar") + + def test_get_single_destination(self): + """ + Get one specific destinations. + """ + self._create_destinations(5) + + channel = self.make_request( + "GET", + self.url + "/sub0.example.com", + access_token=self.admin_user_tok, + ) + + self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) + self.assertEqual("sub0.example.com", channel.json_body["destination"]) + + # Check that all fields are available + # convert channel.json_body into a List + self._check_fields([channel.json_body]) + + def _create_destinations(self, number_destinations: int): + """Create a number of destinations + + Args: + number_destinations: Number of destinations to be created + """ + for i in range(0, number_destinations): + dest = f"sub{i}.example.com" + self.get_success(self.store.set_destination_retry_timings(dest, 50, 50, 50)) + self.get_success( + self.store.set_destination_last_successful_stream_ordering(dest, 100) + ) + + def _check_fields(self, content: List[JsonDict]): + """Checks that the expected destination attributes are present in content + + Args: + content: List that is checked for content + """ + for c in content: + self.assertIn("destination", c) + self.assertIn("retry_last_ts", c) + self.assertIn("retry_interval", c) + self.assertIn("failure_ts", c) + self.assertIn("last_successful_stream_ordering", c) -- cgit 1.5.1 From a15a893df8428395df7cb95b729431575001c38a Mon Sep 17 00:00:00 2001 From: Quentin Gliech Date: Mon, 6 Dec 2021 18:43:06 +0100 Subject: Save the OIDC session ID (sid) with the device on login (#11482) As a step towards allowing back-channel logout for OIDC. --- changelog.d/11482.misc | 1 + synapse/handlers/auth.py | 34 +++++- synapse/handlers/device.py | 8 ++ synapse/handlers/oidc.py | 58 +++++---- synapse/handlers/register.py | 15 ++- synapse/handlers/sso.py | 4 + synapse/module_api/__init__.py | 2 + synapse/replication/http/login.py | 8 ++ synapse/rest/client/login.py | 7 +- synapse/storage/databases/main/devices.py | 50 +++++++- .../delta/65/11_devices_auth_provider_session.sql | 27 +++++ tests/handlers/test_auth.py | 6 +- tests/handlers/test_cas.py | 40 +++++- tests/handlers/test_oidc.py | 135 ++++++++++++++++++--- tests/handlers/test_saml.py | 40 +++++- 15 files changed, 370 insertions(+), 65 deletions(-) create mode 100644 changelog.d/11482.misc create mode 100644 synapse/storage/schema/main/delta/65/11_devices_auth_provider_session.sql (limited to 'synapse/storage/databases') diff --git a/changelog.d/11482.misc b/changelog.d/11482.misc new file mode 100644 index 0000000000..e78662988f --- /dev/null +++ b/changelog.d/11482.misc @@ -0,0 +1 @@ +Save the OpenID Connect session ID on login. diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 4d9c4e5834..61607cf2ba 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -39,6 +39,7 @@ import attr import bcrypt import pymacaroons import unpaddedbase64 +from pymacaroons.exceptions import MacaroonVerificationFailedException from twisted.web.server import Request @@ -182,8 +183,11 @@ class LoginTokenAttributes: user_id = attr.ib(type=str) - # the SSO Identity Provider that the user authenticated with, to get this token auth_provider_id = attr.ib(type=str) + """The SSO Identity Provider that the user authenticated with, to get this token.""" + + auth_provider_session_id = attr.ib(type=Optional[str]) + """The session ID advertised by the SSO Identity Provider.""" class AuthHandler: @@ -1650,6 +1654,7 @@ class AuthHandler: client_redirect_url: str, extra_attributes: Optional[JsonDict] = None, new_user: bool = False, + auth_provider_session_id: Optional[str] = None, ) -> None: """Having figured out a mxid for this user, complete the HTTP request @@ -1665,6 +1670,7 @@ class AuthHandler: during successful login. Must be JSON serializable. new_user: True if we should use wording appropriate to a user who has just registered. + auth_provider_session_id: The session ID from the SSO IdP received during login. """ # If the account has been deactivated, do not proceed with the login # flow. @@ -1685,6 +1691,7 @@ class AuthHandler: extra_attributes, new_user=new_user, user_profile_data=profile, + auth_provider_session_id=auth_provider_session_id, ) def _complete_sso_login( @@ -1696,6 +1703,7 @@ class AuthHandler: extra_attributes: Optional[JsonDict] = None, new_user: bool = False, user_profile_data: Optional[ProfileInfo] = None, + auth_provider_session_id: Optional[str] = None, ) -> None: """ The synchronous portion of complete_sso_login. @@ -1717,7 +1725,9 @@ class AuthHandler: # Create a login token login_token = self.macaroon_gen.generate_short_term_login_token( - registered_user_id, auth_provider_id=auth_provider_id + registered_user_id, + auth_provider_id=auth_provider_id, + auth_provider_session_id=auth_provider_session_id, ) # Append the login token to the original redirect URL (i.e. with its query @@ -1822,6 +1832,7 @@ class MacaroonGenerator: self, user_id: str, auth_provider_id: str, + auth_provider_session_id: Optional[str] = None, duration_in_ms: int = (2 * 60 * 1000), ) -> str: macaroon = self._generate_base_macaroon(user_id) @@ -1830,6 +1841,10 @@ class MacaroonGenerator: expiry = now + duration_in_ms macaroon.add_first_party_caveat("time < %d" % (expiry,)) macaroon.add_first_party_caveat("auth_provider_id = %s" % (auth_provider_id,)) + if auth_provider_session_id is not None: + macaroon.add_first_party_caveat( + "auth_provider_session_id = %s" % (auth_provider_session_id,) + ) return macaroon.serialize() def verify_short_term_login_token(self, token: str) -> LoginTokenAttributes: @@ -1851,15 +1866,28 @@ class MacaroonGenerator: user_id = get_value_from_macaroon(macaroon, "user_id") auth_provider_id = get_value_from_macaroon(macaroon, "auth_provider_id") + auth_provider_session_id: Optional[str] = None + try: + auth_provider_session_id = get_value_from_macaroon( + macaroon, "auth_provider_session_id" + ) + except MacaroonVerificationFailedException: + pass + v = pymacaroons.Verifier() v.satisfy_exact("gen = 1") v.satisfy_exact("type = login") v.satisfy_general(lambda c: c.startswith("user_id = ")) v.satisfy_general(lambda c: c.startswith("auth_provider_id = ")) + v.satisfy_general(lambda c: c.startswith("auth_provider_session_id = ")) satisfy_expiry(v, self.hs.get_clock().time_msec) v.verify(macaroon, self.hs.config.key.macaroon_secret_key) - return LoginTokenAttributes(user_id=user_id, auth_provider_id=auth_provider_id) + return LoginTokenAttributes( + user_id=user_id, + auth_provider_id=auth_provider_id, + auth_provider_session_id=auth_provider_session_id, + ) def generate_delete_pusher_token(self, user_id: str) -> str: macaroon = self._generate_base_macaroon(user_id) diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 68b446eb66..82ee11e921 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -301,6 +301,8 @@ class DeviceHandler(DeviceWorkerHandler): user_id: str, device_id: Optional[str], initial_device_display_name: Optional[str] = None, + auth_provider_id: Optional[str] = None, + auth_provider_session_id: Optional[str] = None, ) -> str: """ If the given device has not been registered, register it with the @@ -312,6 +314,8 @@ class DeviceHandler(DeviceWorkerHandler): user_id: @user:id device_id: device id supplied by client initial_device_display_name: device display name from client + auth_provider_id: The SSO IdP the user used, if any. + auth_provider_session_id: The session ID (sid) got from the SSO IdP. Returns: device id (generated if none was supplied) """ @@ -323,6 +327,8 @@ class DeviceHandler(DeviceWorkerHandler): user_id=user_id, device_id=device_id, initial_device_display_name=initial_device_display_name, + auth_provider_id=auth_provider_id, + auth_provider_session_id=auth_provider_session_id, ) if new_device: await self.notify_device_update(user_id, [device_id]) @@ -337,6 +343,8 @@ class DeviceHandler(DeviceWorkerHandler): user_id=user_id, device_id=new_device_id, initial_device_display_name=initial_device_display_name, + auth_provider_id=auth_provider_id, + auth_provider_session_id=auth_provider_session_id, ) if new_device: await self.notify_device_update(user_id, [new_device_id]) diff --git a/synapse/handlers/oidc.py b/synapse/handlers/oidc.py index 3665d91513..deb3539751 100644 --- a/synapse/handlers/oidc.py +++ b/synapse/handlers/oidc.py @@ -23,7 +23,7 @@ from authlib.common.security import generate_token from authlib.jose import JsonWebToken, jwt from authlib.oauth2.auth import ClientAuth from authlib.oauth2.rfc6749.parameters import prepare_grant_uri -from authlib.oidc.core import CodeIDToken, ImplicitIDToken, UserInfo +from authlib.oidc.core import CodeIDToken, UserInfo from authlib.oidc.discovery import OpenIDProviderMetadata, get_well_known_url from jinja2 import Environment, Template from pymacaroons.exceptions import ( @@ -117,7 +117,8 @@ class OidcHandler: for idp_id, p in self._providers.items(): try: await p.load_metadata() - await p.load_jwks() + if not p._uses_userinfo: + await p.load_jwks() except Exception as e: raise Exception( "Error while initialising OIDC provider %r" % (idp_id,) @@ -498,10 +499,6 @@ class OidcProvider: return await self._jwks.get() async def _load_jwks(self) -> JWKS: - if self._uses_userinfo: - # We're not using jwt signing, return an empty jwk set - return {"keys": []} - metadata = await self.load_metadata() # Load the JWKS using the `jwks_uri` metadata. @@ -663,7 +660,7 @@ class OidcProvider: return UserInfo(resp) - async def _parse_id_token(self, token: Token, nonce: str) -> UserInfo: + async def _parse_id_token(self, token: Token, nonce: str) -> CodeIDToken: """Return an instance of UserInfo from token's ``id_token``. Args: @@ -673,7 +670,7 @@ class OidcProvider: request. This value should match the one inside the token. Returns: - An object representing the user. + The decoded claims in the ID token. """ metadata = await self.load_metadata() claims_params = { @@ -684,9 +681,6 @@ class OidcProvider: # If we got an `access_token`, there should be an `at_hash` claim # in the `id_token` that we can check against. claims_params["access_token"] = token["access_token"] - claims_cls = CodeIDToken - else: - claims_cls = ImplicitIDToken alg_values = metadata.get("id_token_signing_alg_values_supported", ["RS256"]) jwt = JsonWebToken(alg_values) @@ -703,7 +697,7 @@ class OidcProvider: claims = jwt.decode( id_token, key=jwk_set, - claims_cls=claims_cls, + claims_cls=CodeIDToken, claims_options=claim_options, claims_params=claims_params, ) @@ -713,7 +707,7 @@ class OidcProvider: claims = jwt.decode( id_token, key=jwk_set, - claims_cls=claims_cls, + claims_cls=CodeIDToken, claims_options=claim_options, claims_params=claims_params, ) @@ -721,7 +715,8 @@ class OidcProvider: logger.debug("Decoded id_token JWT %r; validating", claims) claims.validate(leeway=120) # allows 2 min of clock skew - return UserInfo(claims) + + return claims async def handle_redirect_request( self, @@ -837,8 +832,22 @@ class OidcProvider: logger.debug("Successfully obtained OAuth2 token data: %r", token) - # Now that we have a token, get the userinfo, either by decoding the - # `id_token` or by fetching the `userinfo_endpoint`. + # If there is an id_token, it should be validated, regardless of the + # userinfo endpoint is used or not. + if token.get("id_token") is not None: + try: + id_token = await self._parse_id_token(token, nonce=session_data.nonce) + sid = id_token.get("sid") + except Exception as e: + logger.exception("Invalid id_token") + self._sso_handler.render_error(request, "invalid_token", str(e)) + return + else: + id_token = None + sid = None + + # Now that we have a token, get the userinfo either from the `id_token` + # claims or by fetching the `userinfo_endpoint`. if self._uses_userinfo: try: userinfo = await self._fetch_userinfo(token) @@ -846,13 +855,14 @@ class OidcProvider: logger.exception("Could not fetch userinfo") self._sso_handler.render_error(request, "fetch_error", str(e)) return + elif id_token is not None: + userinfo = UserInfo(id_token) else: - try: - userinfo = await self._parse_id_token(token, nonce=session_data.nonce) - except Exception as e: - logger.exception("Invalid id_token") - self._sso_handler.render_error(request, "invalid_token", str(e)) - return + logger.error("Missing id_token in token response") + self._sso_handler.render_error( + request, "invalid_token", "Missing id_token in token response" + ) + return # first check if we're doing a UIA if session_data.ui_auth_session_id: @@ -884,7 +894,7 @@ class OidcProvider: # Call the mapper to register/login the user try: await self._complete_oidc_login( - userinfo, token, request, session_data.client_redirect_url + userinfo, token, request, session_data.client_redirect_url, sid ) except MappingException as e: logger.exception("Could not map user") @@ -896,6 +906,7 @@ class OidcProvider: token: Token, request: SynapseRequest, client_redirect_url: str, + sid: Optional[str], ) -> None: """Given a UserInfo response, complete the login flow @@ -1008,6 +1019,7 @@ class OidcProvider: oidc_response_to_user_attributes, grandfather_existing_users, extra_attributes, + auth_provider_session_id=sid, ) def _remote_id_from_userinfo(self, userinfo: UserInfo) -> str: diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index b14ddd8267..f08a516a75 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -746,6 +746,7 @@ class RegistrationHandler: is_appservice_ghost: bool = False, auth_provider_id: Optional[str] = None, should_issue_refresh_token: bool = False, + auth_provider_session_id: Optional[str] = None, ) -> Tuple[str, str, Optional[int], Optional[str]]: """Register a device for a user and generate an access token. @@ -756,9 +757,9 @@ class RegistrationHandler: device_id: The device ID to check, or None to generate a new one. initial_display_name: An optional display name for the device. is_guest: Whether this is a guest account - auth_provider_id: The SSO IdP the user used, if any (just used for the - prometheus metrics). + auth_provider_id: The SSO IdP the user used, if any. should_issue_refresh_token: Whether it should also issue a refresh token + auth_provider_session_id: The session ID received during login from the SSO IdP. Returns: Tuple of device ID, access token, access token expiration time and refresh token """ @@ -769,6 +770,8 @@ class RegistrationHandler: is_guest=is_guest, is_appservice_ghost=is_appservice_ghost, should_issue_refresh_token=should_issue_refresh_token, + auth_provider_id=auth_provider_id, + auth_provider_session_id=auth_provider_session_id, ) login_counter.labels( @@ -791,6 +794,8 @@ class RegistrationHandler: is_guest: bool = False, is_appservice_ghost: bool = False, should_issue_refresh_token: bool = False, + auth_provider_id: Optional[str] = None, + auth_provider_session_id: Optional[str] = None, ) -> LoginDict: """Helper for register_device @@ -822,7 +827,11 @@ class RegistrationHandler: refresh_token_id = None registered_device_id = await self.device_handler.check_device_registered( - user_id, device_id, initial_display_name + user_id, + device_id, + initial_display_name, + auth_provider_id=auth_provider_id, + auth_provider_session_id=auth_provider_session_id, ) if is_guest: assert access_token_expiry is None diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py index 49fde01cf0..65c27bc64a 100644 --- a/synapse/handlers/sso.py +++ b/synapse/handlers/sso.py @@ -365,6 +365,7 @@ class SsoHandler: sso_to_matrix_id_mapper: Callable[[int], Awaitable[UserAttributes]], grandfather_existing_users: Callable[[], Awaitable[Optional[str]]], extra_login_attributes: Optional[JsonDict] = None, + auth_provider_session_id: Optional[str] = None, ) -> None: """ Given an SSO ID, retrieve the user ID for it and possibly register the user. @@ -415,6 +416,8 @@ class SsoHandler: extra_login_attributes: An optional dictionary of extra attributes to be provided to the client in the login response. + auth_provider_session_id: An optional session ID from the IdP. + Raises: MappingException if there was a problem mapping the response to a user. RedirectException: if the mapping provider needs to redirect the user @@ -490,6 +493,7 @@ class SsoHandler: client_redirect_url, extra_login_attributes, new_user=new_user, + auth_provider_session_id=auth_provider_session_id, ) async def _call_attribute_mapper( diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index a8154168be..6bfb4b8d1b 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -626,6 +626,7 @@ class ModuleApi: user_id: str, duration_in_ms: int = (2 * 60 * 1000), auth_provider_id: str = "", + auth_provider_session_id: Optional[str] = None, ) -> str: """Generate a login token suitable for m.login.token authentication @@ -643,6 +644,7 @@ class ModuleApi: return self._hs.get_macaroon_generator().generate_short_term_login_token( user_id, auth_provider_id, + auth_provider_session_id, duration_in_ms, ) diff --git a/synapse/replication/http/login.py b/synapse/replication/http/login.py index 0db419ea57..daacc34cea 100644 --- a/synapse/replication/http/login.py +++ b/synapse/replication/http/login.py @@ -46,6 +46,8 @@ class RegisterDeviceReplicationServlet(ReplicationEndpoint): is_guest, is_appservice_ghost, should_issue_refresh_token, + auth_provider_id, + auth_provider_session_id, ): """ Args: @@ -63,6 +65,8 @@ class RegisterDeviceReplicationServlet(ReplicationEndpoint): "is_guest": is_guest, "is_appservice_ghost": is_appservice_ghost, "should_issue_refresh_token": should_issue_refresh_token, + "auth_provider_id": auth_provider_id, + "auth_provider_session_id": auth_provider_session_id, } async def _handle_request(self, request, user_id): @@ -73,6 +77,8 @@ class RegisterDeviceReplicationServlet(ReplicationEndpoint): is_guest = content["is_guest"] is_appservice_ghost = content["is_appservice_ghost"] should_issue_refresh_token = content["should_issue_refresh_token"] + auth_provider_id = content["auth_provider_id"] + auth_provider_session_id = content["auth_provider_session_id"] res = await self.registration_handler.register_device_inner( user_id, @@ -81,6 +87,8 @@ class RegisterDeviceReplicationServlet(ReplicationEndpoint): is_guest, is_appservice_ghost=is_appservice_ghost, should_issue_refresh_token=should_issue_refresh_token, + auth_provider_id=auth_provider_id, + auth_provider_session_id=auth_provider_session_id, ) return 200, res diff --git a/synapse/rest/client/login.py b/synapse/rest/client/login.py index a66ee4fb3d..1b23fa18cf 100644 --- a/synapse/rest/client/login.py +++ b/synapse/rest/client/login.py @@ -303,6 +303,7 @@ class LoginRestServlet(RestServlet): ratelimit: bool = True, auth_provider_id: Optional[str] = None, should_issue_refresh_token: bool = False, + auth_provider_session_id: Optional[str] = None, ) -> LoginResponse: """Called when we've successfully authed the user and now need to actually login them in (e.g. create devices). This gets called on @@ -318,10 +319,10 @@ class LoginRestServlet(RestServlet): create_non_existent_users: Whether to create the user if they don't exist. Defaults to False. ratelimit: Whether to ratelimit the login request. - auth_provider_id: The SSO IdP the user used, if any (just used for the - prometheus metrics). + auth_provider_id: The SSO IdP the user used, if any. should_issue_refresh_token: True if this login should issue a refresh token alongside the access token. + auth_provider_session_id: The session ID got during login from the SSO IdP. Returns: result: Dictionary of account information after successful login. @@ -354,6 +355,7 @@ class LoginRestServlet(RestServlet): initial_display_name, auth_provider_id=auth_provider_id, should_issue_refresh_token=should_issue_refresh_token, + auth_provider_session_id=auth_provider_session_id, ) result = LoginResponse( @@ -399,6 +401,7 @@ class LoginRestServlet(RestServlet): self.auth_handler._sso_login_callback, auth_provider_id=res.auth_provider_id, should_issue_refresh_token=should_issue_refresh_token, + auth_provider_session_id=res.auth_provider_session_id, ) async def _do_jwt_login( diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 9ccc66e589..d5a4a661cd 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -139,6 +139,27 @@ class DeviceWorkerStore(SQLBaseStore): return {d["device_id"]: d for d in devices} + async def get_devices_by_auth_provider_session_id( + self, auth_provider_id: str, auth_provider_session_id: str + ) -> List[Dict[str, Any]]: + """Retrieve the list of devices associated with a SSO IdP session ID. + + Args: + auth_provider_id: The SSO IdP ID as defined in the server config + auth_provider_session_id: The session ID within the IdP + Returns: + A list of dicts containing the device_id and the user_id of each device + """ + return await self.db_pool.simple_select_list( + table="device_auth_providers", + keyvalues={ + "auth_provider_id": auth_provider_id, + "auth_provider_session_id": auth_provider_session_id, + }, + retcols=("user_id", "device_id"), + desc="get_devices_by_auth_provider_session_id", + ) + @trace async def get_device_updates_by_remote( self, destination: str, from_stream_id: int, limit: int @@ -1070,7 +1091,12 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): ) async def store_device( - self, user_id: str, device_id: str, initial_device_display_name: Optional[str] + self, + user_id: str, + device_id: str, + initial_device_display_name: Optional[str], + auth_provider_id: Optional[str] = None, + auth_provider_session_id: Optional[str] = None, ) -> bool: """Ensure the given device is known; add it to the store if not @@ -1079,6 +1105,8 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): device_id: id of device initial_device_display_name: initial displayname of the device. Ignored if device exists. + auth_provider_id: The SSO IdP the user used, if any. + auth_provider_session_id: The session ID (sid) got from a OIDC login. Returns: Whether the device was inserted or an existing device existed with that ID. @@ -1115,6 +1143,18 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): if hidden: raise StoreError(400, "The device ID is in use", Codes.FORBIDDEN) + if auth_provider_id and auth_provider_session_id: + await self.db_pool.simple_insert( + "device_auth_providers", + values={ + "user_id": user_id, + "device_id": device_id, + "auth_provider_id": auth_provider_id, + "auth_provider_session_id": auth_provider_session_id, + }, + desc="store_device_auth_provider", + ) + self.device_id_exists_cache.set(key, True) return inserted except StoreError: @@ -1168,6 +1208,14 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): keyvalues={"user_id": user_id}, ) + self.db_pool.simple_delete_many_txn( + txn, + table="device_auth_providers", + column="device_id", + values=device_ids, + keyvalues={"user_id": user_id}, + ) + await self.db_pool.runInteraction("delete_devices", _delete_devices_txn) for device_id in device_ids: self.device_id_exists_cache.invalidate((user_id, device_id)) diff --git a/synapse/storage/schema/main/delta/65/11_devices_auth_provider_session.sql b/synapse/storage/schema/main/delta/65/11_devices_auth_provider_session.sql new file mode 100644 index 0000000000..a65bfb520d --- /dev/null +++ b/synapse/storage/schema/main/delta/65/11_devices_auth_provider_session.sql @@ -0,0 +1,27 @@ +/* Copyright 2021 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Track the auth provider used by each login as well as the session ID +CREATE TABLE device_auth_providers ( + user_id TEXT NOT NULL, + device_id TEXT NOT NULL, + auth_provider_id TEXT NOT NULL, + auth_provider_session_id TEXT NOT NULL +); + +CREATE INDEX device_auth_providers_devices + ON device_auth_providers (user_id, device_id); +CREATE INDEX device_auth_providers_sessions + ON device_auth_providers (auth_provider_id, auth_provider_session_id); diff --git a/tests/handlers/test_auth.py b/tests/handlers/test_auth.py index 72e176da75..03b8b8615c 100644 --- a/tests/handlers/test_auth.py +++ b/tests/handlers/test_auth.py @@ -71,7 +71,7 @@ class AuthTestCase(unittest.HomeserverTestCase): def test_short_term_login_token_gives_user_id(self): token = self.macaroon_generator.generate_short_term_login_token( - self.user1, "", 5000 + self.user1, "", duration_in_ms=5000 ) res = self.get_success(self.auth_handler.validate_short_term_login_token(token)) self.assertEqual(self.user1, res.user_id) @@ -94,7 +94,7 @@ class AuthTestCase(unittest.HomeserverTestCase): def test_short_term_login_token_cannot_replace_user_id(self): token = self.macaroon_generator.generate_short_term_login_token( - self.user1, "", 5000 + self.user1, "", duration_in_ms=5000 ) macaroon = pymacaroons.Macaroon.deserialize(token) @@ -213,6 +213,6 @@ class AuthTestCase(unittest.HomeserverTestCase): def _get_macaroon(self): token = self.macaroon_generator.generate_short_term_login_token( - self.user1, "", 5000 + self.user1, "", duration_in_ms=5000 ) return pymacaroons.Macaroon.deserialize(token) diff --git a/tests/handlers/test_cas.py b/tests/handlers/test_cas.py index b625995d12..8705ff8943 100644 --- a/tests/handlers/test_cas.py +++ b/tests/handlers/test_cas.py @@ -66,7 +66,13 @@ class CasHandlerTestCase(HomeserverTestCase): # check that the auth handler got called as expected auth_handler.complete_sso_login.assert_called_once_with( - "@test_user:test", "cas", request, "redirect_uri", None, new_user=True + "@test_user:test", + "cas", + request, + "redirect_uri", + None, + new_user=True, + auth_provider_session_id=None, ) def test_map_cas_user_to_existing_user(self): @@ -89,7 +95,13 @@ class CasHandlerTestCase(HomeserverTestCase): # check that the auth handler got called as expected auth_handler.complete_sso_login.assert_called_once_with( - "@test_user:test", "cas", request, "redirect_uri", None, new_user=False + "@test_user:test", + "cas", + request, + "redirect_uri", + None, + new_user=False, + auth_provider_session_id=None, ) # Subsequent calls should map to the same mxid. @@ -98,7 +110,13 @@ class CasHandlerTestCase(HomeserverTestCase): self.handler._handle_cas_response(request, cas_response, "redirect_uri", "") ) auth_handler.complete_sso_login.assert_called_once_with( - "@test_user:test", "cas", request, "redirect_uri", None, new_user=False + "@test_user:test", + "cas", + request, + "redirect_uri", + None, + new_user=False, + auth_provider_session_id=None, ) def test_map_cas_user_to_invalid_localpart(self): @@ -116,7 +134,13 @@ class CasHandlerTestCase(HomeserverTestCase): # check that the auth handler got called as expected auth_handler.complete_sso_login.assert_called_once_with( - "@f=c3=b6=c3=b6:test", "cas", request, "redirect_uri", None, new_user=True + "@f=c3=b6=c3=b6:test", + "cas", + request, + "redirect_uri", + None, + new_user=True, + auth_provider_session_id=None, ) @override_config( @@ -160,7 +184,13 @@ class CasHandlerTestCase(HomeserverTestCase): # check that the auth handler got called as expected auth_handler.complete_sso_login.assert_called_once_with( - "@test_user:test", "cas", request, "redirect_uri", None, new_user=True + "@test_user:test", + "cas", + request, + "redirect_uri", + None, + new_user=True, + auth_provider_session_id=None, ) diff --git a/tests/handlers/test_oidc.py b/tests/handlers/test_oidc.py index a25c89bd5b..cfe3de5266 100644 --- a/tests/handlers/test_oidc.py +++ b/tests/handlers/test_oidc.py @@ -252,13 +252,6 @@ class OidcHandlerTestCase(HomeserverTestCase): with patch.object(self.provider, "load_metadata", patched_load_metadata): self.get_failure(self.provider.load_jwks(force=True), RuntimeError) - # Return empty key set if JWKS are not used - self.provider._scopes = [] # not asking the openid scope - self.http_client.get_json.reset_mock() - jwks = self.get_success(self.provider.load_jwks(force=True)) - self.http_client.get_json.assert_not_called() - self.assertEqual(jwks, {"keys": []}) - @override_config({"oidc_config": DEFAULT_CONFIG}) def test_validate_config(self): """Provider metadatas are extensively validated.""" @@ -455,7 +448,13 @@ class OidcHandlerTestCase(HomeserverTestCase): self.get_success(self.handler.handle_oidc_callback(request)) auth_handler.complete_sso_login.assert_called_once_with( - expected_user_id, "oidc", request, client_redirect_url, None, new_user=True + expected_user_id, + "oidc", + request, + client_redirect_url, + None, + new_user=True, + auth_provider_session_id=None, ) self.provider._exchange_code.assert_called_once_with(code) self.provider._parse_id_token.assert_called_once_with(token, nonce=nonce) @@ -482,17 +481,58 @@ class OidcHandlerTestCase(HomeserverTestCase): self.provider._fetch_userinfo.reset_mock() # With userinfo fetching - self.provider._scopes = [] # do not ask the "openid" scope + self.provider._user_profile_method = "userinfo_endpoint" + token = { + "type": "bearer", + "access_token": "access_token", + } + self.provider._exchange_code = simple_async_mock(return_value=token) self.get_success(self.handler.handle_oidc_callback(request)) auth_handler.complete_sso_login.assert_called_once_with( - expected_user_id, "oidc", request, client_redirect_url, None, new_user=False + expected_user_id, + "oidc", + request, + client_redirect_url, + None, + new_user=False, + auth_provider_session_id=None, ) self.provider._exchange_code.assert_called_once_with(code) self.provider._parse_id_token.assert_not_called() self.provider._fetch_userinfo.assert_called_once_with(token) self.render_error.assert_not_called() + # With an ID token, userinfo fetching and sid in the ID token + self.provider._user_profile_method = "userinfo_endpoint" + token = { + "type": "bearer", + "access_token": "access_token", + "id_token": "id_token", + } + id_token = { + "sid": "abcdefgh", + } + self.provider._parse_id_token = simple_async_mock(return_value=id_token) + self.provider._exchange_code = simple_async_mock(return_value=token) + auth_handler.complete_sso_login.reset_mock() + self.provider._fetch_userinfo.reset_mock() + self.get_success(self.handler.handle_oidc_callback(request)) + + auth_handler.complete_sso_login.assert_called_once_with( + expected_user_id, + "oidc", + request, + client_redirect_url, + None, + new_user=False, + auth_provider_session_id=id_token["sid"], + ) + self.provider._exchange_code.assert_called_once_with(code) + self.provider._parse_id_token.assert_called_once_with(token, nonce=nonce) + self.provider._fetch_userinfo.assert_called_once_with(token) + self.render_error.assert_not_called() + # Handle userinfo fetching error self.provider._fetch_userinfo = simple_async_mock(raises=Exception()) self.get_success(self.handler.handle_oidc_callback(request)) @@ -776,6 +816,7 @@ class OidcHandlerTestCase(HomeserverTestCase): client_redirect_url, {"phone": "1234567"}, new_user=True, + auth_provider_session_id=None, ) @override_config({"oidc_config": DEFAULT_CONFIG}) @@ -790,7 +831,13 @@ class OidcHandlerTestCase(HomeserverTestCase): } self.get_success(_make_callback_with_userinfo(self.hs, userinfo)) auth_handler.complete_sso_login.assert_called_once_with( - "@test_user:test", "oidc", ANY, ANY, None, new_user=True + "@test_user:test", + "oidc", + ANY, + ANY, + None, + new_user=True, + auth_provider_session_id=None, ) auth_handler.complete_sso_login.reset_mock() @@ -801,7 +848,13 @@ class OidcHandlerTestCase(HomeserverTestCase): } self.get_success(_make_callback_with_userinfo(self.hs, userinfo)) auth_handler.complete_sso_login.assert_called_once_with( - "@test_user_2:test", "oidc", ANY, ANY, None, new_user=True + "@test_user_2:test", + "oidc", + ANY, + ANY, + None, + new_user=True, + auth_provider_session_id=None, ) auth_handler.complete_sso_login.reset_mock() @@ -838,14 +891,26 @@ class OidcHandlerTestCase(HomeserverTestCase): } self.get_success(_make_callback_with_userinfo(self.hs, userinfo)) auth_handler.complete_sso_login.assert_called_once_with( - user.to_string(), "oidc", ANY, ANY, None, new_user=False + user.to_string(), + "oidc", + ANY, + ANY, + None, + new_user=False, + auth_provider_session_id=None, ) auth_handler.complete_sso_login.reset_mock() # Subsequent calls should map to the same mxid. self.get_success(_make_callback_with_userinfo(self.hs, userinfo)) auth_handler.complete_sso_login.assert_called_once_with( - user.to_string(), "oidc", ANY, ANY, None, new_user=False + user.to_string(), + "oidc", + ANY, + ANY, + None, + new_user=False, + auth_provider_session_id=None, ) auth_handler.complete_sso_login.reset_mock() @@ -860,7 +925,13 @@ class OidcHandlerTestCase(HomeserverTestCase): } self.get_success(_make_callback_with_userinfo(self.hs, userinfo)) auth_handler.complete_sso_login.assert_called_once_with( - user.to_string(), "oidc", ANY, ANY, None, new_user=False + user.to_string(), + "oidc", + ANY, + ANY, + None, + new_user=False, + auth_provider_session_id=None, ) auth_handler.complete_sso_login.reset_mock() @@ -896,7 +967,13 @@ class OidcHandlerTestCase(HomeserverTestCase): self.get_success(_make_callback_with_userinfo(self.hs, userinfo)) auth_handler.complete_sso_login.assert_called_once_with( - "@TEST_USER_2:test", "oidc", ANY, ANY, None, new_user=False + "@TEST_USER_2:test", + "oidc", + ANY, + ANY, + None, + new_user=False, + auth_provider_session_id=None, ) @override_config({"oidc_config": DEFAULT_CONFIG}) @@ -934,7 +1011,13 @@ class OidcHandlerTestCase(HomeserverTestCase): # test_user is already taken, so test_user1 gets registered instead. auth_handler.complete_sso_login.assert_called_once_with( - "@test_user1:test", "oidc", ANY, ANY, None, new_user=True + "@test_user1:test", + "oidc", + ANY, + ANY, + None, + new_user=True, + auth_provider_session_id=None, ) auth_handler.complete_sso_login.reset_mock() @@ -1018,7 +1101,13 @@ class OidcHandlerTestCase(HomeserverTestCase): # check that the auth handler got called as expected auth_handler.complete_sso_login.assert_called_once_with( - "@tester:test", "oidc", ANY, ANY, None, new_user=True + "@tester:test", + "oidc", + ANY, + ANY, + None, + new_user=True, + auth_provider_session_id=None, ) @override_config( @@ -1043,7 +1132,13 @@ class OidcHandlerTestCase(HomeserverTestCase): # check that the auth handler got called as expected auth_handler.complete_sso_login.assert_called_once_with( - "@tester:test", "oidc", ANY, ANY, None, new_user=True + "@tester:test", + "oidc", + ANY, + ANY, + None, + new_user=True, + auth_provider_session_id=None, ) @override_config( @@ -1156,7 +1251,7 @@ async def _make_callback_with_userinfo( handler = hs.get_oidc_handler() provider = handler._providers["oidc"] - provider._exchange_code = simple_async_mock(return_value={}) + provider._exchange_code = simple_async_mock(return_value={"id_token": ""}) provider._parse_id_token = simple_async_mock(return_value=userinfo) provider._fetch_userinfo = simple_async_mock(return_value=userinfo) diff --git a/tests/handlers/test_saml.py b/tests/handlers/test_saml.py index 8cfc184fef..50551aa6e3 100644 --- a/tests/handlers/test_saml.py +++ b/tests/handlers/test_saml.py @@ -130,7 +130,13 @@ class SamlHandlerTestCase(HomeserverTestCase): # check that the auth handler got called as expected auth_handler.complete_sso_login.assert_called_once_with( - "@test_user:test", "saml", request, "redirect_uri", None, new_user=True + "@test_user:test", + "saml", + request, + "redirect_uri", + None, + new_user=True, + auth_provider_session_id=None, ) @override_config({"saml2_config": {"grandfathered_mxid_source_attribute": "mxid"}}) @@ -156,7 +162,13 @@ class SamlHandlerTestCase(HomeserverTestCase): # check that the auth handler got called as expected auth_handler.complete_sso_login.assert_called_once_with( - "@test_user:test", "saml", request, "", None, new_user=False + "@test_user:test", + "saml", + request, + "", + None, + new_user=False, + auth_provider_session_id=None, ) # Subsequent calls should map to the same mxid. @@ -165,7 +177,13 @@ class SamlHandlerTestCase(HomeserverTestCase): self.handler._handle_authn_response(request, saml_response, "") ) auth_handler.complete_sso_login.assert_called_once_with( - "@test_user:test", "saml", request, "", None, new_user=False + "@test_user:test", + "saml", + request, + "", + None, + new_user=False, + auth_provider_session_id=None, ) def test_map_saml_response_to_invalid_localpart(self): @@ -213,7 +231,13 @@ class SamlHandlerTestCase(HomeserverTestCase): # test_user is already taken, so test_user1 gets registered instead. auth_handler.complete_sso_login.assert_called_once_with( - "@test_user1:test", "saml", request, "", None, new_user=True + "@test_user1:test", + "saml", + request, + "", + None, + new_user=True, + auth_provider_session_id=None, ) auth_handler.complete_sso_login.reset_mock() @@ -309,7 +333,13 @@ class SamlHandlerTestCase(HomeserverTestCase): # check that the auth handler got called as expected auth_handler.complete_sso_login.assert_called_once_with( - "@test_user:test", "saml", request, "redirect_uri", None, new_user=True + "@test_user:test", + "saml", + request, + "redirect_uri", + None, + new_user=True, + auth_provider_session_id=None, ) -- cgit 1.5.1 From 8541809cb952ebf0da2a95dd93eccd5644dab49d Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Wed, 8 Dec 2021 05:01:38 -0500 Subject: Send and handle cross-signing messages using the stable prefix. (#10520) --- changelog.d/10520.misc | 1 + synapse/handlers/e2e_keys.py | 8 ++++++-- synapse/storage/databases/main/devices.py | 4 +++- tests/federation/test_federation_sender.py | 5 +++-- 4 files changed, 13 insertions(+), 5 deletions(-) create mode 100644 changelog.d/10520.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/10520.misc b/changelog.d/10520.misc new file mode 100644 index 0000000000..a911e165da --- /dev/null +++ b/changelog.d/10520.misc @@ -0,0 +1 @@ +Send and handle cross-signing messages using the stable prefix. diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 60c11e3d21..b2554bda04 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -65,8 +65,12 @@ class E2eKeysHandler: else: # Only register this edu handler on master as it requires writing # device updates to the db - # - # FIXME: switch to m.signing_key_update when MSC1756 is merged into the spec + federation_registry.register_edu_handler( + "m.signing_key_update", + self._edu_updater.incoming_signing_key_update, + ) + # also handle the unstable version + # FIXME: remove this when enough servers have upgraded federation_registry.register_edu_handler( "org.matrix.signing_key_update", self._edu_updater.incoming_signing_key_update, diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index d5a4a661cd..838a2a6a3d 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -274,7 +274,9 @@ class DeviceWorkerStore(SQLBaseStore): # add the updated cross-signing keys to the results list for user_id, result in cross_signing_keys_by_user.items(): result["user_id"] = user_id - # FIXME: switch to m.signing_key_update when MSC1756 is merged into the spec + results.append(("m.signing_key_update", result)) + # also send the unstable version + # FIXME: remove this when enough servers have upgraded results.append(("org.matrix.signing_key_update", result)) return now_stream_id, results diff --git a/tests/federation/test_federation_sender.py b/tests/federation/test_federation_sender.py index b457dad6d2..b2376e2db9 100644 --- a/tests/federation/test_federation_sender.py +++ b/tests/federation/test_federation_sender.py @@ -266,7 +266,8 @@ class FederationSenderDevicesTestCases(HomeserverTestCase): ) # expect signing key update edu - self.assertEqual(len(self.edus), 1) + self.assertEqual(len(self.edus), 2) + self.assertEqual(self.edus.pop(0)["edu_type"], "m.signing_key_update") self.assertEqual(self.edus.pop(0)["edu_type"], "org.matrix.signing_key_update") # sign the devices @@ -491,7 +492,7 @@ class FederationSenderDevicesTestCases(HomeserverTestCase): ) -> None: """Check that the txn has an EDU with a signing key update.""" edus = txn["edus"] - self.assertEqual(len(edus), 1) + self.assertEqual(len(edus), 2) def generate_and_upload_device_signing_key( self, user_id: str, device_id: str -- cgit 1.5.1 From ff7cc17b5706ff3f386b15fda668511c0502ab9c Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 8 Dec 2021 14:15:14 +0000 Subject: Improve log messages for stream ids (#11536) Somehow I'd managed to get my database in a pickle with stream ids. These changes were useful to debug. --- changelog.d/11536.misc | 1 + synapse/storage/databases/main/state_deltas.py | 4 +++- synapse/storage/util/id_generators.py | 6 +++--- 3 files changed, 7 insertions(+), 4 deletions(-) create mode 100644 changelog.d/11536.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/11536.misc b/changelog.d/11536.misc new file mode 100644 index 0000000000..b9191c111b --- /dev/null +++ b/changelog.d/11536.misc @@ -0,0 +1 @@ +Improvements to log messages around handling stream ids. diff --git a/synapse/storage/databases/main/state_deltas.py b/synapse/storage/databases/main/state_deltas.py index 7f3624b128..188afec332 100644 --- a/synapse/storage/databases/main/state_deltas.py +++ b/synapse/storage/databases/main/state_deltas.py @@ -56,7 +56,9 @@ class StateDeltasStore(SQLBaseStore): prev_stream_id = int(prev_stream_id) # check we're not going backwards - assert prev_stream_id <= max_stream_id + assert ( + prev_stream_id <= max_stream_id + ), f"New stream id {max_stream_id} is smaller than prev stream id {prev_stream_id}" if not self._curr_state_delta_stream_cache.has_any_entity_changed( prev_stream_id diff --git a/synapse/storage/util/id_generators.py b/synapse/storage/util/id_generators.py index 4ff3013908..b8112e1c05 100644 --- a/synapse/storage/util/id_generators.py +++ b/synapse/storage/util/id_generators.py @@ -74,8 +74,6 @@ class IdGenerator: def _load_current_id( db_conn: LoggingDatabaseConnection, table: str, column: str, step: int = 1 ) -> int: - # debug logging for https://github.com/matrix-org/synapse/issues/7968 - logger.info("initialising stream generator for %s(%s)", table, column) cur = db_conn.cursor(txn_name="_load_current_id") if step == 1: cur.execute("SELECT MAX(%s) FROM %s" % (column, table)) @@ -86,7 +84,9 @@ def _load_current_id( (val,) = result cur.close() current_id = int(val) if val else step - return (max if step > 0 else min)(current_id, step) + res = (max if step > 0 else min)(current_id, step) + logger.info("Initialising stream generator for %s(%s): %i", table, column, res) + return res class AbstractStreamIdTracker(metaclass=abc.ABCMeta): -- cgit 1.5.1 From d93362d87fbbf4941da06ade65eaf5df1672bccb Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 8 Dec 2021 12:26:29 -0500 Subject: Add a constant for receipt types (m.read). (#11531) And expand some type hints in the receipts storage module. --- changelog.d/11531.misc | 1 + synapse/api/constants.py | 4 ++ synapse/handlers/receipts.py | 6 +- synapse/handlers/sync.py | 4 +- synapse/push/push_tools.py | 3 +- synapse/rest/client/notifications.py | 3 +- synapse/rest/client/read_marker.py | 6 +- synapse/rest/client/receipts.py | 4 +- synapse/storage/databases/main/receipts.py | 101 +++++++++++++++++++---------- 9 files changed, 87 insertions(+), 45 deletions(-) create mode 100644 changelog.d/11531.misc (limited to 'synapse/storage/databases') diff --git a/changelog.d/11531.misc b/changelog.d/11531.misc new file mode 100644 index 0000000000..ed6ef3bb3e --- /dev/null +++ b/changelog.d/11531.misc @@ -0,0 +1 @@ +Add a receipt types constant for `m.read`. diff --git a/synapse/api/constants.py b/synapse/api/constants.py index f7d29b4319..52c083a20b 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -253,5 +253,9 @@ class GuestAccess: FORBIDDEN: Final = "forbidden" +class ReceiptTypes: + READ: Final = "m.read" + + class ReadReceiptEventFields: MSC2285_HIDDEN: Final = "org.matrix.msc2285.hidden" diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 4911a11535..5cb1ff749d 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -14,7 +14,7 @@ import logging from typing import TYPE_CHECKING, Iterable, List, Optional, Tuple -from synapse.api.constants import ReadReceiptEventFields +from synapse.api.constants import ReadReceiptEventFields, ReceiptTypes from synapse.appservice import ApplicationService from synapse.streams import EventSource from synapse.types import JsonDict, ReadReceipt, UserID, get_domain_from_id @@ -178,7 +178,7 @@ class ReceiptEventSource(EventSource[int, JsonDict]): for event_id in content.keys(): event_content = content.get(event_id, {}) - m_read = event_content.get("m.read", {}) + m_read = event_content.get(ReceiptTypes.READ, {}) # If m_read is missing copy over the original event_content as there is nothing to process here if not m_read: @@ -206,7 +206,7 @@ class ReceiptEventSource(EventSource[int, JsonDict]): # Set new users unless empty if len(new_users.keys()) > 0: - new_event["content"][event_id] = {"m.read": new_users} + new_event["content"][event_id] = {ReceiptTypes.READ: new_users} # Append new_event to visible_events unless empty if len(new_event["content"].keys()) > 0: diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index f3039c3c3f..96f37e9f42 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -28,7 +28,7 @@ from typing import ( import attr from prometheus_client import Counter -from synapse.api.constants import AccountDataTypes, EventTypes, Membership +from synapse.api.constants import AccountDataTypes, EventTypes, Membership, ReceiptTypes from synapse.api.filtering import FilterCollection from synapse.api.presence import UserPresenceState from synapse.api.room_versions import KNOWN_ROOM_VERSIONS @@ -1046,7 +1046,7 @@ class SyncHandler: last_unread_event_id = await self.store.get_last_receipt_event_id_for_user( user_id=sync_config.user.to_string(), room_id=room_id, - receipt_type="m.read", + receipt_type=ReceiptTypes.READ, ) notifs = await self.store.get_unread_event_push_actions_by_room_for_user( diff --git a/synapse/push/push_tools.py b/synapse/push/push_tools.py index 9c85200c0f..da641aca47 100644 --- a/synapse/push/push_tools.py +++ b/synapse/push/push_tools.py @@ -13,6 +13,7 @@ # limitations under the License. from typing import Dict +from synapse.api.constants import ReceiptTypes from synapse.events import EventBase from synapse.push.presentable_names import calculate_room_name, name_from_member_event from synapse.storage import Storage @@ -23,7 +24,7 @@ async def get_badge_count(store: DataStore, user_id: str, group_by_room: bool) - invites = await store.get_invited_rooms_for_local_user(user_id) joins = await store.get_rooms_for_user(user_id) - my_receipts_by_room = await store.get_receipts_for_user(user_id, "m.read") + my_receipts_by_room = await store.get_receipts_for_user(user_id, ReceiptTypes.READ) badge = len(invites) diff --git a/synapse/rest/client/notifications.py b/synapse/rest/client/notifications.py index d1d8a984c6..b12a332776 100644 --- a/synapse/rest/client/notifications.py +++ b/synapse/rest/client/notifications.py @@ -15,6 +15,7 @@ import logging from typing import TYPE_CHECKING, Tuple +from synapse.api.constants import ReceiptTypes from synapse.events.utils import format_event_for_client_v2_without_room_id from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_integer, parse_string @@ -54,7 +55,7 @@ class NotificationsServlet(RestServlet): ) receipts_by_room = await self.store.get_receipts_for_user_with_orderings( - user_id, "m.read" + user_id, ReceiptTypes.READ ) notif_event_ids = [pa["event_id"] for pa in push_actions] diff --git a/synapse/rest/client/read_marker.py b/synapse/rest/client/read_marker.py index 43c04fac6f..f51be511d1 100644 --- a/synapse/rest/client/read_marker.py +++ b/synapse/rest/client/read_marker.py @@ -15,7 +15,7 @@ import logging from typing import TYPE_CHECKING, Tuple -from synapse.api.constants import ReadReceiptEventFields +from synapse.api.constants import ReadReceiptEventFields, ReceiptTypes from synapse.api.errors import Codes, SynapseError from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_json_object_from_request @@ -48,7 +48,7 @@ class ReadMarkerRestServlet(RestServlet): await self.presence_handler.bump_presence_active_time(requester.user) body = parse_json_object_from_request(request) - read_event_id = body.get("m.read", None) + read_event_id = body.get(ReceiptTypes.READ, None) hidden = body.get(ReadReceiptEventFields.MSC2285_HIDDEN, False) if not isinstance(hidden, bool): @@ -62,7 +62,7 @@ class ReadMarkerRestServlet(RestServlet): if read_event_id: await self.receipts_handler.received_client_receipt( room_id, - "m.read", + ReceiptTypes.READ, user_id=requester.user.to_string(), event_id=read_event_id, hidden=hidden, diff --git a/synapse/rest/client/receipts.py b/synapse/rest/client/receipts.py index 2b25b9aad6..b24ad2d1be 100644 --- a/synapse/rest/client/receipts.py +++ b/synapse/rest/client/receipts.py @@ -16,7 +16,7 @@ import logging import re from typing import TYPE_CHECKING, Tuple -from synapse.api.constants import ReadReceiptEventFields +from synapse.api.constants import ReadReceiptEventFields, ReceiptTypes from synapse.api.errors import Codes, SynapseError from synapse.http import get_request_user_agent from synapse.http.server import HttpServer @@ -53,7 +53,7 @@ class ReceiptRestServlet(RestServlet): ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) - if receipt_type != "m.read": + if receipt_type != ReceiptTypes.READ: raise SynapseError(400, "Receipt type must be 'm.read'") # Do not allow older SchildiChat and Element Android clients (prior to Element/1.[012].x) to send an empty body. diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py index c99f8aebdb..9c5625c8bb 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py @@ -14,14 +14,25 @@ # limitations under the License. import logging -from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple +from typing import ( + TYPE_CHECKING, + Any, + Collection, + Dict, + Iterable, + List, + Optional, + Set, + Tuple, +) from twisted.internet import defer +from synapse.api.constants import ReceiptTypes from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker from synapse.replication.tcp.streams import ReceiptsStream from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause -from synapse.storage.database import DatabasePool +from synapse.storage.database import DatabasePool, LoggingTransaction from synapse.storage.engines import PostgresEngine from synapse.storage.util.id_generators import MultiWriterIdGenerator, StreamIdGenerator from synapse.types import JsonDict @@ -78,17 +89,13 @@ class ReceiptsWorkerStore(SQLBaseStore): "ReceiptsRoomChangeCache", self.get_max_receipt_stream_id() ) - def get_max_receipt_stream_id(self): - """Get the current max stream ID for receipts stream - - Returns: - int - """ + def get_max_receipt_stream_id(self) -> int: + """Get the current max stream ID for receipts stream""" return self._receipts_id_gen.get_current_token() @cached() - async def get_users_with_read_receipts_in_room(self, room_id): - receipts = await self.get_receipts_for_room(room_id, "m.read") + async def get_users_with_read_receipts_in_room(self, room_id: str) -> Set[str]: + receipts = await self.get_receipts_for_room(room_id, ReceiptTypes.READ) return {r["user_id"] for r in receipts} @cached(num_args=2) @@ -119,7 +126,9 @@ class ReceiptsWorkerStore(SQLBaseStore): ) @cached(num_args=2) - async def get_receipts_for_user(self, user_id, receipt_type): + async def get_receipts_for_user( + self, user_id: str, receipt_type: str + ) -> Dict[str, str]: rows = await self.db_pool.simple_select_list( table="receipts_linearized", keyvalues={"user_id": user_id, "receipt_type": receipt_type}, @@ -129,8 +138,10 @@ class ReceiptsWorkerStore(SQLBaseStore): return {row["room_id"]: row["event_id"] for row in rows} - async def get_receipts_for_user_with_orderings(self, user_id, receipt_type): - def f(txn): + async def get_receipts_for_user_with_orderings( + self, user_id: str, receipt_type: str + ) -> JsonDict: + def f(txn: LoggingTransaction) -> List[Tuple[str, str, int, int]]: sql = ( "SELECT rl.room_id, rl.event_id," " e.topological_ordering, e.stream_ordering" @@ -209,10 +220,10 @@ class ReceiptsWorkerStore(SQLBaseStore): @cached(num_args=3, tree=True) async def _get_linearized_receipts_for_room( self, room_id: str, to_key: int, from_key: Optional[int] = None - ) -> List[dict]: + ) -> List[JsonDict]: """See get_linearized_receipts_for_room""" - def f(txn): + def f(txn: LoggingTransaction) -> List[Dict[str, Any]]: if from_key: sql = ( "SELECT * FROM receipts_linearized WHERE" @@ -250,11 +261,13 @@ class ReceiptsWorkerStore(SQLBaseStore): list_name="room_ids", num_args=3, ) - async def _get_linearized_receipts_for_rooms(self, room_ids, to_key, from_key=None): + async def _get_linearized_receipts_for_rooms( + self, room_ids: Collection[str], to_key: int, from_key: Optional[int] = None + ) -> Dict[str, List[JsonDict]]: if not room_ids: return {} - def f(txn): + def f(txn: LoggingTransaction) -> List[Dict[str, Any]]: if from_key: sql = """ SELECT * FROM receipts_linearized WHERE @@ -323,7 +336,7 @@ class ReceiptsWorkerStore(SQLBaseStore): A dictionary of roomids to a list of receipts. """ - def f(txn): + def f(txn: LoggingTransaction) -> List[Dict[str, Any]]: if from_key: sql = """ SELECT * FROM receipts_linearized WHERE @@ -379,7 +392,7 @@ class ReceiptsWorkerStore(SQLBaseStore): if last_id == current_id: return defer.succeed([]) - def _get_users_sent_receipts_between_txn(txn): + def _get_users_sent_receipts_between_txn(txn: LoggingTransaction) -> List[str]: sql = """ SELECT DISTINCT user_id FROM receipts_linearized WHERE ? < stream_id AND stream_id <= ? @@ -419,7 +432,9 @@ class ReceiptsWorkerStore(SQLBaseStore): if last_id == current_id: return [], current_id, False - def get_all_updated_receipts_txn(txn): + def get_all_updated_receipts_txn( + txn: LoggingTransaction, + ) -> Tuple[List[Tuple[int, list]], int, bool]: sql = """ SELECT stream_id, room_id, receipt_type, user_id, event_id, data FROM receipts_linearized @@ -446,8 +461,8 @@ class ReceiptsWorkerStore(SQLBaseStore): def _invalidate_get_users_with_receipts_in_room( self, room_id: str, receipt_type: str, user_id: str - ): - if receipt_type != "m.read": + ) -> None: + if receipt_type != ReceiptTypes.READ: return res = self.get_users_with_read_receipts_in_room.cache.get_immediate( @@ -461,7 +476,9 @@ class ReceiptsWorkerStore(SQLBaseStore): self.get_users_with_read_receipts_in_room.invalidate((room_id,)) - def invalidate_caches_for_receipt(self, room_id, receipt_type, user_id): + def invalidate_caches_for_receipt( + self, room_id: str, receipt_type: str, user_id: str + ) -> None: self.get_receipts_for_user.invalidate((user_id, receipt_type)) self._get_linearized_receipts_for_room.invalidate((room_id,)) self.get_last_receipt_event_id_for_user.invalidate( @@ -482,11 +499,18 @@ class ReceiptsWorkerStore(SQLBaseStore): return super().process_replication_rows(stream_name, instance_name, token, rows) def insert_linearized_receipt_txn( - self, txn, room_id, receipt_type, user_id, event_id, data, stream_id - ): + self, + txn: LoggingTransaction, + room_id: str, + receipt_type: str, + user_id: str, + event_id: str, + data: JsonDict, + stream_id: int, + ) -> Optional[int]: """Inserts a read-receipt into the database if it's newer than the current RR - Returns: int|None + Returns: None if the RR is older than the current RR otherwise, the rx timestamp of the event that the RR corresponds to (or 0 if the event is unknown) @@ -550,7 +574,7 @@ class ReceiptsWorkerStore(SQLBaseStore): lock=False, ) - if receipt_type == "m.read" and stream_ordering is not None: + if receipt_type == ReceiptTypes.READ and stream_ordering is not None: self._remove_old_push_actions_before_txn( txn, room_id=room_id, user_id=user_id, stream_ordering=stream_ordering ) @@ -580,7 +604,7 @@ class ReceiptsWorkerStore(SQLBaseStore): else: # we need to points in graph -> linearized form. # TODO: Make this better. - def graph_to_linear(txn): + def graph_to_linear(txn: LoggingTransaction) -> str: clause, args = make_in_list_sql_clause( self.database_engine, "event_id", event_ids ) @@ -634,11 +658,16 @@ class ReceiptsWorkerStore(SQLBaseStore): return stream_id, max_persisted_id async def insert_graph_receipt( - self, room_id, receipt_type, user_id, event_ids, data - ): + self, + room_id: str, + receipt_type: str, + user_id: str, + event_ids: List[str], + data: JsonDict, + ) -> None: assert self._can_write_to_receipts - return await self.db_pool.runInteraction( + await self.db_pool.runInteraction( "insert_graph_receipt", self.insert_graph_receipt_txn, room_id, @@ -649,8 +678,14 @@ class ReceiptsWorkerStore(SQLBaseStore): ) def insert_graph_receipt_txn( - self, txn, room_id, receipt_type, user_id, event_ids, data - ): + self, + txn: LoggingTransaction, + room_id: str, + receipt_type: str, + user_id: str, + event_ids: List[str], + data: JsonDict, + ) -> None: assert self._can_write_to_receipts txn.call_after(self.get_receipts_for_room.invalidate, (room_id, receipt_type)) -- cgit 1.5.1