From 8d46fac98e07ac319c7ae21dfc24502993de3f1d Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Wed, 27 Oct 2021 17:01:18 +0200 Subject: Delete messages from `device_inbox` table when deleting device (#10969) Fixes: #9346 --- synapse/storage/databases/main/deviceinbox.py | 92 +++++++++++++++++++++- synapse/storage/databases/main/devices.py | 35 ++++---- .../02remove_deleted_devices_from_device_inbox.sql | 22 ++++++ 3 files changed, 134 insertions(+), 15 deletions(-) create mode 100644 synapse/storage/schema/main/delta/64/02remove_deleted_devices_from_device_inbox.sql (limited to 'synapse/storage') diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py index 8143168107..b0ccab0c9b 100644 --- a/synapse/storage/databases/main/deviceinbox.py +++ b/synapse/storage/databases/main/deviceinbox.py @@ -19,9 +19,10 @@ from synapse.logging import issue9533_logger from synapse.logging.opentracing import log_kv, set_tag, trace from synapse.replication.tcp.streams import ToDeviceStream from synapse.storage._base import SQLBaseStore, db_to_json -from synapse.storage.database import DatabasePool +from synapse.storage.database import DatabasePool, LoggingTransaction from synapse.storage.engines import PostgresEngine from synapse.storage.util.id_generators import MultiWriterIdGenerator, StreamIdGenerator +from synapse.types import JsonDict from synapse.util import json_encoder from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.caches.stream_change_cache import StreamChangeCache @@ -555,6 +556,7 @@ class DeviceInboxWorkerStore(SQLBaseStore): class DeviceInboxBackgroundUpdateStore(SQLBaseStore): DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop" + REMOVE_DELETED_DEVICES = "remove_deleted_devices_from_device_inbox" def __init__(self, database: DatabasePool, db_conn, hs: "HomeServer"): super().__init__(database, db_conn, hs) @@ -570,6 +572,11 @@ class DeviceInboxBackgroundUpdateStore(SQLBaseStore): self.DEVICE_INBOX_STREAM_ID, self._background_drop_index_device_inbox ) + self.db_pool.updates.register_background_update_handler( + self.REMOVE_DELETED_DEVICES, + self._remove_deleted_devices_from_device_inbox, + ) + async def _background_drop_index_device_inbox(self, progress, batch_size): def reindex_txn(conn): txn = conn.cursor() @@ -582,6 +589,89 @@ class DeviceInboxBackgroundUpdateStore(SQLBaseStore): return 1 + async def _remove_deleted_devices_from_device_inbox( + self, progress: JsonDict, batch_size: int + ) -> int: + """A background update that deletes all device_inboxes for deleted devices. + + This should only need to be run once (when users upgrade to v1.46.0) + + Args: + progress: JsonDict used to store progress of this background update + batch_size: the maximum number of rows to retrieve in a single select query + + Returns: + The number of deleted rows + """ + + def _remove_deleted_devices_from_device_inbox_txn( + txn: LoggingTransaction, + ) -> int: + """stream_id is not unique + we need to use an inclusive `stream_id >= ?` clause, + since we might not have deleted all dead device messages for the stream_id + returned from the previous query + + Then delete only rows matching the `(user_id, device_id, stream_id)` tuple, + to avoid problems of deleting a large number of rows all at once + due to a single device having lots of device messages. + """ + + last_stream_id = progress.get("stream_id", 0) + + sql = """ + SELECT device_id, user_id, stream_id + FROM device_inbox + WHERE + stream_id >= ? + AND (device_id, user_id) NOT IN ( + SELECT device_id, user_id FROM devices + ) + ORDER BY stream_id + LIMIT ? + """ + + txn.execute(sql, (last_stream_id, batch_size)) + rows = txn.fetchall() + + num_deleted = 0 + for row in rows: + num_deleted += self.db_pool.simple_delete_txn( + txn, + "device_inbox", + {"device_id": row[0], "user_id": row[1], "stream_id": row[2]}, + ) + + if rows: + # send more than stream_id to progress + # otherwise it can happen in large deployments that + # no change of status is visible in the log file + # it may be that the stream_id does not change in several runs + self.db_pool.updates._background_update_progress_txn( + txn, + self.REMOVE_DELETED_DEVICES, + { + "device_id": rows[-1][0], + "user_id": rows[-1][1], + "stream_id": rows[-1][2], + }, + ) + + return num_deleted + + number_deleted = await self.db_pool.runInteraction( + "_remove_deleted_devices_from_device_inbox", + _remove_deleted_devices_from_device_inbox_txn, + ) + + # The task is finished when no more lines are deleted. + if not number_deleted: + await self.db_pool.updates._end_background_update( + self.REMOVE_DELETED_DEVICES + ) + + return number_deleted + class DeviceInboxStore(DeviceInboxWorkerStore, DeviceInboxBackgroundUpdateStore): pass diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index a01bf2c5b7..b15cd030e0 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1134,19 +1134,14 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): raise StoreError(500, "Problem storing device.") async def delete_device(self, user_id: str, device_id: str) -> None: - """Delete a device. + """Delete a device and its device_inbox. Args: user_id: The ID of the user which owns the device device_id: The ID of the device to delete """ - await self.db_pool.simple_delete_one( - table="devices", - keyvalues={"user_id": user_id, "device_id": device_id, "hidden": False}, - desc="delete_device", - ) - self.device_id_exists_cache.invalidate((user_id, device_id)) + await self.delete_devices(user_id, [device_id]) async def delete_devices(self, user_id: str, device_ids: List[str]) -> None: """Deletes several devices. @@ -1155,13 +1150,25 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): user_id: The ID of the user which owns the devices device_ids: The IDs of the devices to delete """ - await self.db_pool.simple_delete_many( - table="devices", - column="device_id", - iterable=device_ids, - keyvalues={"user_id": user_id, "hidden": False}, - desc="delete_devices", - ) + + def _delete_devices_txn(txn: LoggingTransaction) -> None: + self.db_pool.simple_delete_many_txn( + txn, + table="devices", + column="device_id", + values=device_ids, + keyvalues={"user_id": user_id, "hidden": False}, + ) + + self.db_pool.simple_delete_many_txn( + txn, + table="device_inbox", + column="device_id", + values=device_ids, + keyvalues={"user_id": user_id}, + ) + + await self.db_pool.runInteraction("delete_devices", _delete_devices_txn) for device_id in device_ids: self.device_id_exists_cache.invalidate((user_id, device_id)) diff --git a/synapse/storage/schema/main/delta/64/02remove_deleted_devices_from_device_inbox.sql b/synapse/storage/schema/main/delta/64/02remove_deleted_devices_from_device_inbox.sql new file mode 100644 index 0000000000..efe702f621 --- /dev/null +++ b/synapse/storage/schema/main/delta/64/02remove_deleted_devices_from_device_inbox.sql @@ -0,0 +1,22 @@ +/* Copyright 2021 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +-- Remove messages from the device_inbox table which were orphaned +-- when a device was deleted using Synapse earlier than 1.46.0. +-- This runs as background task, but may take a bit to finish. + +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (6402, 'remove_deleted_devices_from_device_inbox', '{}'); -- cgit 1.5.1 From 75ca0a6168f92dab3255839cf85fb0df3a0076c3 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Wed, 27 Oct 2021 17:27:23 +0100 Subject: Annotate `log_function` decorator (#10943) Co-authored-by: Patrick Cloke --- changelog.d/10943.misc | 1 + synapse/federation/federation_client.py | 17 +++++++++++++++-- synapse/federation/federation_server.py | 10 ++++++---- synapse/federation/sender/transaction_manager.py | 1 - synapse/federation/transport/client.py | 22 ++++++++++++++++++---- synapse/handlers/directory.py | 2 +- synapse/handlers/federation_event.py | 2 +- synapse/handlers/presence.py | 2 ++ synapse/handlers/profile.py | 4 ++++ synapse/logging/utils.py | 8 ++++++-- synapse/state/__init__.py | 5 +++-- synapse/storage/databases/main/profile.py | 2 +- 12 files changed, 58 insertions(+), 18 deletions(-) create mode 100644 changelog.d/10943.misc (limited to 'synapse/storage') diff --git a/changelog.d/10943.misc b/changelog.d/10943.misc new file mode 100644 index 0000000000..3ce28d1a67 --- /dev/null +++ b/changelog.d/10943.misc @@ -0,0 +1 @@ +Add type annotations for the `log_function` decorator. diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 2ab4dec88f..670186f548 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -227,7 +227,7 @@ class FederationClient(FederationBase): ) async def backfill( - self, dest: str, room_id: str, limit: int, extremities: Iterable[str] + self, dest: str, room_id: str, limit: int, extremities: Collection[str] ) -> Optional[List[EventBase]]: """Requests some more historic PDUs for the given room from the given destination server. @@ -237,6 +237,8 @@ class FederationClient(FederationBase): room_id: The room_id to backfill. limit: The maximum number of events to return. extremities: our current backwards extremities, to backfill from + Must be a Collection that is falsy when empty. + (Iterable is not enough here!) """ logger.debug("backfill extrem=%s", extremities) @@ -250,11 +252,22 @@ class FederationClient(FederationBase): logger.debug("backfill transaction_data=%r", transaction_data) + if not isinstance(transaction_data, dict): + # TODO we probably want an exception type specific to federation + # client validation. + raise TypeError("Backfill transaction_data is not a dict.") + + transaction_data_pdus = transaction_data.get("pdus") + if not isinstance(transaction_data_pdus, list): + # TODO we probably want an exception type specific to federation + # client validation. + raise TypeError("transaction_data.pdus is not a list.") + room_version = await self.store.get_room_version(room_id) pdus = [ event_from_pdu_json(p, room_version, outlier=False) - for p in transaction_data["pdus"] + for p in transaction_data_pdus ] # Check signatures and hash of pdus, removing any from the list that fail checks diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 0d66034f44..32a75993d9 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -295,14 +295,16 @@ class FederationServer(FederationBase): Returns: HTTP response code and body """ - response = await self.transaction_actions.have_responded(origin, transaction) + existing_response = await self.transaction_actions.have_responded( + origin, transaction + ) - if response: + if existing_response: logger.debug( "[%s] We've already responded to this request", transaction.transaction_id, ) - return response + return existing_response logger.debug("[%s] Transaction is new", transaction.transaction_id) @@ -632,7 +634,7 @@ class FederationServer(FederationBase): async def on_make_knock_request( self, origin: str, room_id: str, user_id: str, supported_versions: List[str] - ) -> Dict[str, Union[EventBase, str]]: + ) -> JsonDict: """We've received a /make_knock/ request, so we create a partial knock event for the room and hand that back, along with the room version, to the knocking homeserver. We do *not* persist or process this event until the other server has diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py index dc555cca0b..ab935e5a7e 100644 --- a/synapse/federation/sender/transaction_manager.py +++ b/synapse/federation/sender/transaction_manager.py @@ -149,7 +149,6 @@ class TransactionManager: ) except HttpResponseException as e: code = e.code - response = e.response set_tag(tags.ERROR, True) diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 8b247fe206..d963178838 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -15,7 +15,19 @@ import logging import urllib -from typing import Any, Callable, Dict, Iterable, List, Mapping, Optional, Tuple, Union +from typing import ( + Any, + Awaitable, + Callable, + Collection, + Dict, + Iterable, + List, + Mapping, + Optional, + Tuple, + Union, +) import attr import ijson @@ -100,7 +112,7 @@ class TransportLayerClient: @log_function async def backfill( - self, destination: str, room_id: str, event_tuples: Iterable[str], limit: int + self, destination: str, room_id: str, event_tuples: Collection[str], limit: int ) -> Optional[JsonDict]: """Requests `limit` previous PDUs in a given context before list of PDUs. @@ -108,7 +120,9 @@ class TransportLayerClient: Args: destination room_id - event_tuples + event_tuples: + Must be a Collection that is falsy when empty. + (Iterable is not enough here!) limit Returns: @@ -786,7 +800,7 @@ class TransportLayerClient: @log_function def join_group( self, destination: str, group_id: str, user_id: str, content: JsonDict - ) -> JsonDict: + ) -> Awaitable[JsonDict]: """Attempts to join a group""" path = _create_v1_path("/groups/%s/users/%s/join", group_id, user_id) diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index 8567cb0e00..8ca5f60b1c 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -245,7 +245,7 @@ class DirectoryHandler: servers = result.servers else: try: - fed_result = await self.federation.make_query( + fed_result: Optional[JsonDict] = await self.federation.make_query( destination=room_alias.domain, query_type="directory", args={"room_alias": room_alias.to_string()}, diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index bd1fa08cef..e617db4c0d 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -477,7 +477,7 @@ class FederationEventHandler: @log_function async def backfill( - self, dest: str, room_id: str, limit: int, extremities: Iterable[str] + self, dest: str, room_id: str, limit: int, extremities: Collection[str] ) -> None: """Trigger a backfill request to `dest` for the given `room_id` diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index fdab50da37..3df872c578 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -52,6 +52,7 @@ import synapse.metrics from synapse.api.constants import EventTypes, Membership, PresenceState from synapse.api.errors import SynapseError from synapse.api.presence import UserPresenceState +from synapse.appservice import ApplicationService from synapse.events.presence_router import PresenceRouter from synapse.logging.context import run_in_background from synapse.logging.utils import log_function @@ -1551,6 +1552,7 @@ class PresenceEventSource(EventSource[int, UserPresenceState]): is_guest: bool = False, explicit_room_id: Optional[str] = None, include_offline: bool = True, + service: Optional[ApplicationService] = None, ) -> Tuple[List[UserPresenceState], int]: # The process for getting presence events are: # 1. Get the rooms the user is in. diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index e6c3cf585b..6b5a6ded8b 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -456,7 +456,11 @@ class ProfileHandler: continue new_name = profile.get("displayname") + if not isinstance(new_name, str): + new_name = None new_avatar = profile.get("avatar_url") + if not isinstance(new_avatar, str): + new_avatar = None # We always hit update to update the last_check timestamp await self.store.update_remote_profile_cache(user_id, new_name, new_avatar) diff --git a/synapse/logging/utils.py b/synapse/logging/utils.py index 08895e72ee..4a01b902c2 100644 --- a/synapse/logging/utils.py +++ b/synapse/logging/utils.py @@ -16,6 +16,7 @@ import logging from functools import wraps from inspect import getcallargs +from typing import Callable, TypeVar, cast _TIME_FUNC_ID = 0 @@ -41,7 +42,10 @@ def _log_debug_as_f(f, msg, msg_args): logger.handle(record) -def log_function(f): +F = TypeVar("F", bound=Callable) + + +def log_function(f: F) -> F: """Function decorator that logs every call to that function.""" func_name = f.__name__ @@ -69,4 +73,4 @@ def log_function(f): return f(*args, **kwargs) wrapped.__name__ = func_name - return wrapped + return cast(F, wrapped) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 5cf2e12575..98a0239759 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -26,6 +26,7 @@ from typing import ( FrozenSet, Iterable, List, + Mapping, Optional, Sequence, Set, @@ -519,7 +520,7 @@ class StateResolutionHandler: self, room_id: str, room_version: str, - state_groups_ids: Dict[int, StateMap[str]], + state_groups_ids: Mapping[int, StateMap[str]], event_map: Optional[Dict[str, EventBase]], state_res_store: "StateResolutionStore", ) -> _StateCacheEntry: @@ -703,7 +704,7 @@ class StateResolutionHandler: def _make_state_cache_entry( - new_state: StateMap[str], state_groups_ids: Dict[int, StateMap[str]] + new_state: StateMap[str], state_groups_ids: Mapping[int, StateMap[str]] ) -> _StateCacheEntry: """Given a resolved state, and a set of input state groups, pick one to base a new state group on (if any), and return an appropriately-constructed diff --git a/synapse/storage/databases/main/profile.py b/synapse/storage/databases/main/profile.py index ba7075caa5..dd8e27e226 100644 --- a/synapse/storage/databases/main/profile.py +++ b/synapse/storage/databases/main/profile.py @@ -91,7 +91,7 @@ class ProfileWorkerStore(SQLBaseStore): ) async def update_remote_profile_cache( - self, user_id: str, displayname: str, avatar_url: str + self, user_id: str, displayname: Optional[str], avatar_url: Optional[str] ) -> int: return await self.db_pool.simple_update( table="remote_profile_cache", -- cgit 1.5.1 From 56e281bf6c4f58929d56e3901856f6d0fa4b1816 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 28 Oct 2021 14:35:12 -0400 Subject: Additional type hints for relations database class. (#11205) --- changelog.d/11205.misc | 1 + mypy.ini | 1 + synapse/storage/databases/main/relations.py | 38 +++++++++++++++++------------ 3 files changed, 25 insertions(+), 15 deletions(-) create mode 100644 changelog.d/11205.misc (limited to 'synapse/storage') diff --git a/changelog.d/11205.misc b/changelog.d/11205.misc new file mode 100644 index 0000000000..62395c9432 --- /dev/null +++ b/changelog.d/11205.misc @@ -0,0 +1 @@ +Improve type hints for the relations datastore. diff --git a/mypy.ini b/mypy.ini index 8f5386c179..119a7d8c91 100644 --- a/mypy.ini +++ b/mypy.ini @@ -53,6 +53,7 @@ files = synapse/storage/databases/main/keys.py, synapse/storage/databases/main/pusher.py, synapse/storage/databases/main/registration.py, + synapse/storage/databases/main/relations.py, synapse/storage/databases/main/session.py, synapse/storage/databases/main/stream.py, synapse/storage/databases/main/ui_auth.py, diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py index 40760fbd1b..53576ad52f 100644 --- a/synapse/storage/databases/main/relations.py +++ b/synapse/storage/databases/main/relations.py @@ -13,13 +13,14 @@ # limitations under the License. import logging -from typing import Optional, Tuple +from typing import List, Optional, Tuple, Union import attr from synapse.api.constants import RelationTypes from synapse.events import EventBase from synapse.storage._base import SQLBaseStore +from synapse.storage.database import LoggingTransaction from synapse.storage.databases.main.stream import generate_pagination_where_clause from synapse.storage.relations import ( AggregationPaginationToken, @@ -63,7 +64,7 @@ class RelationsWorkerStore(SQLBaseStore): """ where_clause = ["relates_to_id = ?"] - where_args = [event_id] + where_args: List[Union[str, int]] = [event_id] if relation_type is not None: where_clause.append("relation_type = ?") @@ -80,8 +81,8 @@ class RelationsWorkerStore(SQLBaseStore): pagination_clause = generate_pagination_where_clause( direction=direction, column_names=("topological_ordering", "stream_ordering"), - from_token=attr.astuple(from_token) if from_token else None, - to_token=attr.astuple(to_token) if to_token else None, + from_token=attr.astuple(from_token) if from_token else None, # type: ignore[arg-type] + to_token=attr.astuple(to_token) if to_token else None, # type: ignore[arg-type] engine=self.database_engine, ) @@ -106,7 +107,9 @@ class RelationsWorkerStore(SQLBaseStore): order, ) - def _get_recent_references_for_event_txn(txn): + def _get_recent_references_for_event_txn( + txn: LoggingTransaction, + ) -> PaginationChunk: txn.execute(sql, where_args + [limit + 1]) last_topo_id = None @@ -160,7 +163,7 @@ class RelationsWorkerStore(SQLBaseStore): """ where_clause = ["relates_to_id = ?", "relation_type = ?"] - where_args = [event_id, RelationTypes.ANNOTATION] + where_args: List[Union[str, int]] = [event_id, RelationTypes.ANNOTATION] if event_type: where_clause.append("type = ?") @@ -169,8 +172,8 @@ class RelationsWorkerStore(SQLBaseStore): having_clause = generate_pagination_where_clause( direction=direction, column_names=("COUNT(*)", "MAX(stream_ordering)"), - from_token=attr.astuple(from_token) if from_token else None, - to_token=attr.astuple(to_token) if to_token else None, + from_token=attr.astuple(from_token) if from_token else None, # type: ignore[arg-type] + to_token=attr.astuple(to_token) if to_token else None, # type: ignore[arg-type] engine=self.database_engine, ) @@ -199,7 +202,9 @@ class RelationsWorkerStore(SQLBaseStore): having_clause=having_clause, ) - def _get_aggregation_groups_for_event_txn(txn): + def _get_aggregation_groups_for_event_txn( + txn: LoggingTransaction, + ) -> PaginationChunk: txn.execute(sql, where_args + [limit + 1]) next_batch = None @@ -254,11 +259,12 @@ class RelationsWorkerStore(SQLBaseStore): LIMIT 1 """ - def _get_applicable_edit_txn(txn): + def _get_applicable_edit_txn(txn: LoggingTransaction) -> Optional[str]: txn.execute(sql, (event_id, RelationTypes.REPLACE)) row = txn.fetchone() if row: return row[0] + return None edit_id = await self.db_pool.runInteraction( "get_applicable_edit", _get_applicable_edit_txn @@ -267,7 +273,7 @@ class RelationsWorkerStore(SQLBaseStore): if not edit_id: return None - return await self.get_event(edit_id, allow_none=True) + return await self.get_event(edit_id, allow_none=True) # type: ignore[attr-defined] @cached() async def get_thread_summary( @@ -283,7 +289,9 @@ class RelationsWorkerStore(SQLBaseStore): The number of items in the thread and the most recent response, if any. """ - def _get_thread_summary_txn(txn) -> Tuple[int, Optional[str]]: + def _get_thread_summary_txn( + txn: LoggingTransaction, + ) -> Tuple[int, Optional[str]]: # Fetch the count of threaded events and the latest event ID. # TODO Should this only allow m.room.message events. sql = """ @@ -312,7 +320,7 @@ class RelationsWorkerStore(SQLBaseStore): AND relation_type = ? """ txn.execute(sql, (event_id, RelationTypes.THREAD)) - count = txn.fetchone()[0] + count = txn.fetchone()[0] # type: ignore[index] return count, latest_event_id @@ -322,7 +330,7 @@ class RelationsWorkerStore(SQLBaseStore): latest_event = None if latest_event_id: - latest_event = await self.get_event(latest_event_id, allow_none=True) + latest_event = await self.get_event(latest_event_id, allow_none=True) # type: ignore[attr-defined] return count, latest_event @@ -354,7 +362,7 @@ class RelationsWorkerStore(SQLBaseStore): LIMIT 1; """ - def _get_if_user_has_annotated_event(txn): + def _get_if_user_has_annotated_event(txn: LoggingTransaction) -> bool: txn.execute( sql, ( -- cgit 1.5.1 From bfd7a9b65c5e092c6a7ccdd46e59a278b1cbbd57 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Fri, 29 Oct 2021 19:43:51 +0200 Subject: Fix comments referencing v1.46.0 from PR #10969. (#11212) #10969 was merged after 1.46.0rc1 was cut and will be included in v1.47.0rc1 instead. --- changelog.d/11212.bugfix | 1 + synapse/storage/databases/main/deviceinbox.py | 2 +- .../schema/main/delta/64/02remove_deleted_devices_from_device_inbox.sql | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 changelog.d/11212.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/11212.bugfix b/changelog.d/11212.bugfix new file mode 100644 index 0000000000..ba6efab25b --- /dev/null +++ b/changelog.d/11212.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug where messages in the `device_inbox` table for deleted devices would persist indefinitely. Contributed by @dklimpel and @JohannesKleine. \ No newline at end of file diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py index b0ccab0c9b..d03b5e5a7d 100644 --- a/synapse/storage/databases/main/deviceinbox.py +++ b/synapse/storage/databases/main/deviceinbox.py @@ -594,7 +594,7 @@ class DeviceInboxBackgroundUpdateStore(SQLBaseStore): ) -> int: """A background update that deletes all device_inboxes for deleted devices. - This should only need to be run once (when users upgrade to v1.46.0) + This should only need to be run once (when users upgrade to v1.47.0) Args: progress: JsonDict used to store progress of this background update diff --git a/synapse/storage/schema/main/delta/64/02remove_deleted_devices_from_device_inbox.sql b/synapse/storage/schema/main/delta/64/02remove_deleted_devices_from_device_inbox.sql index efe702f621..fca7290741 100644 --- a/synapse/storage/schema/main/delta/64/02remove_deleted_devices_from_device_inbox.sql +++ b/synapse/storage/schema/main/delta/64/02remove_deleted_devices_from_device_inbox.sql @@ -15,7 +15,7 @@ -- Remove messages from the device_inbox table which were orphaned --- when a device was deleted using Synapse earlier than 1.46.0. +-- when a device was deleted using Synapse earlier than 1.47.0. -- This runs as background task, but may take a bit to finish. INSERT INTO background_updates (ordering, update_name, progress_json) VALUES -- cgit 1.5.1 From 29ffd680bf0d0bf50383ad23404b348bf9cf90aa Mon Sep 17 00:00:00 2001 From: JohannesKleine Date: Mon, 1 Nov 2021 11:40:41 +0100 Subject: Stop synapse from saving messages in device_inbox for hidden devices. (#10097) Co-authored-by: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> --- changelog.d/10097.bugfix | 1 + synapse/storage/databases/main/deviceinbox.py | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 changelog.d/10097.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/10097.bugfix b/changelog.d/10097.bugfix new file mode 100644 index 0000000000..5d3d9587c2 --- /dev/null +++ b/changelog.d/10097.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug which allowed hidden devices to receive to-device messages, resulting in unnecessary database bloat. diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py index d03b5e5a7d..25e9c1efe1 100644 --- a/synapse/storage/databases/main/deviceinbox.py +++ b/synapse/storage/databases/main/deviceinbox.py @@ -489,10 +489,12 @@ class DeviceInboxWorkerStore(SQLBaseStore): devices = list(messages_by_device.keys()) if len(devices) == 1 and devices[0] == "*": # Handle wildcard device_ids. + # We exclude hidden devices (such as cross-signing keys) here as they are + # not expected to receive to-device messages. devices = self.db_pool.simple_select_onecol_txn( txn, table="devices", - keyvalues={"user_id": user_id}, + keyvalues={"user_id": user_id, "hidden": False}, retcol="device_id", ) @@ -505,10 +507,12 @@ class DeviceInboxWorkerStore(SQLBaseStore): if not devices: continue + # We exclude hidden devices (such as cross-signing keys) here as they are + # not expected to receive to-device messages. rows = self.db_pool.simple_select_many_txn( txn, table="devices", - keyvalues={"user_id": user_id}, + keyvalues={"user_id": user_id, "hidden": False}, column="device_id", iterable=devices, retcols=("device_id",), -- cgit 1.5.1 From 82d2168a15741ed4546c12c06d797627469fb684 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 1 Nov 2021 11:21:36 +0000 Subject: Add metrics to the threadpools (#11178) --- changelog.d/11178.feature | 1 + synapse/app/_base.py | 5 +++++ synapse/metrics/__init__.py | 37 +++++++++++++++++++++++++++++++++++++ synapse/storage/database.py | 7 ++++++- 4 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 changelog.d/11178.feature (limited to 'synapse/storage') diff --git a/changelog.d/11178.feature b/changelog.d/11178.feature new file mode 100644 index 0000000000..10b1cdffdc --- /dev/null +++ b/changelog.d/11178.feature @@ -0,0 +1 @@ +Add metrics for thread pool usage. diff --git a/synapse/app/_base.py b/synapse/app/_base.py index f4c3f867a8..f2c1028b5d 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -45,6 +45,7 @@ from synapse.events.spamcheck import load_legacy_spam_checkers from synapse.events.third_party_rules import load_legacy_third_party_event_rules from synapse.handlers.auth import load_legacy_password_auth_providers from synapse.logging.context import PreserveLoggingContext +from synapse.metrics import register_threadpool from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.metrics.jemalloc import setup_jemalloc_stats from synapse.util.caches.lrucache import setup_expire_lru_cache_entries @@ -351,6 +352,10 @@ async def start(hs: "HomeServer"): GAIResolver(reactor, getThreadPool=lambda: resolver_threadpool) ) + # Register the threadpools with our metrics. + register_threadpool("default", reactor.getThreadPool()) + register_threadpool("gai_resolver", resolver_threadpool) + # Set up the SIGHUP machinery. if hasattr(signal, "SIGHUP"): diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index e902109af3..91ee5c8193 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -32,6 +32,7 @@ from prometheus_client.core import ( ) from twisted.internet import reactor +from twisted.python.threadpool import ThreadPool import synapse from synapse.metrics._exposition import ( @@ -526,6 +527,42 @@ threepid_send_requests = Histogram( labelnames=("type", "reason"), ) +threadpool_total_threads = Gauge( + "synapse_threadpool_total_threads", + "Total number of threads currently in the threadpool", + ["name"], +) + +threadpool_total_working_threads = Gauge( + "synapse_threadpool_working_threads", + "Number of threads currently working in the threadpool", + ["name"], +) + +threadpool_total_min_threads = Gauge( + "synapse_threadpool_min_threads", + "Minimum number of threads configured in the threadpool", + ["name"], +) + +threadpool_total_max_threads = Gauge( + "synapse_threadpool_max_threads", + "Maximum number of threads configured in the threadpool", + ["name"], +) + + +def register_threadpool(name: str, threadpool: ThreadPool) -> None: + """Add metrics for the threadpool.""" + + threadpool_total_min_threads.labels(name).set(threadpool.min) + threadpool_total_max_threads.labels(name).set(threadpool.max) + + threadpool_total_threads.labels(name).set_function(lambda: len(threadpool.threads)) + threadpool_total_working_threads.labels(name).set_function( + lambda: len(threadpool.working) + ) + class ReactorLastSeenMetric: def collect(self): diff --git a/synapse/storage/database.py b/synapse/storage/database.py index fa4e89d35c..5c71e27518 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -48,6 +48,7 @@ from synapse.logging.context import ( current_context, make_deferred_yieldable, ) +from synapse.metrics import register_threadpool from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.background_updates import BackgroundUpdater from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine @@ -104,13 +105,17 @@ def make_pool( LoggingDatabaseConnection(conn, engine, "on_new_connection") ) - return adbapi.ConnectionPool( + connection_pool = adbapi.ConnectionPool( db_config.config["name"], cp_reactor=reactor, cp_openfun=_on_new_connection, **db_args, ) + register_threadpool(f"database-{db_config.name}", connection_pool.threadpool) + + return connection_pool + def make_conn( db_config: DatabaseConnectionConfig, -- cgit 1.5.1 From 753720184042e01bf56478d15bd8c8db11da4b69 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Tue, 2 Nov 2021 11:01:13 +0100 Subject: Add search by room ID and room alias to List Room admin API (#11099) Fixes: #10874 Signed-off-by: Dirk Klimpel dirk@klimpel.org --- changelog.d/11099.feature | 1 + docs/admin_api/rooms.md | 11 +++-- synapse/storage/databases/main/room.py | 29 ++++++----- tests/rest/admin/test_room.py | 88 +++++++++++++++++++--------------- 4 files changed, 76 insertions(+), 53 deletions(-) create mode 100644 changelog.d/11099.feature (limited to 'synapse/storage') diff --git a/changelog.d/11099.feature b/changelog.d/11099.feature new file mode 100644 index 0000000000..c9126d4a9d --- /dev/null +++ b/changelog.d/11099.feature @@ -0,0 +1 @@ +Add search by room ID and room alias to List Room admin API. \ No newline at end of file diff --git a/docs/admin_api/rooms.md b/docs/admin_api/rooms.md index 62eeff9e1a..1fc3cc3c42 100644 --- a/docs/admin_api/rooms.md +++ b/docs/admin_api/rooms.md @@ -38,9 +38,14 @@ The following query parameters are available: - `history_visibility` - Rooms are ordered alphabetically by visibility of history of the room. - `state_events` - Rooms are ordered by number of state events. Largest to smallest. * `dir` - Direction of room order. Either `f` for forwards or `b` for backwards. Setting - this value to `b` will reverse the above sort order. Defaults to `f`. -* `search_term` - Filter rooms by their room name. Search term can be contained in any - part of the room name. Defaults to no filtering. + this value to `b` will reverse the above sort order. Defaults to `f`. +* `search_term` - Filter rooms by their room name, canonical alias and room id. + Specifically, rooms are selected if the search term is contained in + - the room's name, + - the local part of the room's canonical alias, or + - the complete (local and server part) room's id (case sensitive). + + Defaults to no filtering. **Response** diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index f879bbe7c7..cefc77fa0f 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -412,22 +412,33 @@ class RoomWorkerStore(SQLBaseStore): limit: maximum amount of rooms to retrieve order_by: the sort order of the returned list reverse_order: whether to reverse the room list - search_term: a string to filter room names by + search_term: a string to filter room names, + canonical alias and room ids by. + Room ID must match exactly. Canonical alias must match a substring of the local part. Returns: A list of room dicts and an integer representing the total number of rooms that exist given this query """ # Filter room names by a string where_statement = "" + search_pattern = [] if search_term: - where_statement = "WHERE LOWER(state.name) LIKE ?" + where_statement = """ + WHERE LOWER(state.name) LIKE ? + OR LOWER(state.canonical_alias) LIKE ? + OR state.room_id = ? + """ # Our postgres db driver converts ? -> %s in SQL strings as that's the # placeholder for postgres. # HOWEVER, if you put a % into your SQL then everything goes wibbly. # To get around this, we're going to surround search_term with %'s # before giving it to the database in python instead - search_term = "%" + search_term.lower() + "%" + search_pattern = [ + "%" + search_term.lower() + "%", + "#%" + search_term.lower() + "%:%", + search_term, + ] # Set ordering if RoomSortOrder(order_by) == RoomSortOrder.SIZE: @@ -519,12 +530,9 @@ class RoomWorkerStore(SQLBaseStore): ) def _get_rooms_paginate_txn(txn): - # Execute the data query - sql_values = (limit, start) - if search_term: - # Add the search term into the WHERE clause - sql_values = (search_term,) + sql_values - txn.execute(info_sql, sql_values) + # Add the search term into the WHERE clause + # and execute the data query + txn.execute(info_sql, search_pattern + [limit, start]) # Refactor room query data into a structured dictionary rooms = [] @@ -551,8 +559,7 @@ class RoomWorkerStore(SQLBaseStore): # Execute the count query # Add the search term into the WHERE clause if present - sql_values = (search_term,) if search_term else () - txn.execute(count_sql, sql_values) + txn.execute(count_sql, search_pattern) room_count = txn.fetchone() return rooms, room_count[0] diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py index b62a7248e8..46116644ce 100644 --- a/tests/rest/admin/test_room.py +++ b/tests/rest/admin/test_room.py @@ -680,36 +680,6 @@ class RoomTestCase(unittest.HomeserverTestCase): reversing the order, etc. """ - def _set_canonical_alias(room_id: str, test_alias: str, admin_user_tok: str): - # Create a new alias to this room - url = "/_matrix/client/r0/directory/room/%s" % ( - urllib.parse.quote(test_alias), - ) - channel = self.make_request( - "PUT", - url.encode("ascii"), - {"room_id": room_id}, - access_token=admin_user_tok, - ) - self.assertEqual( - 200, int(channel.result["code"]), msg=channel.result["body"] - ) - - # Set this new alias as the canonical alias for this room - self.helper.send_state( - room_id, - "m.room.aliases", - {"aliases": [test_alias]}, - tok=admin_user_tok, - state_key="test", - ) - self.helper.send_state( - room_id, - "m.room.canonical_alias", - {"alias": test_alias}, - tok=admin_user_tok, - ) - def _order_test( order_type: str, expected_room_list: List[str], @@ -781,9 +751,9 @@ class RoomTestCase(unittest.HomeserverTestCase): ) # Set room canonical room aliases - _set_canonical_alias(room_id_1, "#A_alias:test", self.admin_user_tok) - _set_canonical_alias(room_id_2, "#B_alias:test", self.admin_user_tok) - _set_canonical_alias(room_id_3, "#C_alias:test", self.admin_user_tok) + self._set_canonical_alias(room_id_1, "#A_alias:test", self.admin_user_tok) + self._set_canonical_alias(room_id_2, "#B_alias:test", self.admin_user_tok) + self._set_canonical_alias(room_id_3, "#C_alias:test", self.admin_user_tok) # Set room member size in the reverse order. room 1 -> 1 member, 2 -> 2, 3 -> 3 user_1 = self.register_user("bob1", "pass") @@ -850,7 +820,7 @@ class RoomTestCase(unittest.HomeserverTestCase): room_id_2 = self.helper.create_room_as(self.admin_user, tok=self.admin_user_tok) room_name_1 = "something" - room_name_2 = "else" + room_name_2 = "LoremIpsum" # Set the name for each room self.helper.send_state( @@ -866,6 +836,8 @@ class RoomTestCase(unittest.HomeserverTestCase): tok=self.admin_user_tok, ) + self._set_canonical_alias(room_id_1, "#Room_Alias1:test", self.admin_user_tok) + def _search_test( expected_room_id: Optional[str], search_term: str, @@ -914,24 +886,36 @@ class RoomTestCase(unittest.HomeserverTestCase): r = rooms[0] self.assertEqual(expected_room_id, r["room_id"]) - # Perform search tests + # Test searching by room name _search_test(room_id_1, "something") _search_test(room_id_1, "thing") - _search_test(room_id_2, "else") - _search_test(room_id_2, "se") + _search_test(room_id_2, "LoremIpsum") + _search_test(room_id_2, "lorem") # Test case insensitive _search_test(room_id_1, "SOMETHING") _search_test(room_id_1, "THING") - _search_test(room_id_2, "ELSE") - _search_test(room_id_2, "SE") + _search_test(room_id_2, "LOREMIPSUM") + _search_test(room_id_2, "LOREM") _search_test(None, "foo") _search_test(None, "bar") _search_test(None, "", expected_http_code=400) + # Test that the whole room id returns the room + _search_test(room_id_1, room_id_1) + # Test that the search by room_id is case sensitive + _search_test(None, room_id_1.lower()) + # Test search part of local part of room id do not match + _search_test(None, room_id_1[1:10]) + + # Test that whole room alias return no result, because of domain + _search_test(None, "#Room_Alias1:test") + # Test search local part of alias + _search_test(room_id_1, "alias1") + def test_search_term_non_ascii(self): """Test that searching for a room with non-ASCII characters works correctly""" @@ -1114,6 +1098,32 @@ class RoomTestCase(unittest.HomeserverTestCase): # the create_room already does the right thing, so no need to verify that we got # the state events it created. + def _set_canonical_alias(self, room_id: str, test_alias: str, admin_user_tok: str): + # Create a new alias to this room + url = "/_matrix/client/r0/directory/room/%s" % (urllib.parse.quote(test_alias),) + channel = self.make_request( + "PUT", + url.encode("ascii"), + {"room_id": room_id}, + access_token=admin_user_tok, + ) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + + # Set this new alias as the canonical alias for this room + self.helper.send_state( + room_id, + "m.room.aliases", + {"aliases": [test_alias]}, + tok=admin_user_tok, + state_key="test", + ) + self.helper.send_state( + room_id, + "m.room.canonical_alias", + {"alias": test_alias}, + tok=admin_user_tok, + ) + class JoinAliasRoomTestCase(unittest.HomeserverTestCase): -- cgit 1.5.1 From c9c3aea9b189cb606d7ec2905dad2c87acc039ef Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Tue, 2 Nov 2021 10:39:02 +0000 Subject: Fix providing a `RoomStreamToken` instance to `_notify_app_services_ephemeral` (#11137) Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- changelog.d/11137.misc | 1 + synapse/handlers/appservice.py | 22 +++++++++++++---- synapse/notifier.py | 38 +++++++----------------------- synapse/storage/databases/main/devices.py | 4 ++-- synapse/storage/databases/main/presence.py | 2 +- 5 files changed, 30 insertions(+), 37 deletions(-) create mode 100644 changelog.d/11137.misc (limited to 'synapse/storage') diff --git a/changelog.d/11137.misc b/changelog.d/11137.misc new file mode 100644 index 0000000000..f0d6476f48 --- /dev/null +++ b/changelog.d/11137.misc @@ -0,0 +1 @@ +Remove and document unnecessary `RoomStreamToken` checks in application service ephemeral event code. \ No newline at end of file diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index 36c206dae6..67f8ffcaff 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -182,7 +182,7 @@ class ApplicationServicesHandler: def notify_interested_services_ephemeral( self, stream_key: str, - new_token: Optional[int], + new_token: Union[int, RoomStreamToken], users: Optional[Collection[Union[str, UserID]]] = None, ) -> None: """ @@ -203,7 +203,7 @@ class ApplicationServicesHandler: Appservices will only receive ephemeral events that fall within their registered user and room namespaces. - new_token: The latest stream token. + new_token: The stream token of the event. users: The users that should be informed of the new event, if any. """ if not self.notify_appservices: @@ -212,6 +212,19 @@ class ApplicationServicesHandler: if stream_key not in ("typing_key", "receipt_key", "presence_key"): return + # Assert that new_token is an integer (and not a RoomStreamToken). + # All of the supported streams that this function handles use an + # integer to track progress (rather than a RoomStreamToken - a + # vector clock implementation) as they don't support multiple + # stream writers. + # + # As a result, we simply assert that new_token is an integer. + # If we do end up needing to pass a RoomStreamToken down here + # in the future, using RoomStreamToken.stream (the minimum stream + # position) to convert to an ascending integer value should work. + # Additional context: https://github.com/matrix-org/synapse/pull/11137 + assert isinstance(new_token, int) + services = [ service for service in self.store.get_app_services() @@ -231,14 +244,13 @@ class ApplicationServicesHandler: self, services: List[ApplicationService], stream_key: str, - new_token: Optional[int], + new_token: int, users: Collection[Union[str, UserID]], ) -> None: logger.debug("Checking interested services for %s" % (stream_key)) with Measure(self.clock, "notify_interested_services_ephemeral"): for service in services: - # Only handle typing if we have the latest token - if stream_key == "typing_key" and new_token is not None: + if stream_key == "typing_key": # Note that we don't persist the token (via set_type_stream_id_for_appservice) # for typing_key due to performance reasons and due to their highly # ephemeral nature. diff --git a/synapse/notifier.py b/synapse/notifier.py index 1882fffd2a..60e5409895 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -383,29 +383,6 @@ class Notifier: except Exception: logger.exception("Error notifying application services of event") - def _notify_app_services_ephemeral( - self, - stream_key: str, - new_token: Union[int, RoomStreamToken], - users: Optional[Collection[Union[str, UserID]]] = None, - ) -> None: - """Notify application services of ephemeral event activity. - - Args: - stream_key: The stream the event came from. - new_token: The value of the new stream token. - users: The users that should be informed of the new event, if any. - """ - try: - stream_token = None - if isinstance(new_token, int): - stream_token = new_token - self.appservice_handler.notify_interested_services_ephemeral( - stream_key, stream_token, users or [] - ) - except Exception: - logger.exception("Error notifying application services of event") - def _notify_pusher_pool(self, max_room_stream_token: RoomStreamToken): try: self._pusher_pool.on_new_notifications(max_room_stream_token) @@ -467,12 +444,15 @@ class Notifier: self.notify_replication() - # Notify appservices - self._notify_app_services_ephemeral( - stream_key, - new_token, - users, - ) + # Notify appservices. + try: + self.appservice_handler.notify_interested_services_ephemeral( + stream_key, + new_token, + users, + ) + except Exception: + logger.exception("Error notifying application services of event") def on_new_replication_data(self) -> None: """Used to inform replication listeners that something has happened diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index b15cd030e0..9ccc66e589 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -427,7 +427,7 @@ class DeviceWorkerStore(SQLBaseStore): user_ids: the users who were signed Returns: - THe new stream ID. + The new stream ID. """ async with self._device_list_id_gen.get_next() as stream_id: @@ -1322,7 +1322,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): async def add_device_change_to_streams( self, user_id: str, device_ids: Collection[str], hosts: List[str] - ): + ) -> int: """Persist that a user's devices have been updated, and which hosts (if any) should be poked. """ diff --git a/synapse/storage/databases/main/presence.py b/synapse/storage/databases/main/presence.py index 12cf6995eb..cc0eebdb46 100644 --- a/synapse/storage/databases/main/presence.py +++ b/synapse/storage/databases/main/presence.py @@ -92,7 +92,7 @@ class PresenceStore(PresenceBackgroundUpdateStore): prefilled_cache=presence_cache_prefill, ) - async def update_presence(self, presence_states): + async def update_presence(self, presence_states) -> Tuple[int, int]: assert self._can_persist_presence stream_ordering_manager = self._presence_id_gen.get_next_mult( -- cgit 1.5.1 From 4535532526581834ab798996ffe73f6d19c25123 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Tue, 2 Nov 2021 14:18:30 +0100 Subject: Delete messages for hidden devices from `device_inbox` (#11199) --- changelog.d/11199.bugfix | 1 + synapse/storage/databases/main/deviceinbox.py | 89 ++++++++++++++++++++++ .../03remove_hidden_devices_from_device_inbox.sql | 22 ++++++ tests/storage/databases/main/test_deviceinbox.py | 74 ++++++++++++++++++ 4 files changed, 186 insertions(+) create mode 100644 changelog.d/11199.bugfix create mode 100644 synapse/storage/schema/main/delta/65/03remove_hidden_devices_from_device_inbox.sql (limited to 'synapse/storage') diff --git a/changelog.d/11199.bugfix b/changelog.d/11199.bugfix new file mode 100644 index 0000000000..dc3ea8d515 --- /dev/null +++ b/changelog.d/11199.bugfix @@ -0,0 +1 @@ +Delete `to_device` messages for hidden devices that will never be read, reducing database size. \ No newline at end of file diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py index 25e9c1efe1..264e625bd7 100644 --- a/synapse/storage/databases/main/deviceinbox.py +++ b/synapse/storage/databases/main/deviceinbox.py @@ -561,6 +561,7 @@ class DeviceInboxWorkerStore(SQLBaseStore): class DeviceInboxBackgroundUpdateStore(SQLBaseStore): DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop" REMOVE_DELETED_DEVICES = "remove_deleted_devices_from_device_inbox" + REMOVE_HIDDEN_DEVICES = "remove_hidden_devices_from_device_inbox" def __init__(self, database: DatabasePool, db_conn, hs: "HomeServer"): super().__init__(database, db_conn, hs) @@ -581,6 +582,11 @@ class DeviceInboxBackgroundUpdateStore(SQLBaseStore): self._remove_deleted_devices_from_device_inbox, ) + self.db_pool.updates.register_background_update_handler( + self.REMOVE_HIDDEN_DEVICES, + self._remove_hidden_devices_from_device_inbox, + ) + async def _background_drop_index_device_inbox(self, progress, batch_size): def reindex_txn(conn): txn = conn.cursor() @@ -676,6 +682,89 @@ class DeviceInboxBackgroundUpdateStore(SQLBaseStore): return number_deleted + async def _remove_hidden_devices_from_device_inbox( + self, progress: JsonDict, batch_size: int + ) -> int: + """A background update that deletes all device_inboxes for hidden devices. + + This should only need to be run once (when users upgrade to v1.47.0) + + Args: + progress: JsonDict used to store progress of this background update + batch_size: the maximum number of rows to retrieve in a single select query + + Returns: + The number of deleted rows + """ + + def _remove_hidden_devices_from_device_inbox_txn( + txn: LoggingTransaction, + ) -> int: + """stream_id is not unique + we need to use an inclusive `stream_id >= ?` clause, + since we might not have deleted all hidden device messages for the stream_id + returned from the previous query + + Then delete only rows matching the `(user_id, device_id, stream_id)` tuple, + to avoid problems of deleting a large number of rows all at once + due to a single device having lots of device messages. + """ + + last_stream_id = progress.get("stream_id", 0) + + sql = """ + SELECT device_id, user_id, stream_id + FROM device_inbox + WHERE + stream_id >= ? + AND (device_id, user_id) IN ( + SELECT device_id, user_id FROM devices WHERE hidden = ? + ) + ORDER BY stream_id + LIMIT ? + """ + + txn.execute(sql, (last_stream_id, True, batch_size)) + rows = txn.fetchall() + + num_deleted = 0 + for row in rows: + num_deleted += self.db_pool.simple_delete_txn( + txn, + "device_inbox", + {"device_id": row[0], "user_id": row[1], "stream_id": row[2]}, + ) + + if rows: + # We don't just save the `stream_id` in progress as + # otherwise it can happen in large deployments that + # no change of status is visible in the log file, as + # it may be that the stream_id does not change in several runs + self.db_pool.updates._background_update_progress_txn( + txn, + self.REMOVE_HIDDEN_DEVICES, + { + "device_id": rows[-1][0], + "user_id": rows[-1][1], + "stream_id": rows[-1][2], + }, + ) + + return num_deleted + + number_deleted = await self.db_pool.runInteraction( + "_remove_hidden_devices_from_device_inbox", + _remove_hidden_devices_from_device_inbox_txn, + ) + + # The task is finished when no more lines are deleted. + if not number_deleted: + await self.db_pool.updates._end_background_update( + self.REMOVE_HIDDEN_DEVICES + ) + + return number_deleted + class DeviceInboxStore(DeviceInboxWorkerStore, DeviceInboxBackgroundUpdateStore): pass diff --git a/synapse/storage/schema/main/delta/65/03remove_hidden_devices_from_device_inbox.sql b/synapse/storage/schema/main/delta/65/03remove_hidden_devices_from_device_inbox.sql new file mode 100644 index 0000000000..7b3592dcf0 --- /dev/null +++ b/synapse/storage/schema/main/delta/65/03remove_hidden_devices_from_device_inbox.sql @@ -0,0 +1,22 @@ +/* Copyright 2021 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +-- Remove messages from the device_inbox table which were orphaned +-- because a device was hidden using Synapse earlier than 1.47.0. +-- This runs as background task, but may take a bit to finish. + +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (6503, 'remove_hidden_devices_from_device_inbox', '{}'); diff --git a/tests/storage/databases/main/test_deviceinbox.py b/tests/storage/databases/main/test_deviceinbox.py index 4cfd2677f7..4b67bd15b7 100644 --- a/tests/storage/databases/main/test_deviceinbox.py +++ b/tests/storage/databases/main/test_deviceinbox.py @@ -88,3 +88,77 @@ class DeviceInboxBackgroundUpdateStoreTestCase(HomeserverTestCase): ) self.assertEqual(1, len(res)) self.assertEqual(res[0], "cur_device") + + def test_background_remove_hidden_devices_from_device_inbox(self): + """Test that the background task to delete hidden devices + from device_inboxes works properly.""" + + # create a valid device + self.get_success( + self.store.store_device(self.user_id, "cur_device", "display_name") + ) + + # create a hidden device + self.get_success( + self.store.db_pool.simple_insert( + "devices", + values={ + "user_id": self.user_id, + "device_id": "hidden_device", + "display_name": "hidden_display_name", + "hidden": True, + }, + ) + ) + + # Add device_inbox to devices + self.get_success( + self.store.db_pool.simple_insert( + "device_inbox", + { + "user_id": self.user_id, + "device_id": "cur_device", + "stream_id": 1, + "message_json": "{}", + }, + ) + ) + self.get_success( + self.store.db_pool.simple_insert( + "device_inbox", + { + "user_id": self.user_id, + "device_id": "hidden_device", + "stream_id": 2, + "message_json": "{}", + }, + ) + ) + + # Insert and run the background update. + self.get_success( + self.store.db_pool.simple_insert( + "background_updates", + { + "update_name": "remove_hidden_devices_from_device_inbox", + "progress_json": "{}", + }, + ) + ) + + # ... and tell the DataStore that it hasn't finished all updates yet + self.store.db_pool.updates._all_done = False + + self.wait_for_background_updates() + + # Make sure the background task deleted hidden devices from device_inbox + res = self.get_success( + self.store.db_pool.simple_select_onecol( + table="device_inbox", + keyvalues={}, + retcol="device_id", + desc="get_device_id_from_device_inbox", + ) + ) + self.assertEqual(1, len(res)) + self.assertEqual(res[0], "cur_device") -- cgit 1.5.1 From c01bc5f43d1c7d0a25f397b542ced57894395519 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 2 Nov 2021 09:55:52 -0400 Subject: Add remaining type hints to `synapse.events`. (#11098) --- changelog.d/11098.misc | 1 + mypy.ini | 8 +- synapse/events/__init__.py | 227 +++++++++++++++++---------- synapse/events/validator.py | 2 +- synapse/handlers/federation_event.py | 2 +- synapse/handlers/message.py | 14 +- synapse/handlers/room.py | 2 +- synapse/handlers/room_batch.py | 2 +- synapse/handlers/room_member.py | 4 +- synapse/push/bulk_push_rule_evaluator.py | 4 +- synapse/push/push_rule_evaluator.py | 10 +- synapse/rest/client/room_batch.py | 2 +- synapse/state/__init__.py | 2 +- synapse/storage/databases/main/events.py | 7 +- synapse/storage/databases/main/roommember.py | 8 +- 15 files changed, 185 insertions(+), 110 deletions(-) create mode 100644 changelog.d/11098.misc (limited to 'synapse/storage') diff --git a/changelog.d/11098.misc b/changelog.d/11098.misc new file mode 100644 index 0000000000..1e337bee54 --- /dev/null +++ b/changelog.d/11098.misc @@ -0,0 +1 @@ +Add type hints to `synapse.events`. diff --git a/mypy.ini b/mypy.ini index 119a7d8c91..600402a5d3 100644 --- a/mypy.ini +++ b/mypy.ini @@ -22,13 +22,7 @@ files = synapse/config, synapse/crypto, synapse/event_auth.py, - synapse/events/builder.py, - synapse/events/presence_router.py, - synapse/events/snapshot.py, - synapse/events/spamcheck.py, - synapse/events/third_party_rules.py, - synapse/events/utils.py, - synapse/events/validator.py, + synapse/events, synapse/federation, synapse/groups, synapse/handlers, diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index 157669ea88..38f3cf4d33 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -16,8 +16,23 @@ import abc import os -from typing import Dict, Optional, Tuple, Type - +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Generic, + Iterable, + List, + Optional, + Sequence, + Tuple, + Type, + TypeVar, + Union, + overload, +) + +from typing_extensions import Literal from unpaddedbase64 import encode_base64 from synapse.api.room_versions import EventFormatVersions, RoomVersion, RoomVersions @@ -26,6 +41,9 @@ from synapse.util.caches import intern_dict from synapse.util.frozenutils import freeze from synapse.util.stringutils import strtobool +if TYPE_CHECKING: + from synapse.events.builder import EventBuilder + # Whether we should use frozen_dict in FrozenEvent. Using frozen_dicts prevents # bugs where we accidentally share e.g. signature dicts. However, converting a # dict to frozen_dicts is expensive. @@ -37,7 +55,23 @@ from synapse.util.stringutils import strtobool USE_FROZEN_DICTS = strtobool(os.environ.get("SYNAPSE_USE_FROZEN_DICTS", "0")) -class DictProperty: +T = TypeVar("T") + + +# DictProperty (and DefaultDictProperty) require the classes they're used with to +# have a _dict property to pull properties from. +# +# TODO _DictPropertyInstance should not include EventBuilder but due to +# https://github.com/python/mypy/issues/5570 it thinks the DictProperty and +# DefaultDictProperty get applied to EventBuilder when it is in a Union with +# EventBase. This is the least invasive hack to get mypy to comply. +# +# Note that DictProperty/DefaultDictProperty cannot actually be used with +# EventBuilder as it lacks a _dict property. +_DictPropertyInstance = Union["_EventInternalMetadata", "EventBase", "EventBuilder"] + + +class DictProperty(Generic[T]): """An object property which delegates to the `_dict` within its parent object.""" __slots__ = ["key"] @@ -45,12 +79,33 @@ class DictProperty: def __init__(self, key: str): self.key = key - def __get__(self, instance, owner=None): + @overload + def __get__( + self, + instance: Literal[None], + owner: Optional[Type[_DictPropertyInstance]] = None, + ) -> "DictProperty": + ... + + @overload + def __get__( + self, + instance: _DictPropertyInstance, + owner: Optional[Type[_DictPropertyInstance]] = None, + ) -> T: + ... + + def __get__( + self, + instance: Optional[_DictPropertyInstance], + owner: Optional[Type[_DictPropertyInstance]] = None, + ) -> Union[T, "DictProperty"]: # if the property is accessed as a class property rather than an instance # property, return the property itself rather than the value if instance is None: return self try: + assert isinstance(instance, (EventBase, _EventInternalMetadata)) return instance._dict[self.key] except KeyError as e1: # We want this to look like a regular attribute error (mostly so that @@ -65,10 +120,12 @@ class DictProperty: "'%s' has no '%s' property" % (type(instance), self.key) ) from e1.__context__ - def __set__(self, instance, v): + def __set__(self, instance: _DictPropertyInstance, v: T) -> None: + assert isinstance(instance, (EventBase, _EventInternalMetadata)) instance._dict[self.key] = v - def __delete__(self, instance): + def __delete__(self, instance: _DictPropertyInstance) -> None: + assert isinstance(instance, (EventBase, _EventInternalMetadata)) try: del instance._dict[self.key] except KeyError as e1: @@ -77,7 +134,7 @@ class DictProperty: ) from e1.__context__ -class DefaultDictProperty(DictProperty): +class DefaultDictProperty(DictProperty, Generic[T]): """An extension of DictProperty which provides a default if the property is not present in the parent's _dict. @@ -86,13 +143,34 @@ class DefaultDictProperty(DictProperty): __slots__ = ["default"] - def __init__(self, key, default): + def __init__(self, key: str, default: T): super().__init__(key) self.default = default - def __get__(self, instance, owner=None): + @overload + def __get__( + self, + instance: Literal[None], + owner: Optional[Type[_DictPropertyInstance]] = None, + ) -> "DefaultDictProperty": + ... + + @overload + def __get__( + self, + instance: _DictPropertyInstance, + owner: Optional[Type[_DictPropertyInstance]] = None, + ) -> T: + ... + + def __get__( + self, + instance: Optional[_DictPropertyInstance], + owner: Optional[Type[_DictPropertyInstance]] = None, + ) -> Union[T, "DefaultDictProperty"]: if instance is None: return self + assert isinstance(instance, (EventBase, _EventInternalMetadata)) return instance._dict.get(self.key, self.default) @@ -111,22 +189,22 @@ class _EventInternalMetadata: # in the DAG) self.outlier = False - out_of_band_membership: bool = DictProperty("out_of_band_membership") - send_on_behalf_of: str = DictProperty("send_on_behalf_of") - recheck_redaction: bool = DictProperty("recheck_redaction") - soft_failed: bool = DictProperty("soft_failed") - proactively_send: bool = DictProperty("proactively_send") - redacted: bool = DictProperty("redacted") - txn_id: str = DictProperty("txn_id") - token_id: int = DictProperty("token_id") - historical: bool = DictProperty("historical") + out_of_band_membership: DictProperty[bool] = DictProperty("out_of_band_membership") + send_on_behalf_of: DictProperty[str] = DictProperty("send_on_behalf_of") + recheck_redaction: DictProperty[bool] = DictProperty("recheck_redaction") + soft_failed: DictProperty[bool] = DictProperty("soft_failed") + proactively_send: DictProperty[bool] = DictProperty("proactively_send") + redacted: DictProperty[bool] = DictProperty("redacted") + txn_id: DictProperty[str] = DictProperty("txn_id") + token_id: DictProperty[int] = DictProperty("token_id") + historical: DictProperty[bool] = DictProperty("historical") # XXX: These are set by StreamWorkerStore._set_before_and_after. # I'm pretty sure that these are never persisted to the database, so shouldn't # be here - before: RoomStreamToken = DictProperty("before") - after: RoomStreamToken = DictProperty("after") - order: Tuple[int, int] = DictProperty("order") + before: DictProperty[RoomStreamToken] = DictProperty("before") + after: DictProperty[RoomStreamToken] = DictProperty("after") + order: DictProperty[Tuple[int, int]] = DictProperty("order") def get_dict(self) -> JsonDict: return dict(self._dict) @@ -162,9 +240,6 @@ class _EventInternalMetadata: If the sender of the redaction event is allowed to redact any event due to auth rules, then this will always return false. - - Returns: - bool """ return self._dict.get("recheck_redaction", False) @@ -176,32 +251,23 @@ class _EventInternalMetadata: sent to clients. 2. They should not be added to the forward extremities (and therefore not to current state). - - Returns: - bool """ return self._dict.get("soft_failed", False) - def should_proactively_send(self): + def should_proactively_send(self) -> bool: """Whether the event, if ours, should be sent to other clients and servers. This is used for sending dummy events internally. Servers and clients can still explicitly fetch the event. - - Returns: - bool """ return self._dict.get("proactively_send", True) - def is_redacted(self): + def is_redacted(self) -> bool: """Whether the event has been redacted. This is used for efficiently checking whether an event has been marked as redacted without needing to make another database call. - - Returns: - bool """ return self._dict.get("redacted", False) @@ -241,29 +307,31 @@ class EventBase(metaclass=abc.ABCMeta): self.internal_metadata = _EventInternalMetadata(internal_metadata_dict) - auth_events = DictProperty("auth_events") - depth = DictProperty("depth") - content = DictProperty("content") - hashes = DictProperty("hashes") - origin = DictProperty("origin") - origin_server_ts = DictProperty("origin_server_ts") - prev_events = DictProperty("prev_events") - redacts = DefaultDictProperty("redacts", None) - room_id = DictProperty("room_id") - sender = DictProperty("sender") - state_key = DictProperty("state_key") - type = DictProperty("type") - user_id = DictProperty("sender") + depth: DictProperty[int] = DictProperty("depth") + content: DictProperty[JsonDict] = DictProperty("content") + hashes: DictProperty[Dict[str, str]] = DictProperty("hashes") + origin: DictProperty[str] = DictProperty("origin") + origin_server_ts: DictProperty[int] = DictProperty("origin_server_ts") + redacts: DefaultDictProperty[Optional[str]] = DefaultDictProperty("redacts", None) + room_id: DictProperty[str] = DictProperty("room_id") + sender: DictProperty[str] = DictProperty("sender") + # TODO state_key should be Optional[str], this is generally asserted in Synapse + # by calling is_state() first (which ensures this), but it is hard (not possible?) + # to properly annotate that calling is_state() asserts that state_key exists + # and is non-None. + state_key: DictProperty[str] = DictProperty("state_key") + type: DictProperty[str] = DictProperty("type") + user_id: DictProperty[str] = DictProperty("sender") @property def event_id(self) -> str: raise NotImplementedError() @property - def membership(self): + def membership(self) -> str: return self.content["membership"] - def is_state(self): + def is_state(self) -> bool: return hasattr(self, "state_key") and self.state_key is not None def get_dict(self) -> JsonDict: @@ -272,13 +340,13 @@ class EventBase(metaclass=abc.ABCMeta): return d - def get(self, key, default=None): + def get(self, key: str, default: Optional[Any] = None) -> Any: return self._dict.get(key, default) - def get_internal_metadata_dict(self): + def get_internal_metadata_dict(self) -> JsonDict: return self.internal_metadata.get_dict() - def get_pdu_json(self, time_now=None) -> JsonDict: + def get_pdu_json(self, time_now: Optional[int] = None) -> JsonDict: pdu_json = self.get_dict() if time_now is not None and "age_ts" in pdu_json["unsigned"]: @@ -305,49 +373,46 @@ class EventBase(metaclass=abc.ABCMeta): return template_json - def __set__(self, instance, value): - raise AttributeError("Unrecognized attribute %s" % (instance,)) - - def __getitem__(self, field): + def __getitem__(self, field: str) -> Optional[Any]: return self._dict[field] - def __contains__(self, field): + def __contains__(self, field: str) -> bool: return field in self._dict - def items(self): + def items(self) -> List[Tuple[str, Optional[Any]]]: return list(self._dict.items()) - def keys(self): + def keys(self) -> Iterable[str]: return self._dict.keys() - def prev_event_ids(self): + def prev_event_ids(self) -> Sequence[str]: """Returns the list of prev event IDs. The order matches the order specified in the event, though there is no meaning to it. Returns: - list[str]: The list of event IDs of this event's prev_events + The list of event IDs of this event's prev_events """ - return [e for e, _ in self.prev_events] + return [e for e, _ in self._dict["prev_events"]] - def auth_event_ids(self): + def auth_event_ids(self) -> Sequence[str]: """Returns the list of auth event IDs. The order matches the order specified in the event, though there is no meaning to it. Returns: - list[str]: The list of event IDs of this event's auth_events + The list of event IDs of this event's auth_events """ - return [e for e, _ in self.auth_events] + return [e for e, _ in self._dict["auth_events"]] - def freeze(self): + def freeze(self) -> None: """'Freeze' the event dict, so it cannot be modified by accident""" # this will be a no-op if the event dict is already frozen. self._dict = freeze(self._dict) - def __str__(self): + def __str__(self) -> str: return self.__repr__() - def __repr__(self): + def __repr__(self) -> str: rejection = f"REJECTED={self.rejected_reason}, " if self.rejected_reason else "" return ( @@ -443,7 +508,7 @@ class FrozenEventV2(EventBase): else: frozen_dict = event_dict - self._event_id = None + self._event_id: Optional[str] = None super().__init__( frozen_dict, @@ -455,7 +520,7 @@ class FrozenEventV2(EventBase): ) @property - def event_id(self): + def event_id(self) -> str: # We have to import this here as otherwise we get an import loop which # is hard to break. from synapse.crypto.event_signing import compute_event_reference_hash @@ -465,23 +530,23 @@ class FrozenEventV2(EventBase): self._event_id = "$" + encode_base64(compute_event_reference_hash(self)[1]) return self._event_id - def prev_event_ids(self): + def prev_event_ids(self) -> Sequence[str]: """Returns the list of prev event IDs. The order matches the order specified in the event, though there is no meaning to it. Returns: - list[str]: The list of event IDs of this event's prev_events + The list of event IDs of this event's prev_events """ - return self.prev_events + return self._dict["prev_events"] - def auth_event_ids(self): + def auth_event_ids(self) -> Sequence[str]: """Returns the list of auth event IDs. The order matches the order specified in the event, though there is no meaning to it. Returns: - list[str]: The list of event IDs of this event's auth_events + The list of event IDs of this event's auth_events """ - return self.auth_events + return self._dict["auth_events"] class FrozenEventV3(FrozenEventV2): @@ -490,7 +555,7 @@ class FrozenEventV3(FrozenEventV2): format_version = EventFormatVersions.V3 # All events of this type are V3 @property - def event_id(self): + def event_id(self) -> str: # We have to import this here as otherwise we get an import loop which # is hard to break. from synapse.crypto.event_signing import compute_event_reference_hash @@ -503,12 +568,14 @@ class FrozenEventV3(FrozenEventV2): return self._event_id -def _event_type_from_format_version(format_version: int) -> Type[EventBase]: +def _event_type_from_format_version( + format_version: int, +) -> Type[Union[FrozenEvent, FrozenEventV2, FrozenEventV3]]: """Returns the python type to use to construct an Event object for the given event format version. Args: - format_version (int): The event format version + format_version: The event format version Returns: type: A type that can be initialized as per the initializer of diff --git a/synapse/events/validator.py b/synapse/events/validator.py index 4d459c17f1..cf86934968 100644 --- a/synapse/events/validator.py +++ b/synapse/events/validator.py @@ -55,7 +55,7 @@ class EventValidator: ] for k in required: - if not hasattr(event, k): + if k not in event: raise SynapseError(400, "Event does not have key %s" % (k,)) # Check that the following keys have string values diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index e617db4c0d..1a1cd93b1a 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1643,7 +1643,7 @@ class FederationEventHandler: event: the event whose auth_events we want Returns: - all of the events in `event.auth_events`, after deduplication + all of the events listed in `event.auth_events_ids`, after deduplication Raises: AuthError if we were unable to fetch the auth_events for any reason. diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 4a0fccfcc6..b7bc187169 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1318,6 +1318,8 @@ class EventCreationHandler: # user is actually admin or not). is_admin_redaction = False if event.type == EventTypes.Redaction: + assert event.redacts is not None + original_event = await self.store.get_event( event.redacts, redact_behaviour=EventRedactBehaviour.AS_IS, @@ -1413,6 +1415,8 @@ class EventCreationHandler: ) if event.type == EventTypes.Redaction: + assert event.redacts is not None + original_event = await self.store.get_event( event.redacts, redact_behaviour=EventRedactBehaviour.AS_IS, @@ -1500,11 +1504,13 @@ class EventCreationHandler: next_batch_id = event.content.get( EventContentFields.MSC2716_NEXT_BATCH_ID ) - conflicting_insertion_event_id = ( - await self.store.get_insertion_event_by_batch_id( - event.room_id, next_batch_id + conflicting_insertion_event_id = None + if next_batch_id: + conflicting_insertion_event_id = ( + await self.store.get_insertion_event_by_batch_id( + event.room_id, next_batch_id + ) ) - ) if conflicting_insertion_event_id is not None: # The current insertion event that we're processing is invalid # because an insertion event already exists in the room with the diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 99e9b37344..969eb3b9b0 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -525,7 +525,7 @@ class RoomCreationHandler: ): await self.room_member_handler.update_membership( requester, - UserID.from_string(old_event["state_key"]), + UserID.from_string(old_event.state_key), new_room_id, "ban", ratelimit=False, diff --git a/synapse/handlers/room_batch.py b/synapse/handlers/room_batch.py index 2f5a3e4d19..0723286383 100644 --- a/synapse/handlers/room_batch.py +++ b/synapse/handlers/room_batch.py @@ -355,7 +355,7 @@ class RoomBatchHandler: for (event, context) in reversed(events_to_persist): await self.event_creation_handler.handle_new_client_event( await self.create_requester_for_user_id_from_app_service( - event["sender"], app_service_requester.app_service + event.sender, app_service_requester.app_service ), event=event, context=context, diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 74e6c7eca6..08244b690d 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -1669,7 +1669,9 @@ class RoomMemberMasterHandler(RoomMemberHandler): # # the prev_events consist solely of the previous membership event. prev_event_ids = [previous_membership_event.event_id] - auth_event_ids = previous_membership_event.auth_event_ids() + prev_event_ids + auth_event_ids = ( + list(previous_membership_event.auth_event_ids()) + prev_event_ids + ) event, context = await self.event_creation_handler.create_event( requester, diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 0622a37ae8..009d8e77b0 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -232,6 +232,8 @@ class BulkPushRuleEvaluator: # that user, as they might not be already joined. if event.type == EventTypes.Member and event.state_key == uid: display_name = event.content.get("displayname", None) + if not isinstance(display_name, str): + display_name = None if count_as_unread: # Add an element for the current user if the event needs to be marked as @@ -268,7 +270,7 @@ def _condition_checker( evaluator: PushRuleEvaluatorForEvent, conditions: List[dict], uid: str, - display_name: str, + display_name: Optional[str], cache: Dict[str, bool], ) -> bool: for cond in conditions: diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py index 7a8dc63976..7f68092ec5 100644 --- a/synapse/push/push_rule_evaluator.py +++ b/synapse/push/push_rule_evaluator.py @@ -18,7 +18,7 @@ import re from typing import Any, Dict, List, Optional, Pattern, Tuple, Union from synapse.events import EventBase -from synapse.types import UserID +from synapse.types import JsonDict, UserID from synapse.util import glob_to_regex, re_word_boundary from synapse.util.caches.lrucache import LruCache @@ -129,7 +129,7 @@ class PushRuleEvaluatorForEvent: self._value_cache = _flatten_dict(event) def matches( - self, condition: Dict[str, Any], user_id: str, display_name: str + self, condition: Dict[str, Any], user_id: str, display_name: Optional[str] ) -> bool: if condition["kind"] == "event_match": return self._event_match(condition, user_id) @@ -172,7 +172,7 @@ class PushRuleEvaluatorForEvent: return _glob_matches(pattern, haystack) - def _contains_display_name(self, display_name: str) -> bool: + def _contains_display_name(self, display_name: Optional[str]) -> bool: if not display_name: return False @@ -222,7 +222,7 @@ def _glob_matches(glob: str, value: str, word_boundary: bool = False) -> bool: def _flatten_dict( - d: Union[EventBase, dict], + d: Union[EventBase, JsonDict], prefix: Optional[List[str]] = None, result: Optional[Dict[str, str]] = None, ) -> Dict[str, str]: @@ -233,7 +233,7 @@ def _flatten_dict( for key, value in d.items(): if isinstance(value, str): result[".".join(prefix + [key])] = value.lower() - elif hasattr(value, "items"): + elif isinstance(value, dict): _flatten_dict(value, prefix=(prefix + [key]), result=result) return result diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py index 99f8156ad0..ab9a743bba 100644 --- a/synapse/rest/client/room_batch.py +++ b/synapse/rest/client/room_batch.py @@ -191,7 +191,7 @@ class RoomBatchSendEventRestServlet(RestServlet): depth=inherited_depth, ) - batch_id_to_connect_to = base_insertion_event["content"][ + batch_id_to_connect_to = base_insertion_event.content[ EventContentFields.MSC2716_NEXT_BATCH_ID ] diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 98a0239759..1605411b00 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -247,7 +247,7 @@ class StateHandler: return await self.get_hosts_in_room_at_events(room_id, event_ids) async def get_hosts_in_room_at_events( - self, room_id: str, event_ids: List[str] + self, room_id: str, event_ids: Iterable[str] ) -> Set[str]: """Get the hosts that were in a room at the given event ids diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 8d9086ecf0..596275c23c 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -24,6 +24,7 @@ from typing import ( Iterable, List, Optional, + Sequence, Set, Tuple, ) @@ -494,7 +495,7 @@ class PersistEventsStore: event_chain_id_gen: SequenceGenerator, event_to_room_id: Dict[str, str], event_to_types: Dict[str, Tuple[str, str]], - event_to_auth_chain: Dict[str, List[str]], + event_to_auth_chain: Dict[str, Sequence[str]], ) -> None: """Calculate the chain cover index for the given events. @@ -786,7 +787,7 @@ class PersistEventsStore: event_chain_id_gen: SequenceGenerator, event_to_room_id: Dict[str, str], event_to_types: Dict[str, Tuple[str, str]], - event_to_auth_chain: Dict[str, List[str]], + event_to_auth_chain: Dict[str, Sequence[str]], events_to_calc_chain_id_for: Set[str], chain_map: Dict[str, Tuple[int, int]], ) -> Dict[str, Tuple[int, int]]: @@ -1794,7 +1795,7 @@ class PersistEventsStore: ) # Insert an edge for every prev_event connection - for prev_event_id in event.prev_events: + for prev_event_id in event.prev_event_ids(): self.db_pool.simple_insert_txn( txn, table="insertion_event_edges", diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 4b288bb2e7..033a9831d6 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -570,7 +570,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): async def get_joined_users_from_context( self, event: EventBase, context: EventContext - ): + ) -> Dict[str, ProfileInfo]: state_group = context.state_group if not state_group: # If state_group is None it means it has yet to be assigned a @@ -584,7 +584,9 @@ class RoomMemberWorkerStore(EventsWorkerStore): event.room_id, state_group, current_state_ids, event=event, context=context ) - async def get_joined_users_from_state(self, room_id, state_entry): + async def get_joined_users_from_state( + self, room_id, state_entry + ) -> Dict[str, ProfileInfo]: state_group = state_entry.state_group if not state_group: # If state_group is None it means it has yet to be assigned a @@ -607,7 +609,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): cache_context, event=None, context=None, - ): + ) -> Dict[str, ProfileInfo]: # We don't use `state_group`, it's there so that we can cache based # on it. However, it's important that it's never None, since two current_states # with a state_group of None are likely to be different. -- cgit 1.5.1 From 6250b95efe88385bb3ec2842d5eb76f42ef762ef Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 2 Nov 2021 15:46:48 +0000 Subject: Add index to `local_group_updates.stream_id` (#11231) This should speed up startup times and generally increase performance of groups. --- changelog.d/11231.misc | 1 + scripts/synapse_port_db | 2 ++ synapse/storage/databases/main/group_server.py | 17 ++++++++++++++++- .../schema/main/delta/65/04_local_group_updates.sql | 18 ++++++++++++++++++ 4 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 changelog.d/11231.misc create mode 100644 synapse/storage/schema/main/delta/65/04_local_group_updates.sql (limited to 'synapse/storage') diff --git a/changelog.d/11231.misc b/changelog.d/11231.misc new file mode 100644 index 0000000000..c7fca7071e --- /dev/null +++ b/changelog.d/11231.misc @@ -0,0 +1 @@ +Minor speed up to start up times and getting updates for groups by adding missing index to `local_group_updates.stream_id`. diff --git a/scripts/synapse_port_db b/scripts/synapse_port_db index 349866eb9a..640ff15277 100755 --- a/scripts/synapse_port_db +++ b/scripts/synapse_port_db @@ -43,6 +43,7 @@ from synapse.storage.databases.main.end_to_end_keys import EndToEndKeyBackground from synapse.storage.databases.main.events_bg_updates import ( EventsBackgroundUpdatesStore, ) +from synapse.storage.databases.main.group_server import GroupServerWorkerStore from synapse.storage.databases.main.media_repository import ( MediaRepositoryBackgroundUpdateStore, ) @@ -181,6 +182,7 @@ class Store( StatsStore, PusherWorkerStore, PresenceBackgroundUpdateStore, + GroupServerWorkerStore, ): def execute(self, f, *args, **kwargs): return self.db_pool.runInteraction(f.__name__, f, *args, **kwargs) diff --git a/synapse/storage/databases/main/group_server.py b/synapse/storage/databases/main/group_server.py index e70d3649ff..bb621df0dd 100644 --- a/synapse/storage/databases/main/group_server.py +++ b/synapse/storage/databases/main/group_server.py @@ -13,15 +13,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Dict, List, Optional, Tuple +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple from typing_extensions import TypedDict from synapse.api.errors import SynapseError from synapse.storage._base import SQLBaseStore, db_to_json +from synapse.storage.database import DatabasePool +from synapse.storage.types import Connection from synapse.types import JsonDict from synapse.util import json_encoder +if TYPE_CHECKING: + from synapse.server import HomeServer + # The category ID for the "default" category. We don't store as null in the # database to avoid the fun of null != null _DEFAULT_CATEGORY_ID = "" @@ -35,6 +40,16 @@ class _RoomInGroup(TypedDict): class GroupServerWorkerStore(SQLBaseStore): + def __init__(self, database: DatabasePool, db_conn: Connection, hs: "HomeServer"): + database.updates.register_background_index_update( + update_name="local_group_updates_index", + index_name="local_group_updates_stream_id_index", + table="local_group_updates", + columns=("stream_id",), + unique=True, + ) + super().__init__(database, db_conn, hs) + async def get_group(self, group_id: str) -> Optional[Dict[str, Any]]: return await self.db_pool.simple_select_one( table="groups", diff --git a/synapse/storage/schema/main/delta/65/04_local_group_updates.sql b/synapse/storage/schema/main/delta/65/04_local_group_updates.sql new file mode 100644 index 0000000000..a178abfe12 --- /dev/null +++ b/synapse/storage/schema/main/delta/65/04_local_group_updates.sql @@ -0,0 +1,18 @@ +/* Copyright 2021 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Check index on `local_group_updates.stream_id`. +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (6504, 'local_group_updates_index', '{}'); -- cgit 1.5.1 From 8eec25a1d9d656905db18a2c62a5552e63db2667 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Thu, 4 Nov 2021 10:33:53 +0000 Subject: Track ongoing event fetches correctly in the presence of failure (#11240) When an event fetcher aborts due to an exception, `_event_fetch_ongoing` must be decremented, otherwise the event fetcher would never be replaced. If enough event fetchers were to fail, no more events would be fetched and requests would get stuck waiting for events. --- changelog.d/11240.bugfix | 1 + synapse/storage/databases/main/events_worker.py | 56 +++++++++++++++---------- 2 files changed, 35 insertions(+), 22 deletions(-) create mode 100644 changelog.d/11240.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/11240.bugfix b/changelog.d/11240.bugfix new file mode 100644 index 0000000000..94d73f67e3 --- /dev/null +++ b/changelog.d/11240.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug where all requests that read events from the database could get stuck as a result of losing the database connection. diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index ae37901be9..c6bf316d5b 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -28,6 +28,7 @@ from typing import ( import attr from constantly import NamedConstant, Names +from prometheus_client import Gauge from typing_extensions import Literal from twisted.internet import defer @@ -81,6 +82,12 @@ EVENT_QUEUE_ITERATIONS = 3 # No. times we block waiting for requests for events EVENT_QUEUE_TIMEOUT_S = 0.1 # Timeout when waiting for requests for events +event_fetch_ongoing_gauge = Gauge( + "synapse_event_fetch_ongoing", + "The number of event fetchers that are running", +) + + @attr.s(slots=True, auto_attribs=True) class _EventCacheEntry: event: EventBase @@ -222,6 +229,7 @@ class EventsWorkerStore(SQLBaseStore): self._event_fetch_lock = threading.Condition() self._event_fetch_list = [] self._event_fetch_ongoing = 0 + event_fetch_ongoing_gauge.set(self._event_fetch_ongoing) # We define this sequence here so that it can be referenced from both # the DataStore and PersistEventStore. @@ -732,28 +740,31 @@ class EventsWorkerStore(SQLBaseStore): """Takes a database connection and waits for requests for events from the _event_fetch_list queue. """ - i = 0 - while True: - with self._event_fetch_lock: - event_list = self._event_fetch_list - self._event_fetch_list = [] - - if not event_list: - single_threaded = self.database_engine.single_threaded - if ( - not self.USE_DEDICATED_DB_THREADS_FOR_EVENT_FETCHING - or single_threaded - or i > EVENT_QUEUE_ITERATIONS - ): - self._event_fetch_ongoing -= 1 - return - else: - self._event_fetch_lock.wait(EVENT_QUEUE_TIMEOUT_S) - i += 1 - continue - i = 0 - - self._fetch_event_list(conn, event_list) + try: + i = 0 + while True: + with self._event_fetch_lock: + event_list = self._event_fetch_list + self._event_fetch_list = [] + + if not event_list: + single_threaded = self.database_engine.single_threaded + if ( + not self.USE_DEDICATED_DB_THREADS_FOR_EVENT_FETCHING + or single_threaded + or i > EVENT_QUEUE_ITERATIONS + ): + break + else: + self._event_fetch_lock.wait(EVENT_QUEUE_TIMEOUT_S) + i += 1 + continue + i = 0 + + self._fetch_event_list(conn, event_list) + finally: + self._event_fetch_ongoing -= 1 + event_fetch_ongoing_gauge.set(self._event_fetch_ongoing) def _fetch_event_list( self, conn: Connection, event_list: List[Tuple[List[str], defer.Deferred]] @@ -977,6 +988,7 @@ class EventsWorkerStore(SQLBaseStore): if self._event_fetch_ongoing < EVENT_QUEUE_THREADS: self._event_fetch_ongoing += 1 + event_fetch_ongoing_gauge.set(self._event_fetch_ongoing) should_start = True else: should_start = False -- cgit 1.5.1 From a37df1b091c3cc9c5549243ef02c4f2a9d90bd16 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 5 Nov 2021 11:12:10 +0000 Subject: Fix rolling back when using workers (#11255) Fixes #11252 --- changelog.d/11255.bugfix | 1 + synapse/storage/prepare_database.py | 23 ++++++------ tests/storage/test_rollback_worker.py | 69 +++++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 11 deletions(-) create mode 100644 changelog.d/11255.bugfix create mode 100644 tests/storage/test_rollback_worker.py (limited to 'synapse/storage') diff --git a/changelog.d/11255.bugfix b/changelog.d/11255.bugfix new file mode 100644 index 0000000000..ce72592624 --- /dev/null +++ b/changelog.d/11255.bugfix @@ -0,0 +1 @@ +Fix rolling back Synapse version when using workers. diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 1629d2a53c..b5c1c14ee3 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -133,22 +133,23 @@ def prepare_database( # if it's a worker app, refuse to upgrade the database, to avoid multiple # workers doing it at once. - if ( - config.worker.worker_app is not None - and version_info.current_version != SCHEMA_VERSION - ): + if config.worker.worker_app is None: + _upgrade_existing_database( + cur, + version_info, + database_engine, + config, + databases=databases, + ) + elif version_info.current_version < SCHEMA_VERSION: + # If the DB is on an older version than we expect the we refuse + # to start the worker (as the main process needs to run first to + # update the schema). raise UpgradeDatabaseException( OUTDATED_SCHEMA_ON_WORKER_ERROR % (SCHEMA_VERSION, version_info.current_version) ) - _upgrade_existing_database( - cur, - version_info, - database_engine, - config, - databases=databases, - ) else: logger.info("%r: Initialising new database", databases) diff --git a/tests/storage/test_rollback_worker.py b/tests/storage/test_rollback_worker.py new file mode 100644 index 0000000000..a6be9a1bb1 --- /dev/null +++ b/tests/storage/test_rollback_worker.py @@ -0,0 +1,69 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from synapse.app.generic_worker import GenericWorkerServer +from synapse.storage.database import LoggingDatabaseConnection +from synapse.storage.prepare_database import PrepareDatabaseException, prepare_database +from synapse.storage.schema import SCHEMA_VERSION + +from tests.unittest import HomeserverTestCase + + +class WorkerSchemaTests(HomeserverTestCase): + def make_homeserver(self, reactor, clock): + hs = self.setup_test_homeserver( + federation_http_client=None, homeserver_to_use=GenericWorkerServer + ) + return hs + + def default_config(self): + conf = super().default_config() + + # Mark this as a worker app. + conf["worker_app"] = "yes" + + return conf + + def test_rolling_back(self): + """Test that workers can start if the DB is a newer schema version""" + + db_pool = self.hs.get_datastore().db_pool + db_conn = LoggingDatabaseConnection( + db_pool._db_pool.connect(), + db_pool.engine, + "tests", + ) + + cur = db_conn.cursor() + cur.execute("UPDATE schema_version SET version = ?", (SCHEMA_VERSION + 1,)) + + db_conn.commit() + + prepare_database(db_conn, db_pool.engine, self.hs.config) + + def test_not_upgraded(self): + """Test that workers don't start if the DB has an older schema version""" + db_pool = self.hs.get_datastore().db_pool + db_conn = LoggingDatabaseConnection( + db_pool._db_pool.connect(), + db_pool.engine, + "tests", + ) + + cur = db_conn.cursor() + cur.execute("UPDATE schema_version SET version = ?", (SCHEMA_VERSION - 1,)) + + db_conn.commit() + + with self.assertRaises(PrepareDatabaseException): + prepare_database(db_conn, db_pool.engine, self.hs.config) -- cgit 1.5.1 From 98c8fc6ce82d9d6b1bd21bf70df6a0e1ce91c1dc Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 8 Nov 2021 09:54:47 +0000 Subject: Handle federation inbound instances being killed more gracefully (#11262) * Make lock better handle process being killed If the process gets killed and restarted (so that it didn't have a chance to drop its locks gracefully) then there may still be locks in the DB that are for the same instance that haven't yet timed out but are safe to delete. We handle this case by a) checking if the current instance already has taken out the lock, and b) if not then ignoring locks that are for the same instance. * Periodically check for old staged events This is to protect against other instances dying and their locks timing out. --- changelog.d/11262.bugfix | 1 + synapse/federation/federation_server.py | 5 +++++ synapse/storage/databases/main/lock.py | 31 +++++++++++++++++++++---------- 3 files changed, 27 insertions(+), 10 deletions(-) create mode 100644 changelog.d/11262.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/11262.bugfix b/changelog.d/11262.bugfix new file mode 100644 index 0000000000..768fbb8973 --- /dev/null +++ b/changelog.d/11262.bugfix @@ -0,0 +1 @@ +Fix a bug where if a remote event is being processed by a worker when it gets killed then it won't get processed on restart. Introduced in v1.37.1. diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 42e3acecb4..9a8758e9a6 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -213,6 +213,11 @@ class FederationServer(FederationBase): self._started_handling_of_staged_events = True self._handle_old_staged_events() + # Start a periodic check for old staged events. This is to handle + # the case where locks time out, e.g. if another process gets killed + # without dropping its locks. + self._clock.looping_call(self._handle_old_staged_events, 60 * 1000) + # keep this as early as possible to make the calculated origin ts as # accurate as possible. request_time = self._clock.time_msec() diff --git a/synapse/storage/databases/main/lock.py b/synapse/storage/databases/main/lock.py index 3d1dff660b..3d0df0cbd4 100644 --- a/synapse/storage/databases/main/lock.py +++ b/synapse/storage/databases/main/lock.py @@ -14,6 +14,7 @@ import logging from types import TracebackType from typing import TYPE_CHECKING, Dict, Optional, Tuple, Type +from weakref import WeakValueDictionary from twisted.internet.interfaces import IReactorCore @@ -61,7 +62,7 @@ class LockStore(SQLBaseStore): # A map from `(lock_name, lock_key)` to the token of any locks that we # think we currently hold. - self._live_tokens: Dict[Tuple[str, str], str] = {} + self._live_tokens: Dict[Tuple[str, str], Lock] = WeakValueDictionary() # When we shut down we want to remove the locks. Technically this can # lead to a race, as we may drop the lock while we are still processing. @@ -80,10 +81,10 @@ class LockStore(SQLBaseStore): # We need to take a copy of the tokens dict as dropping the locks will # cause the dictionary to change. - tokens = dict(self._live_tokens) + locks = dict(self._live_tokens) - for (lock_name, lock_key), token in tokens.items(): - await self._drop_lock(lock_name, lock_key, token) + for lock in locks.values(): + await lock.release() logger.info("Dropped locks due to shutdown") @@ -93,6 +94,11 @@ class LockStore(SQLBaseStore): used (otherwise the lock will leak). """ + # Check if this process has taken out a lock and if it's still valid. + lock = self._live_tokens.get((lock_name, lock_key)) + if lock and await lock.is_still_valid(): + return None + now = self._clock.time_msec() token = random_string(6) @@ -100,7 +106,9 @@ class LockStore(SQLBaseStore): def _try_acquire_lock_txn(txn: LoggingTransaction) -> bool: # We take out the lock if either a) there is no row for the lock - # already or b) the existing row has timed out. + # already, b) the existing row has timed out, or c) the row is + # for this instance (which means the process got killed and + # restarted) sql = """ INSERT INTO worker_locks (lock_name, lock_key, instance_name, token, last_renewed_ts) VALUES (?, ?, ?, ?, ?) @@ -112,6 +120,7 @@ class LockStore(SQLBaseStore): last_renewed_ts = EXCLUDED.last_renewed_ts WHERE worker_locks.last_renewed_ts < ? + OR worker_locks.instance_name = EXCLUDED.instance_name """ txn.execute( sql, @@ -148,11 +157,11 @@ class LockStore(SQLBaseStore): WHERE lock_name = ? AND lock_key = ? - AND last_renewed_ts < ? + AND (last_renewed_ts < ? OR instance_name = ?) """ txn.execute( sql, - (lock_name, lock_key, now - _LOCK_TIMEOUT_MS), + (lock_name, lock_key, now - _LOCK_TIMEOUT_MS, self._instance_name), ) inserted = self.db_pool.simple_upsert_txn_emulated( @@ -179,9 +188,7 @@ class LockStore(SQLBaseStore): if not did_lock: return None - self._live_tokens[(lock_name, lock_key)] = token - - return Lock( + lock = Lock( self._reactor, self._clock, self, @@ -190,6 +197,10 @@ class LockStore(SQLBaseStore): token=token, ) + self._live_tokens[(lock_name, lock_key)] = lock + + return lock + async def _is_lock_still_valid( self, lock_name: str, lock_key: str, token: str ) -> bool: -- cgit 1.5.1 From 0c82d4aabee5dc1751d261b8b99623383f29a61d Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 8 Nov 2021 09:36:49 -0500 Subject: Fix typo in comment from #11255. (#11276) --- changelog.d/11276.bugfix | 1 + synapse/storage/prepare_database.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/11276.bugfix (limited to 'synapse/storage') diff --git a/changelog.d/11276.bugfix b/changelog.d/11276.bugfix new file mode 100644 index 0000000000..ce72592624 --- /dev/null +++ b/changelog.d/11276.bugfix @@ -0,0 +1 @@ +Fix rolling back Synapse version when using workers. diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index b5c1c14ee3..8b9c6adae2 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -142,7 +142,7 @@ def prepare_database( databases=databases, ) elif version_info.current_version < SCHEMA_VERSION: - # If the DB is on an older version than we expect the we refuse + # If the DB is on an older version than we expect then we refuse # to start the worker (as the main process needs to run first to # update the schema). raise UpgradeDatabaseException( -- cgit 1.5.1 From 4ee71b96377c39a2b9d060c6aafbce62fb16ccc6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 8 Nov 2021 16:08:02 +0000 Subject: Add some background update admin APIs (#11263) Fixes #11259 --- changelog.d/11263.feature | 1 + docs/SUMMARY.md | 1 + .../administration/admin_api/background_updates.md | 84 ++++++++ synapse/rest/admin/__init__.py | 6 + synapse/rest/admin/background_updates.py | 107 ++++++++++ synapse/storage/background_updates.py | 65 ++++-- synapse/storage/database.py | 4 + tests/rest/admin/test_background_updates.py | 218 +++++++++++++++++++++ 8 files changed, 468 insertions(+), 18 deletions(-) create mode 100644 changelog.d/11263.feature create mode 100644 docs/usage/administration/admin_api/background_updates.md create mode 100644 synapse/rest/admin/background_updates.py create mode 100644 tests/rest/admin/test_background_updates.py (limited to 'synapse/storage') diff --git a/changelog.d/11263.feature b/changelog.d/11263.feature new file mode 100644 index 0000000000..831e76ec9f --- /dev/null +++ b/changelog.d/11263.feature @@ -0,0 +1 @@ +Add some background update admin APIs. diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 35412ea92c..04320ab07b 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -51,6 +51,7 @@ - [Administration](usage/administration/README.md) - [Admin API](usage/administration/admin_api/README.md) - [Account Validity](admin_api/account_validity.md) + - [Background Updates](usage/administration/admin_api/background_updates.md) - [Delete Group](admin_api/delete_group.md) - [Event Reports](admin_api/event_reports.md) - [Media](admin_api/media_admin_api.md) diff --git a/docs/usage/administration/admin_api/background_updates.md b/docs/usage/administration/admin_api/background_updates.md new file mode 100644 index 0000000000..b36d7fe398 --- /dev/null +++ b/docs/usage/administration/admin_api/background_updates.md @@ -0,0 +1,84 @@ +# Background Updates API + +This API allows a server administrator to manage the background updates being +run against the database. + +## Status + +This API gets the current status of the background updates. + + +The API is: + +``` +GET /_synapse/admin/v1/background_updates/status +``` + +Returning: + +```json +{ + "enabled": true, + "current_updates": { + "": { + "name": "", + "total_item_count": 50, + "total_duration_ms": 10000.0, + "average_items_per_ms": 2.2, + }, + } +} +``` + +`enabled` whether the background updates are enabled or disabled. + +`db_name` the database name (usually Synapse is configured with a single database named 'master'). + +For each update: + +`name` the name of the update. +`total_item_count` total number of "items" processed (the meaning of 'items' depends on the update in question). +`total_duration_ms` how long the background process has been running, not including time spent sleeping. +`average_items_per_ms` how many items are processed per millisecond based on an exponential average. + + + +## Enabled + +This API allow pausing background updates. + +Background updates should *not* be paused for significant periods of time, as +this can affect the performance of Synapse. + +*Note*: This won't persist over restarts. + +*Note*: This won't cancel any update query that is currently running. This is +usually fine since most queries are short lived, except for `CREATE INDEX` +background updates which won't be cancelled once started. + + +The API is: + +``` +POST /_synapse/admin/v1/background_updates/enabled +``` + +with the following body: + +```json +{ + "enabled": false +} +``` + +`enabled` sets whether the background updates are enabled or disabled. + +The API returns the `enabled` param. + +```json +{ + "enabled": false +} +``` + +There is also a `GET` version which returns the `enabled` state. diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index 70514e814f..81e98f81d6 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -25,6 +25,10 @@ from synapse.http.server import HttpServer, JsonResource from synapse.http.servlet import RestServlet, parse_json_object_from_request from synapse.http.site import SynapseRequest from synapse.rest.admin._base import admin_patterns, assert_requester_is_admin +from synapse.rest.admin.background_updates import ( + BackgroundUpdateEnabledRestServlet, + BackgroundUpdateRestServlet, +) from synapse.rest.admin.devices import ( DeleteDevicesRestServlet, DeviceRestServlet, @@ -247,6 +251,8 @@ def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: # Some servlets only get registered for the main process. if hs.config.worker.worker_app is None: SendServerNoticeServlet(hs).register(http_server) + BackgroundUpdateEnabledRestServlet(hs).register(http_server) + BackgroundUpdateRestServlet(hs).register(http_server) def register_servlets_for_client_rest_resource( diff --git a/synapse/rest/admin/background_updates.py b/synapse/rest/admin/background_updates.py new file mode 100644 index 0000000000..0d0183bf20 --- /dev/null +++ b/synapse/rest/admin/background_updates.py @@ -0,0 +1,107 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +from typing import TYPE_CHECKING, Tuple + +from synapse.api.errors import SynapseError +from synapse.http.servlet import RestServlet, parse_json_object_from_request +from synapse.http.site import SynapseRequest +from synapse.rest.admin._base import admin_patterns, assert_user_is_admin +from synapse.types import JsonDict + +if TYPE_CHECKING: + from synapse.server import HomeServer + +logger = logging.getLogger(__name__) + + +class BackgroundUpdateEnabledRestServlet(RestServlet): + """Allows temporarily disabling background updates""" + + PATTERNS = admin_patterns("/background_updates/enabled") + + def __init__(self, hs: "HomeServer"): + self.group_server = hs.get_groups_server_handler() + self.is_mine_id = hs.is_mine_id + self.auth = hs.get_auth() + + self.data_stores = hs.get_datastores() + + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + requester = await self.auth.get_user_by_req(request) + await assert_user_is_admin(self.auth, requester.user) + + # We need to check that all configured databases have updates enabled. + # (They *should* all be in sync.) + enabled = all(db.updates.enabled for db in self.data_stores.databases) + + return 200, {"enabled": enabled} + + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + requester = await self.auth.get_user_by_req(request) + await assert_user_is_admin(self.auth, requester.user) + + body = parse_json_object_from_request(request) + + enabled = body.get("enabled", True) + + if not isinstance(enabled, bool): + raise SynapseError(400, "'enabled' parameter must be a boolean") + + for db in self.data_stores.databases: + db.updates.enabled = enabled + + # If we're re-enabling them ensure that we start the background + # process again. + if enabled: + db.updates.start_doing_background_updates() + + return 200, {"enabled": enabled} + + +class BackgroundUpdateRestServlet(RestServlet): + """Fetch information about background updates""" + + PATTERNS = admin_patterns("/background_updates/status") + + def __init__(self, hs: "HomeServer"): + self.group_server = hs.get_groups_server_handler() + self.is_mine_id = hs.is_mine_id + self.auth = hs.get_auth() + + self.data_stores = hs.get_datastores() + + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + requester = await self.auth.get_user_by_req(request) + await assert_user_is_admin(self.auth, requester.user) + + # We need to check that all configured databases have updates enabled. + # (They *should* all be in sync.) + enabled = all(db.updates.enabled for db in self.data_stores.databases) + + current_updates = {} + + for db in self.data_stores.databases: + update = db.updates.get_current_update() + if not update: + continue + + current_updates[db.name()] = { + "name": update.name, + "total_item_count": update.total_item_count, + "total_duration_ms": update.total_duration_ms, + "average_items_per_ms": update.average_items_per_ms(), + } + + return 200, {"enabled": enabled, "current_updates": current_updates} diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 82b31d24f1..b9a8ca997e 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -100,29 +100,58 @@ class BackgroundUpdater: ] = {} self._all_done = False + # Whether we're currently running updates + self._running = False + + # Whether background updates are enabled. This allows us to + # enable/disable background updates via the admin API. + self.enabled = True + + def get_current_update(self) -> Optional[BackgroundUpdatePerformance]: + """Returns the current background update, if any.""" + + update_name = self._current_background_update + if not update_name: + return None + + perf = self._background_update_performance.get(update_name) + if not perf: + perf = BackgroundUpdatePerformance(update_name) + + return perf + def start_doing_background_updates(self) -> None: - run_as_background_process("background_updates", self.run_background_updates) + if self.enabled: + run_as_background_process("background_updates", self.run_background_updates) async def run_background_updates(self, sleep: bool = True) -> None: - logger.info("Starting background schema updates") - while True: - if sleep: - await self._clock.sleep(self.BACKGROUND_UPDATE_INTERVAL_MS / 1000.0) + if self._running or not self.enabled: + return - try: - result = await self.do_next_background_update( - self.BACKGROUND_UPDATE_DURATION_MS - ) - except Exception: - logger.exception("Error doing update") - else: - if result: - logger.info( - "No more background updates to do." - " Unscheduling background update task." + self._running = True + + try: + logger.info("Starting background schema updates") + while self.enabled: + if sleep: + await self._clock.sleep(self.BACKGROUND_UPDATE_INTERVAL_MS / 1000.0) + + try: + result = await self.do_next_background_update( + self.BACKGROUND_UPDATE_DURATION_MS ) - self._all_done = True - return None + except Exception: + logger.exception("Error doing update") + else: + if result: + logger.info( + "No more background updates to do." + " Unscheduling background update task." + ) + self._all_done = True + return None + finally: + self._running = False async def has_completed_background_updates(self) -> bool: """Check if all the background updates have completed diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 5c71e27518..d4cab69ebf 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -446,6 +446,10 @@ class DatabasePool: self._check_safe_to_upsert, ) + def name(self) -> str: + "Return the name of this database" + return self._database_config.name + def is_running(self) -> bool: """Is the database pool currently running""" return self._db_pool.running diff --git a/tests/rest/admin/test_background_updates.py b/tests/rest/admin/test_background_updates.py new file mode 100644 index 0000000000..78c48db552 --- /dev/null +++ b/tests/rest/admin/test_background_updates.py @@ -0,0 +1,218 @@ +# Copyright 2021 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import synapse.rest.admin +from synapse.rest.client import login +from synapse.server import HomeServer + +from tests import unittest + + +class BackgroundUpdatesTestCase(unittest.HomeserverTestCase): + servlets = [ + synapse.rest.admin.register_servlets, + login.register_servlets, + ] + + def prepare(self, reactor, clock, hs: HomeServer): + self.store = hs.get_datastore() + self.admin_user = self.register_user("admin", "pass", admin=True) + self.admin_user_tok = self.login("admin", "pass") + + def _register_bg_update(self): + "Adds a bg update but doesn't start it" + + async def _fake_update(progress, batch_size) -> int: + await self.clock.sleep(0.2) + return batch_size + + self.store.db_pool.updates.register_background_update_handler( + "test_update", + _fake_update, + ) + + self.get_success( + self.store.db_pool.simple_insert( + table="background_updates", + values={ + "update_name": "test_update", + "progress_json": "{}", + }, + ) + ) + + def test_status_empty(self): + """Test the status API works.""" + + channel = self.make_request( + "GET", + "/_synapse/admin/v1/background_updates/status", + access_token=self.admin_user_tok, + ) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + + # Background updates should be enabled, but none should be running. + self.assertDictEqual( + channel.json_body, {"current_updates": {}, "enabled": True} + ) + + def test_status_bg_update(self): + """Test the status API works with a background update.""" + + # Create a new background update + + self._register_bg_update() + + self.store.db_pool.updates.start_doing_background_updates() + self.reactor.pump([1.0, 1.0]) + + channel = self.make_request( + "GET", + "/_synapse/admin/v1/background_updates/status", + access_token=self.admin_user_tok, + ) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + + # Background updates should be enabled, and one should be running. + self.assertDictEqual( + channel.json_body, + { + "current_updates": { + "master": { + "name": "test_update", + "average_items_per_ms": 0.1, + "total_duration_ms": 1000.0, + "total_item_count": 100, + } + }, + "enabled": True, + }, + ) + + def test_enabled(self): + """Test the enabled API works.""" + + # Create a new background update + + self._register_bg_update() + self.store.db_pool.updates.start_doing_background_updates() + + # Test that GET works and returns enabled is True. + channel = self.make_request( + "GET", + "/_synapse/admin/v1/background_updates/enabled", + access_token=self.admin_user_tok, + ) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertDictEqual(channel.json_body, {"enabled": True}) + + # Disable the BG updates + channel = self.make_request( + "POST", + "/_synapse/admin/v1/background_updates/enabled", + content={"enabled": False}, + access_token=self.admin_user_tok, + ) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertDictEqual(channel.json_body, {"enabled": False}) + + # Advance a bit and get the current status, note this will finish the in + # flight background update so we call it the status API twice and check + # there was no change. + self.reactor.pump([1.0, 1.0]) + + channel = self.make_request( + "GET", + "/_synapse/admin/v1/background_updates/status", + access_token=self.admin_user_tok, + ) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertDictEqual( + channel.json_body, + { + "current_updates": { + "master": { + "name": "test_update", + "average_items_per_ms": 0.1, + "total_duration_ms": 1000.0, + "total_item_count": 100, + } + }, + "enabled": False, + }, + ) + + # Run the reactor for a bit so the BG updates would have a chance to run + # if they were to. + self.reactor.pump([1.0, 1.0]) + + channel = self.make_request( + "GET", + "/_synapse/admin/v1/background_updates/status", + access_token=self.admin_user_tok, + ) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + + # There should be no change from the previous /status response. + self.assertDictEqual( + channel.json_body, + { + "current_updates": { + "master": { + "name": "test_update", + "average_items_per_ms": 0.1, + "total_duration_ms": 1000.0, + "total_item_count": 100, + } + }, + "enabled": False, + }, + ) + + # Re-enable the background updates. + + channel = self.make_request( + "POST", + "/_synapse/admin/v1/background_updates/enabled", + content={"enabled": True}, + access_token=self.admin_user_tok, + ) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + + self.assertDictEqual(channel.json_body, {"enabled": True}) + + self.reactor.pump([1.0, 1.0]) + + channel = self.make_request( + "GET", + "/_synapse/admin/v1/background_updates/status", + access_token=self.admin_user_tok, + ) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + + # Background updates should be enabled and making progress. + self.assertDictEqual( + channel.json_body, + { + "current_updates": { + "master": { + "name": "test_update", + "average_items_per_ms": 0.1, + "total_duration_ms": 2000.0, + "total_item_count": 200, + } + }, + "enabled": True, + }, + ) -- cgit 1.5.1 From 84f235aea47e2d2f9875f7334d8497660320f55e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 8 Nov 2021 21:21:10 -0600 Subject: Rename to more clear `get_insertion_event_id_by_batch_id` (MSC2716) (#11244) `get_insertion_event_by_batch_id` -> `get_insertion_event_id_by_batch_id` Split out from https://github.com/matrix-org/synapse/pull/11114 --- changelog.d/11244.misc | 1 + synapse/handlers/message.py | 2 +- synapse/rest/client/room_batch.py | 2 +- synapse/storage/databases/main/room_batch.py | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 changelog.d/11244.misc (limited to 'synapse/storage') diff --git a/changelog.d/11244.misc b/changelog.d/11244.misc new file mode 100644 index 0000000000..c6e65df97f --- /dev/null +++ b/changelog.d/11244.misc @@ -0,0 +1 @@ +Fix [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) historical messages backfilling in random order on remote homeservers. diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index b7bc187169..d4c2a6ab7a 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1507,7 +1507,7 @@ class EventCreationHandler: conflicting_insertion_event_id = None if next_batch_id: conflicting_insertion_event_id = ( - await self.store.get_insertion_event_by_batch_id( + await self.store.get_insertion_event_id_by_batch_id( event.room_id, next_batch_id ) ) diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py index 46f033eee2..e4c9451ae0 100644 --- a/synapse/rest/client/room_batch.py +++ b/synapse/rest/client/room_batch.py @@ -112,7 +112,7 @@ class RoomBatchSendEventRestServlet(RestServlet): # and have the batch connected. if batch_id_from_query: corresponding_insertion_event_id = ( - await self.store.get_insertion_event_by_batch_id( + await self.store.get_insertion_event_id_by_batch_id( room_id, batch_id_from_query ) ) diff --git a/synapse/storage/databases/main/room_batch.py b/synapse/storage/databases/main/room_batch.py index dcbce8fdcf..97b2618437 100644 --- a/synapse/storage/databases/main/room_batch.py +++ b/synapse/storage/databases/main/room_batch.py @@ -18,7 +18,7 @@ from synapse.storage._base import SQLBaseStore class RoomBatchStore(SQLBaseStore): - async def get_insertion_event_by_batch_id( + async def get_insertion_event_id_by_batch_id( self, room_id: str, batch_id: str ) -> Optional[str]: """Retrieve a insertion event ID. -- cgit 1.5.1