summary refs log tree commit diff
path: root/synapse
diff options
context:
space:
mode:
Diffstat (limited to 'synapse')
-rw-r--r--synapse/_scripts/update_synapse_database.py1
-rw-r--r--synapse/api/auth/internal.py2
-rw-r--r--synapse/api/auth/msc3861_delegated.py2
-rw-r--r--synapse/api/presence.py43
-rw-r--r--synapse/app/_base.py12
-rw-r--r--synapse/app/admin_cmd.py14
-rw-r--r--synapse/app/generic_worker.py2
-rw-r--r--synapse/appservice/api.py32
-rw-r--r--synapse/appservice/scheduler.py13
-rw-r--r--synapse/config/_base.py10
-rw-r--r--synapse/config/cas.py3
-rw-r--r--synapse/config/oembed.py2
-rw-r--r--synapse/config/server.py11
-rw-r--r--synapse/crypto/keyring.py35
-rw-r--r--synapse/events/__init__.py5
-rw-r--r--synapse/events/builder.py8
-rw-r--r--synapse/events/snapshot.py2
-rw-r--r--synapse/events/validator.py6
-rw-r--r--synapse/handlers/account.py2
-rw-r--r--synapse/handlers/admin.py65
-rw-r--r--synapse/handlers/cas.py2
-rw-r--r--synapse/handlers/device.py92
-rw-r--r--synapse/handlers/federation_event.py8
-rw-r--r--synapse/handlers/initial_sync.py8
-rw-r--r--synapse/handlers/message.py6
-rw-r--r--synapse/handlers/pagination.py456
-rw-r--r--synapse/handlers/presence.py300
-rw-r--r--synapse/handlers/receipts.py26
-rw-r--r--synapse/handlers/room.py177
-rw-r--r--synapse/handlers/room_member.py30
-rw-r--r--synapse/handlers/send_email.py14
-rw-r--r--synapse/handlers/sync.py54
-rw-r--r--synapse/http/client.py5
-rw-r--r--synapse/http/federation/matrix_federation_agent.py29
-rw-r--r--synapse/http/servlet.py33
-rw-r--r--synapse/logging/context.py4
-rw-r--r--synapse/logging/opentracing.py10
-rw-r--r--synapse/media/url_previewer.py4
-rw-r--r--synapse/metrics/__init__.py8
-rw-r--r--synapse/metrics/background_process_metrics.py28
-rw-r--r--synapse/module_api/__init__.py28
-rw-r--r--synapse/module_api/callbacks/third_party_event_rules_callbacks.py11
-rw-r--r--synapse/notifier.py6
-rw-r--r--synapse/push/mailer.py33
-rw-r--r--synapse/replication/http/devices.py4
-rw-r--r--synapse/replication/tcp/handler.py8
-rw-r--r--synapse/replication/tcp/resource.py7
-rw-r--r--synapse/rest/__init__.py2
-rw-r--r--synapse/rest/admin/__init__.py20
-rw-r--r--synapse/rest/admin/rooms.py78
-rw-r--r--synapse/rest/admin/users.py14
-rw-r--r--synapse/rest/client/_base.py4
-rw-r--r--synapse/rest/client/account.py116
-rw-r--r--synapse/rest/client/account_data.py10
-rw-r--r--synapse/rest/client/notifications.py2
-rw-r--r--synapse/rest/client/read_marker.py2
-rw-r--r--synapse/rest/client/receipts.py2
-rw-r--r--synapse/rest/consent/consent_resource.py2
-rw-r--r--synapse/rest/synapse/client/unsubscribe.py17
-rw-r--r--synapse/server_notices/consent_server_notices.py6
-rw-r--r--synapse/state/__init__.py13
-rw-r--r--synapse/state/v1.py5
-rw-r--r--synapse/state/v2.py7
-rw-r--r--synapse/storage/background_updates.py2
-rw-r--r--synapse/storage/controllers/persist_events.py17
-rw-r--r--synapse/storage/database.py8
-rw-r--r--synapse/storage/databases/main/__init__.py7
-rw-r--r--synapse/storage/databases/main/account_data.py14
-rw-r--r--synapse/storage/databases/main/client_ips.py11
-rw-r--r--synapse/storage/databases/main/deviceinbox.py28
-rw-r--r--synapse/storage/databases/main/devices.py34
-rw-r--r--synapse/storage/databases/main/event_federation.py6
-rw-r--r--synapse/storage/databases/main/event_push_actions.py72
-rw-r--r--synapse/storage/databases/main/events.py2
-rw-r--r--synapse/storage/databases/main/experimental_features.py7
-rw-r--r--synapse/storage/databases/main/keys.py236
-rw-r--r--synapse/storage/databases/main/purge_events.py4
-rw-r--r--synapse/storage/databases/main/receipts.py29
-rw-r--r--synapse/storage/databases/main/registration.py96
-rw-r--r--synapse/storage/databases/main/stats.py1
-rw-r--r--synapse/storage/databases/main/tags.py6
-rw-r--r--synapse/storage/databases/main/task_scheduler.py6
-rw-r--r--synapse/storage/engines/_base.py6
-rw-r--r--synapse/storage/engines/postgres.py4
-rw-r--r--synapse/storage/engines/sqlite.py4
-rw-r--r--synapse/storage/schema/__init__.py6
-rw-r--r--synapse/storage/schema/main/delta/48/group_unique_indexes.py4
-rw-r--r--synapse/storage/schema/main/delta/82/02_scheduled_tasks_index.sql16
-rw-r--r--synapse/types/__init__.py10
-rw-r--r--synapse/util/async_helpers.py25
-rw-r--r--synapse/util/caches/dictionary_cache.py10
-rw-r--r--synapse/util/caches/expiringcache.py24
-rw-r--r--synapse/util/caches/ttlcache.py10
-rw-r--r--synapse/util/gai_resolver.py2
-rw-r--r--synapse/util/task_scheduler.py158
-rw-r--r--synapse/visibility.py6
96 files changed, 1620 insertions, 1212 deletions
diff --git a/synapse/_scripts/update_synapse_database.py b/synapse/_scripts/update_synapse_database.py

index f97aecf8d5..992ae43881 100644 --- a/synapse/_scripts/update_synapse_database.py +++ b/synapse/_scripts/update_synapse_database.py
@@ -1,4 +1,3 @@ -#!/usr/bin/env python # Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/synapse/api/auth/internal.py b/synapse/api/auth/internal.py
index 6a5fd44ec0..a75f6f2cc4 100644 --- a/synapse/api/auth/internal.py +++ b/synapse/api/auth/internal.py
@@ -268,7 +268,7 @@ class InternalAuth(BaseAuth): stored_user = await self.store.get_user_by_id(user_id) if not stored_user: raise InvalidClientTokenError("Unknown user_id %s" % user_id) - if not stored_user["is_guest"]: + if not stored_user.is_guest: raise InvalidClientTokenError( "Guest access token used for regular user" ) diff --git a/synapse/api/auth/msc3861_delegated.py b/synapse/api/auth/msc3861_delegated.py
index ef5d3f9b81..31bb035cc8 100644 --- a/synapse/api/auth/msc3861_delegated.py +++ b/synapse/api/auth/msc3861_delegated.py
@@ -300,7 +300,7 @@ class MSC3861DelegatedAuth(BaseAuth): user_id = UserID(username, self._hostname) # First try to find a user from the username claim - user_info = await self.store.get_userinfo_by_id(user_id=user_id.to_string()) + user_info = await self.store.get_user_by_id(user_id=user_id.to_string()) if user_info is None: # If the user does not exist, we should create it on the fly # TODO: we could use SCIM to provision users ahead of time and listen diff --git a/synapse/api/presence.py b/synapse/api/presence.py
index b80aa83cb3..b78f419994 100644 --- a/synapse/api/presence.py +++ b/synapse/api/presence.py
@@ -20,18 +20,53 @@ from synapse.api.constants import PresenceState from synapse.types import JsonDict +@attr.s(slots=True, auto_attribs=True) +class UserDevicePresenceState: + """ + Represents the current presence state of a user's device. + + user_id: The user ID. + device_id: The user's device ID. + state: The presence state, see PresenceState. + last_active_ts: Time in msec that the device last interacted with server. + last_sync_ts: Time in msec that the device last *completed* a sync + (or event stream). + """ + + user_id: str + device_id: Optional[str] + state: str + last_active_ts: int + last_sync_ts: int + + @classmethod + def default( + cls, user_id: str, device_id: Optional[str] + ) -> "UserDevicePresenceState": + """Returns a default presence state.""" + return cls( + user_id=user_id, + device_id=device_id, + state=PresenceState.OFFLINE, + last_active_ts=0, + last_sync_ts=0, + ) + + @attr.s(slots=True, frozen=True, auto_attribs=True) class UserPresenceState: """Represents the current presence state of the user. - user_id - last_active: Time in msec that the user last interacted with server. - last_federation_update: Time in msec since either a) we sent a presence + user_id: The user ID. + state: The presence state, see PresenceState. + last_active_ts: Time in msec that the user last interacted with server. + last_federation_update_ts: Time in msec since either a) we sent a presence update to other servers or b) we received a presence update, depending on if is a local user or not. - last_user_sync: Time in msec that the user last *completed* a sync + last_user_sync_ts: Time in msec that the user last *completed* a sync (or event stream). status_msg: User set status message. + currently_active: True if the user is currently syncing. """ user_id: str diff --git a/synapse/app/_base.py b/synapse/app/_base.py
index a94b57a671..9ac7e4313e 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py
@@ -27,9 +27,7 @@ from typing import ( Any, Awaitable, Callable, - Collection, Dict, - Iterable, List, NoReturn, Optional, @@ -76,7 +74,7 @@ from synapse.module_api.callbacks.spamchecker_callbacks import load_legacy_spam_ from synapse.module_api.callbacks.third_party_event_rules_callbacks import ( load_legacy_third_party_event_rules, ) -from synapse.types import ISynapseReactor +from synapse.types import ISynapseReactor, StrCollection from synapse.util import SYNAPSE_VERSION from synapse.util.caches.lrucache import setup_expire_lru_cache_entries from synapse.util.daemonize import daemonize_process @@ -278,7 +276,7 @@ def register_start( reactor.callWhenRunning(lambda: defer.ensureDeferred(wrapper())) -def listen_metrics(bind_addresses: Iterable[str], port: int) -> None: +def listen_metrics(bind_addresses: StrCollection, port: int) -> None: """ Start Prometheus metrics server. """ @@ -315,7 +313,7 @@ def _set_prometheus_client_use_created_metrics(new_value: bool) -> None: def listen_manhole( - bind_addresses: Collection[str], + bind_addresses: StrCollection, port: int, manhole_settings: ManholeConfig, manhole_globals: dict, @@ -339,7 +337,7 @@ def listen_manhole( def listen_tcp( - bind_addresses: Collection[str], + bind_addresses: StrCollection, port: int, factory: ServerFactory, reactor: IReactorTCP = reactor, @@ -448,7 +446,7 @@ def listen_http( def listen_ssl( - bind_addresses: Collection[str], + bind_addresses: StrCollection, port: int, factory: ServerFactory, context_factory: IOpenSSLContextFactory, diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py
index f9aada269a..aa24f7da6c 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py
@@ -17,7 +17,7 @@ import logging import os import sys import tempfile -from typing import List, Mapping, Optional +from typing import List, Mapping, Optional, Sequence from twisted.internet import defer, task @@ -57,7 +57,7 @@ from synapse.storage.databases.main.state import StateGroupWorkerStore from synapse.storage.databases.main.stream import StreamWorkerStore from synapse.storage.databases.main.tags import TagsWorkerStore from synapse.storage.databases.main.user_erasure_store import UserErasureWorkerStore -from synapse.types import JsonDict, StateMap +from synapse.types import JsonMapping, StateMap from synapse.util import SYNAPSE_VERSION from synapse.util.logcontext import LoggingContext @@ -198,7 +198,7 @@ class FileExfiltrationWriter(ExfiltrationWriter): for event in state.values(): json.dump(event, fp=f) - def write_profile(self, profile: JsonDict) -> None: + def write_profile(self, profile: JsonMapping) -> None: user_directory = os.path.join(self.base_directory, "user_data") os.makedirs(user_directory, exist_ok=True) profile_file = os.path.join(user_directory, "profile") @@ -206,7 +206,7 @@ class FileExfiltrationWriter(ExfiltrationWriter): with open(profile_file, "a") as f: json.dump(profile, fp=f) - def write_devices(self, devices: List[JsonDict]) -> None: + def write_devices(self, devices: Sequence[JsonMapping]) -> None: user_directory = os.path.join(self.base_directory, "user_data") os.makedirs(user_directory, exist_ok=True) device_file = os.path.join(user_directory, "devices") @@ -215,7 +215,7 @@ class FileExfiltrationWriter(ExfiltrationWriter): with open(device_file, "a") as f: json.dump(device, fp=f) - def write_connections(self, connections: List[JsonDict]) -> None: + def write_connections(self, connections: Sequence[JsonMapping]) -> None: user_directory = os.path.join(self.base_directory, "user_data") os.makedirs(user_directory, exist_ok=True) connection_file = os.path.join(user_directory, "connections") @@ -225,7 +225,7 @@ class FileExfiltrationWriter(ExfiltrationWriter): json.dump(connection, fp=f) def write_account_data( - self, file_name: str, account_data: Mapping[str, JsonDict] + self, file_name: str, account_data: Mapping[str, JsonMapping] ) -> None: account_data_directory = os.path.join( self.base_directory, "user_data", "account_data" @@ -237,7 +237,7 @@ class FileExfiltrationWriter(ExfiltrationWriter): with open(account_data_file, "a") as f: json.dump(account_data, fp=f) - def write_media_id(self, media_id: str, media_metadata: JsonDict) -> None: + def write_media_id(self, media_id: str, media_metadata: JsonMapping) -> None: file_directory = os.path.join(self.base_directory, "media_ids") os.makedirs(file_directory, exist_ok=True) media_id_file = os.path.join(file_directory, media_id) diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index d25e3548e0..f7c80eee21 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py
@@ -77,6 +77,7 @@ from synapse.storage.databases.main.monthly_active_users import ( ) from synapse.storage.databases.main.presence import PresenceStore from synapse.storage.databases.main.profile import ProfileWorkerStore +from synapse.storage.databases.main.purge_events import PurgeEventsStore from synapse.storage.databases.main.push_rule import PushRulesWorkerStore from synapse.storage.databases.main.pusher import PusherWorkerStore from synapse.storage.databases.main.receipts import ReceiptsWorkerStore @@ -134,6 +135,7 @@ class GenericWorkerStore( RelationsWorkerStore, EventFederationWorkerStore, EventPushActionsWorkerStore, + PurgeEventsStore, StateGroupWorkerStore, SignatureWorkerStore, UserErasureWorkerStore, diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py
index de7a94bf26..b1523be208 100644 --- a/synapse/appservice/api.py +++ b/synapse/appservice/api.py
@@ -40,6 +40,7 @@ from synapse.appservice import ( from synapse.events import EventBase from synapse.events.utils import SerializeEventConfig, serialize_event from synapse.http.client import SimpleHttpClient, is_unknown_endpoint +from synapse.logging import opentracing from synapse.types import DeviceListUpdates, JsonDict, ThirdPartyInstanceID from synapse.util.caches.response_cache import ResponseCache @@ -125,6 +126,17 @@ class ApplicationServiceApi(SimpleHttpClient): hs.get_clock(), "as_protocol_meta", timeout_ms=HOUR_IN_MS ) + def _get_headers(self, service: "ApplicationService") -> Dict[bytes, List[bytes]]: + """This makes sure we have always the auth header and opentracing headers set.""" + + # This is also ensured before in the functions. However this is needed to please + # the typechecks. + assert service.hs_token is not None + + headers = {b"Authorization": [b"Bearer " + service.hs_token.encode("ascii")]} + opentracing.inject_header_dict(headers, check_destination=False) + return headers + async def query_user(self, service: "ApplicationService", user_id: str) -> bool: if service.url is None: return False @@ -136,10 +148,11 @@ class ApplicationServiceApi(SimpleHttpClient): args = None if self.config.use_appservice_legacy_authorization: args = {"access_token": service.hs_token} + response = await self.get_json( f"{service.url}{APP_SERVICE_PREFIX}/users/{urllib.parse.quote(user_id)}", args, - headers={"Authorization": [f"Bearer {service.hs_token}"]}, + headers=self._get_headers(service), ) if response is not None: # just an empty json object return True @@ -162,10 +175,11 @@ class ApplicationServiceApi(SimpleHttpClient): args = None if self.config.use_appservice_legacy_authorization: args = {"access_token": service.hs_token} + response = await self.get_json( f"{service.url}{APP_SERVICE_PREFIX}/rooms/{urllib.parse.quote(alias)}", args, - headers={"Authorization": [f"Bearer {service.hs_token}"]}, + headers=self._get_headers(service), ) if response is not None: # just an empty json object return True @@ -203,10 +217,11 @@ class ApplicationServiceApi(SimpleHttpClient): **fields, b"access_token": service.hs_token, } + response = await self.get_json( f"{service.url}{APP_SERVICE_PREFIX}/thirdparty/{kind}/{urllib.parse.quote(protocol)}", args=args, - headers={"Authorization": [f"Bearer {service.hs_token}"]}, + headers=self._get_headers(service), ) if not isinstance(response, list): logger.warning( @@ -243,10 +258,11 @@ class ApplicationServiceApi(SimpleHttpClient): args = None if self.config.use_appservice_legacy_authorization: args = {"access_token": service.hs_token} + info = await self.get_json( f"{service.url}{APP_SERVICE_PREFIX}/thirdparty/protocol/{urllib.parse.quote(protocol)}", args, - headers={"Authorization": [f"Bearer {service.hs_token}"]}, + headers=self._get_headers(service), ) if not _is_valid_3pe_metadata(info): @@ -283,7 +299,7 @@ class ApplicationServiceApi(SimpleHttpClient): await self.post_json_get_json( uri=f"{service.url}{APP_SERVICE_PREFIX}/ping", post_json={"transaction_id": txn_id}, - headers={"Authorization": [f"Bearer {service.hs_token}"]}, + headers=self._get_headers(service), ) async def push_bulk( @@ -364,7 +380,7 @@ class ApplicationServiceApi(SimpleHttpClient): f"{service.url}{APP_SERVICE_PREFIX}/transactions/{urllib.parse.quote(str(txn_id))}", json_body=body, args=args, - headers={"Authorization": [f"Bearer {service.hs_token}"]}, + headers=self._get_headers(service), ) if logger.isEnabledFor(logging.DEBUG): logger.debug( @@ -437,7 +453,7 @@ class ApplicationServiceApi(SimpleHttpClient): response = await self.post_json_get_json( uri, body, - headers={"Authorization": [f"Bearer {service.hs_token}"]}, + headers=self._get_headers(service), ) except HttpResponseException as e: # The appservice doesn't support this endpoint. @@ -498,7 +514,7 @@ class ApplicationServiceApi(SimpleHttpClient): response = await self.post_json_get_json( uri, query, - headers={"Authorization": [f"Bearer {service.hs_token}"]}, + headers=self._get_headers(service), ) except HttpResponseException as e: # The appservice doesn't support this endpoint. diff --git a/synapse/appservice/scheduler.py b/synapse/appservice/scheduler.py
index 3a319b0d42..79f95f7653 100644 --- a/synapse/appservice/scheduler.py +++ b/synapse/appservice/scheduler.py
@@ -200,9 +200,7 @@ class _ServiceQueuer: if service.id in self.requests_in_flight: return - run_as_background_process( - "as-sender-%s" % (service.id,), self._send_request, service - ) + run_as_background_process("as-sender", self._send_request, service) async def _send_request(self, service: ApplicationService) -> None: # sanity-check: we shouldn't get here if this service already has a sender @@ -478,14 +476,11 @@ class _Recoverer: self.backoff_counter = 1 def recover(self) -> None: - def _retry() -> None: - run_as_background_process( - "as-recoverer-%s" % (self.service.id,), self.retry - ) - delay = 2**self.backoff_counter logger.info("Scheduling retries on %s in %fs", self.service.id, delay) - self.clock.call_later(delay, _retry) + self.clock.call_later( + delay, run_as_background_process, "as-recoverer", self.retry + ) def _backoff(self) -> None: # cap the backoff to be around 8.5min => (2^9) = 512 secs diff --git a/synapse/config/_base.py b/synapse/config/_base.py
index 69a8318127..c5816105f4 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py
@@ -26,7 +26,6 @@ from textwrap import dedent from typing import ( Any, ClassVar, - Collection, Dict, Iterable, Iterator, @@ -179,8 +178,9 @@ class Config: If an integer is provided it is treated as bytes and is unchanged. - String byte sizes can have a suffix of 'K' or `M`, representing kibibytes and - mebibytes respectively. No suffix is understood as a plain byte count. + String byte sizes can have a suffix of 'K', `M`, `G` or `T`, + representing kibibytes, mebibytes, gibibytes and tebibytes respectively. + No suffix is understood as a plain byte count. Raises: TypeError, if given something other than an integer or a string @@ -189,7 +189,7 @@ class Config: if type(value) is int: # noqa: E721 return value elif isinstance(value, str): - sizes = {"K": 1024, "M": 1024 * 1024} + sizes = {"K": 1024, "M": 1024 * 1024, "G": 1024**3, "T": 1024**4} size = 1 suffix = value[-1] if suffix in sizes: @@ -383,7 +383,7 @@ class RootConfig: config_classes: List[Type[Config]] = [] - def __init__(self, config_files: Collection[str] = ()): + def __init__(self, config_files: StrSequence = ()): # Capture absolute paths here, so we can reload config after we daemonize. self.config_files = [os.path.abspath(path) for path in config_files] diff --git a/synapse/config/cas.py b/synapse/config/cas.py
index 6e2d9addbf..bbc8f43073 100644 --- a/synapse/config/cas.py +++ b/synapse/config/cas.py
@@ -57,6 +57,8 @@ class CasConfig(Config): required_attributes ) + self.cas_enable_registration = cas_config.get("enable_registration", True) + self.idp_name = cas_config.get("idp_name", "CAS") self.idp_icon = cas_config.get("idp_icon") self.idp_brand = cas_config.get("idp_brand") @@ -67,6 +69,7 @@ class CasConfig(Config): self.cas_protocol_version = None self.cas_displayname_attribute = None self.cas_required_attributes = [] + self.cas_enable_registration = False # CAS uses a legacy required attributes mapping, not the one provided by diff --git a/synapse/config/oembed.py b/synapse/config/oembed.py
index d7959639ee..59bc0b55f4 100644 --- a/synapse/config/oembed.py +++ b/synapse/config/oembed.py
@@ -30,7 +30,7 @@ class OEmbedEndpointConfig: # The API endpoint to fetch. api_endpoint: str # The patterns to match. - url_patterns: List[Pattern] + url_patterns: List[Pattern[str]] # The supported formats. formats: Optional[List[str]] diff --git a/synapse/config/server.py b/synapse/config/server.py
index b46fa51593..72d30da300 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py
@@ -486,6 +486,17 @@ class ServerConfig(Config): else: self.redaction_retention_period = None + # How long to keep locally forgotten rooms before purging them from the DB. + forgotten_room_retention_period = config.get( + "forgotten_room_retention_period", None + ) + if forgotten_room_retention_period is not None: + self.forgotten_room_retention_period: Optional[int] = self.parse_duration( + forgotten_room_retention_period + ) + else: + self.forgotten_room_retention_period = None + # How long to keep entries in the `users_ips` table. user_ips_max_age = config.get("user_ips_max_age", "28d") if user_ips_max_age is not None: diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py
index 260aab3241..fe86f54d80 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py
@@ -23,12 +23,7 @@ from signedjson.key import ( get_verify_key, is_signing_algorithm_supported, ) -from signedjson.sign import ( - SignatureVerifyException, - encode_canonical_json, - signature_ids, - verify_signed_json, -) +from signedjson.sign import SignatureVerifyException, signature_ids, verify_signed_json from signedjson.types import VerifyKey from unpaddedbase64 import decode_base64 @@ -596,24 +591,12 @@ class BaseV2KeyFetcher(KeyFetcher): verify_key=verify_key, valid_until_ts=key_data["expired_ts"] ) - key_json_bytes = encode_canonical_json(response_json) - - await make_deferred_yieldable( - defer.gatherResults( - [ - run_in_background( - self.store.store_server_keys_json, - server_name=server_name, - key_id=key_id, - from_server=from_server, - ts_now_ms=time_added_ms, - ts_expires_ms=ts_valid_until_ms, - key_json_bytes=key_json_bytes, - ) - for key_id in verify_keys - ], - consumeErrors=True, - ).addErrback(unwrapFirstError) + await self.store.store_server_keys_response( + server_name=server_name, + from_server=from_server, + ts_added_ms=time_added_ms, + verify_keys=verify_keys, + response_json=response_json, ) return verify_keys @@ -775,10 +758,6 @@ class PerspectivesKeyFetcher(BaseV2KeyFetcher): keys.setdefault(server_name, {}).update(processed_response) - await self.store.store_server_signature_keys( - perspective_name, time_now_ms, added_keys - ) - return keys def _validate_perspectives_response( diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py
index 35257a3b1b..3c1777b7ec 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py
@@ -25,7 +25,6 @@ from typing import ( Iterable, List, Optional, - Sequence, Tuple, Type, TypeVar, @@ -408,7 +407,7 @@ class EventBase(metaclass=abc.ABCMeta): def keys(self) -> Iterable[str]: return self._dict.keys() - def prev_event_ids(self) -> Sequence[str]: + def prev_event_ids(self) -> List[str]: """Returns the list of prev event IDs. The order matches the order specified in the event, though there is no meaning to it. @@ -553,7 +552,7 @@ class FrozenEventV2(EventBase): self._event_id = "$" + encode_base64(compute_event_reference_hash(self)[1]) return self._event_id - def prev_event_ids(self) -> Sequence[str]: + def prev_event_ids(self) -> List[str]: """Returns the list of prev event IDs. The order matches the order specified in the event, though there is no meaning to it. diff --git a/synapse/events/builder.py b/synapse/events/builder.py
index 14ea0e6640..43469b170f 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py
@@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import TYPE_CHECKING, Any, Collection, Dict, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union import attr from signedjson.types import SigningKey @@ -28,7 +28,7 @@ from synapse.event_auth import auth_types_for_event from synapse.events import EventBase, _EventInternalMetadata, make_event_from_dict from synapse.state import StateHandler from synapse.storage.databases.main import DataStore -from synapse.types import EventID, JsonDict +from synapse.types import EventID, JsonDict, StrCollection from synapse.types.state import StateFilter from synapse.util import Clock from synapse.util.stringutils import random_string @@ -103,7 +103,7 @@ class EventBuilder: async def build( self, - prev_event_ids: Collection[str], + prev_event_ids: List[str], auth_event_ids: Optional[List[str]], depth: Optional[int] = None, ) -> EventBase: @@ -136,7 +136,7 @@ class EventBuilder: format_version = self.room_version.event_format # The types of auth/prev events changes between event versions. - prev_events: Union[Collection[str], List[Tuple[str, Dict[str, str]]]] + prev_events: Union[StrCollection, List[Tuple[str, Dict[str, str]]]] auth_events: Union[List[str], List[Tuple[str, Dict[str, str]]]] if format_version == EventFormatVersions.ROOM_V1_V2: auth_events = await self._store.add_event_hashes(auth_event_ids) diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py
index a9e3d4e556..5bdfa3a8ac 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py
@@ -55,7 +55,6 @@ class UnpersistedEventContextBase(ABC): A method to convert an UnpersistedEventContext to an EventContext, suitable for sending to the database with the associated event. """ - pass @abstractmethod async def get_prev_state_ids( @@ -69,7 +68,6 @@ class UnpersistedEventContextBase(ABC): state_filter: specifies the type of state event to fetch from DB, example: EventTypes.JoinRules """ - pass @attr.s(slots=True, auto_attribs=True) diff --git a/synapse/events/validator.py b/synapse/events/validator.py
index 34625dd7a1..5da50cb0d2 100644 --- a/synapse/events/validator.py +++ b/synapse/events/validator.py
@@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import collections.abc -from typing import Iterable, List, Type, Union, cast +from typing import List, Type, Union, cast import jsonschema from pydantic import Field, StrictBool, StrictStr @@ -36,7 +36,7 @@ from synapse.events.utils import ( from synapse.federation.federation_server import server_matches_acl_event from synapse.http.servlet import validate_json_object from synapse.rest.models import RequestBodyModel -from synapse.types import EventID, JsonDict, RoomID, UserID +from synapse.types import EventID, JsonDict, RoomID, StrCollection, UserID class EventValidator: @@ -225,7 +225,7 @@ class EventValidator: self._ensure_state_event(event) - def _ensure_strings(self, d: JsonDict, keys: Iterable[str]) -> None: + def _ensure_strings(self, d: JsonDict, keys: StrCollection) -> None: for s in keys: if s not in d: raise SynapseError(400, "'%s' not in content" % (s,)) diff --git a/synapse/handlers/account.py b/synapse/handlers/account.py
index c05a14304c..fa043cca86 100644 --- a/synapse/handlers/account.py +++ b/synapse/handlers/account.py
@@ -102,7 +102,7 @@ class AccountHandler: """ status = {"exists": False} - userinfo = await self._main_store.get_userinfo_by_id(user_id.to_string()) + userinfo = await self._main_store.get_user_by_id(user_id.to_string()) if userinfo is not None: status = { diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py
index 2f0e5f3b0a..ba9704a065 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py
@@ -14,11 +14,11 @@ import abc import logging -from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Set +from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Sequence, Set from synapse.api.constants import Direction, Membership from synapse.events import EventBase -from synapse.types import JsonDict, RoomStreamToken, StateMap, UserID +from synapse.types import JsonMapping, RoomStreamToken, StateMap, UserID, UserInfo from synapse.visibility import filter_events_for_client if TYPE_CHECKING: @@ -35,7 +35,7 @@ class AdminHandler: self._state_storage_controller = self._storage_controllers.state self._msc3866_enabled = hs.config.experimental.msc3866.enabled - async def get_whois(self, user: UserID) -> JsonDict: + async def get_whois(self, user: UserID) -> JsonMapping: connections = [] sessions = await self._store.get_user_ip_and_agents(user) @@ -55,40 +55,32 @@ class AdminHandler: return ret - async def get_user(self, user: UserID) -> Optional[JsonDict]: + async def get_user(self, user: UserID) -> Optional[JsonMapping]: """Function to get user details""" - user_info_dict = await self._store.get_user_by_id(user.to_string()) - if user_info_dict is None: + user_info: Optional[UserInfo] = await self._store.get_user_by_id( + user.to_string() + ) + if user_info is None: return None - # Restrict returned information to a known set of fields. This prevents additional - # fields added to get_user_by_id from modifying Synapse's external API surface. - user_info_to_return = { - "name", - "admin", - "deactivated", - "locked", - "shadow_banned", - "creation_ts", - "appservice_id", - "consent_server_notice_sent", - "consent_version", - "consent_ts", - "user_type", - "is_guest", - "last_seen_ts", + user_info_dict = { + "name": user.to_string(), + "admin": user_info.is_admin, + "deactivated": user_info.is_deactivated, + "locked": user_info.locked, + "shadow_banned": user_info.is_shadow_banned, + "creation_ts": user_info.creation_ts, + "appservice_id": user_info.appservice_id, + "consent_server_notice_sent": user_info.consent_server_notice_sent, + "consent_version": user_info.consent_version, + "consent_ts": user_info.consent_ts, + "user_type": user_info.user_type, + "is_guest": user_info.is_guest, } if self._msc3866_enabled: # Only include the approved flag if support for MSC3866 is enabled. - user_info_to_return.add("approved") - - # Restrict returned keys to a known set. - user_info_dict = { - key: value - for key, value in user_info_dict.items() - if key in user_info_to_return - } + user_info_dict["approved"] = user_info.approved # Add additional user metadata profile = await self._store.get_profileinfo(user) @@ -105,6 +97,9 @@ class AdminHandler: user_info_dict["external_ids"] = external_ids user_info_dict["erased"] = await self._store.is_user_erased(user.to_string()) + last_seen_ts = await self._store.get_last_seen_for_user_id(user.to_string()) + user_info_dict["last_seen_ts"] = last_seen_ts + return user_info_dict async def export_user_data(self, user_id: str, writer: "ExfiltrationWriter") -> Any: @@ -349,7 +344,7 @@ class ExfiltrationWriter(metaclass=abc.ABCMeta): raise NotImplementedError() @abc.abstractmethod - def write_profile(self, profile: JsonDict) -> None: + def write_profile(self, profile: JsonMapping) -> None: """Write the profile of a user. Args: @@ -358,7 +353,7 @@ class ExfiltrationWriter(metaclass=abc.ABCMeta): raise NotImplementedError() @abc.abstractmethod - def write_devices(self, devices: List[JsonDict]) -> None: + def write_devices(self, devices: Sequence[JsonMapping]) -> None: """Write the devices of a user. Args: @@ -367,7 +362,7 @@ class ExfiltrationWriter(metaclass=abc.ABCMeta): raise NotImplementedError() @abc.abstractmethod - def write_connections(self, connections: List[JsonDict]) -> None: + def write_connections(self, connections: Sequence[JsonMapping]) -> None: """Write the connections of a user. Args: @@ -377,7 +372,7 @@ class ExfiltrationWriter(metaclass=abc.ABCMeta): @abc.abstractmethod def write_account_data( - self, file_name: str, account_data: Mapping[str, JsonDict] + self, file_name: str, account_data: Mapping[str, JsonMapping] ) -> None: """Write the account data of a user. @@ -388,7 +383,7 @@ class ExfiltrationWriter(metaclass=abc.ABCMeta): raise NotImplementedError() @abc.abstractmethod - def write_media_id(self, media_id: str, media_metadata: JsonDict) -> None: + def write_media_id(self, media_id: str, media_metadata: JsonMapping) -> None: """Write the media's metadata of a user. Exports only the metadata, as this can be fetched from the database via read only. In order to access the files, a connection to the correct diff --git a/synapse/handlers/cas.py b/synapse/handlers/cas.py
index a850545453..b5b8b9bd35 100644 --- a/synapse/handlers/cas.py +++ b/synapse/handlers/cas.py
@@ -70,6 +70,7 @@ class CasHandler: self._cas_protocol_version = hs.config.cas.cas_protocol_version self._cas_displayname_attribute = hs.config.cas.cas_displayname_attribute self._cas_required_attributes = hs.config.cas.cas_required_attributes + self._cas_enable_registration = hs.config.cas.cas_enable_registration self._http_client = hs.get_proxied_http_client() @@ -395,4 +396,5 @@ class CasHandler: client_redirect_url, cas_response_to_user_attributes, grandfather_existing_users, + registration_enabled=self._cas_enable_registration, ) diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 763f56dfc1..86ad96d030 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py
@@ -43,9 +43,12 @@ from synapse.metrics.background_process_metrics import ( ) from synapse.types import ( JsonDict, + JsonMapping, + ScheduledTask, StrCollection, StreamKeyType, StreamToken, + TaskStatus, UserID, get_domain_from_id, get_verify_key_from_cross_signing_key, @@ -55,13 +58,17 @@ from synapse.util.async_helpers import Linearizer from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.cancellation import cancellable from synapse.util.metrics import measure_func -from synapse.util.retryutils import NotRetryingDestination +from synapse.util.retryutils import ( + NotRetryingDestination, + filter_destinations_by_retry_limiter, +) if TYPE_CHECKING: from synapse.server import HomeServer logger = logging.getLogger(__name__) +DELETE_DEVICE_MSGS_TASK_NAME = "delete_device_messages" MAX_DEVICE_DISPLAY_NAME_LEN = 100 DELETE_STALE_DEVICES_INTERVAL_MS = 24 * 60 * 60 * 1000 @@ -78,14 +85,20 @@ class DeviceWorkerHandler: self._appservice_handler = hs.get_application_service_handler() self._state_storage = hs.get_storage_controllers().state self._auth_handler = hs.get_auth_handler() + self._event_sources = hs.get_event_sources() self.server_name = hs.hostname self._msc3852_enabled = hs.config.experimental.msc3852_enabled self._query_appservices_for_keys = ( hs.config.experimental.msc3984_appservice_key_query ) + self._task_scheduler = hs.get_task_scheduler() self.device_list_updater = DeviceListWorkerUpdater(hs) + self._task_scheduler.register_action( + self._delete_device_messages, DELETE_DEVICE_MSGS_TASK_NAME + ) + @trace async def get_devices_by_user(self, user_id: str) -> List[JsonDict]: """ @@ -375,6 +388,33 @@ class DeviceWorkerHandler: "Trying handling device list state for partial join: not supported on workers." ) + DEVICE_MSGS_DELETE_BATCH_LIMIT = 1000 + DEVICE_MSGS_DELETE_SLEEP_MS = 1000 + + async def _delete_device_messages( + self, + task: ScheduledTask, + ) -> Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]: + """Scheduler task to delete device messages in batch of `DEVICE_MSGS_DELETE_BATCH_LIMIT`.""" + assert task.params is not None + user_id = task.params["user_id"] + device_id = task.params["device_id"] + up_to_stream_id = task.params["up_to_stream_id"] + + # Delete the messages in batches to avoid too much DB load. + while True: + res = await self.store.delete_messages_for_device( + user_id=user_id, + device_id=device_id, + up_to_stream_id=up_to_stream_id, + limit=DeviceHandler.DEVICE_MSGS_DELETE_BATCH_LIMIT, + ) + + if res < DeviceHandler.DEVICE_MSGS_DELETE_BATCH_LIMIT: + return TaskStatus.COMPLETE, None, None + + await self.clock.sleep(DeviceHandler.DEVICE_MSGS_DELETE_SLEEP_MS / 1000.0) + class DeviceHandler(DeviceWorkerHandler): device_list_updater: "DeviceListUpdater" @@ -530,6 +570,7 @@ class DeviceHandler(DeviceWorkerHandler): user_id: The user to delete devices from. device_ids: The list of device IDs to delete """ + to_device_stream_id = self._event_sources.get_current_token().to_device_key try: await self.store.delete_devices(user_id, device_ids) @@ -559,6 +600,17 @@ class DeviceHandler(DeviceWorkerHandler): f"org.matrix.msc3890.local_notification_settings.{device_id}", ) + # Delete device messages asynchronously and in batches using the task scheduler + await self._task_scheduler.schedule_task( + DELETE_DEVICE_MSGS_TASK_NAME, + resource_id=device_id, + params={ + "user_id": user_id, + "device_id": device_id, + "up_to_stream_id": to_device_stream_id, + }, + ) + # Pushers are deleted after `delete_access_tokens_for_user` is called so that # modules using `on_logged_out` hook can use them if needed. await self.hs.get_pusherpool().remove_pushers_by_devices(user_id, device_ids) @@ -707,12 +759,13 @@ class DeviceHandler(DeviceWorkerHandler): # If the dehydrated device was successfully deleted (the device ID # matched the stored dehydrated device), then modify the access - # token to use the dehydrated device's ID and copy the old device - # display name to the dehydrated device, and destroy the old device - # ID + # token and refresh token to use the dehydrated device's ID and + # copy the old device display name to the dehydrated device, + # and destroy the old device ID old_device_id = await self.store.set_device_for_access_token( access_token, device_id ) + await self.store.set_device_for_refresh_token(user_id, old_device_id, device_id) old_device = await self.store.get_device(user_id, old_device_id) if old_device is None: raise errors.NotFoundError() @@ -982,7 +1035,7 @@ class DeviceListWorkerUpdater: async def multi_user_device_resync( self, user_ids: List[str], mark_failed_as_stale: bool = True - ) -> Dict[str, Optional[JsonDict]]: + ) -> Dict[str, Optional[JsonMapping]]: """ Like `user_device_resync` but operates on multiple users **from the same origin** at once. @@ -1011,6 +1064,7 @@ class DeviceListUpdater(DeviceListWorkerUpdater): self._notifier = hs.get_notifier() self._remote_edu_linearizer = Linearizer(name="remote_device_list") + self._resync_linearizer = Linearizer(name="remote_device_resync") # user_id -> list of updates waiting to be handled. self._pending_updates: Dict[ @@ -1220,8 +1274,18 @@ class DeviceListUpdater(DeviceListWorkerUpdater): self._resync_retry_in_progress = True # Get all of the users that need resyncing. need_resync = await self.store.get_user_ids_requiring_device_list_resync() + + # Filter out users whose host is marked as "down" up front. + hosts = await filter_destinations_by_retry_limiter( + {get_domain_from_id(u) for u in need_resync}, self.clock, self.store + ) + hosts = set(hosts) + # Iterate over the set of user IDs. for user_id in need_resync: + if get_domain_from_id(user_id) not in hosts: + continue + try: # Try to resync the current user's devices list. result = (await self.multi_user_device_resync([user_id], False))[ @@ -1253,7 +1317,7 @@ class DeviceListUpdater(DeviceListWorkerUpdater): async def multi_user_device_resync( self, user_ids: List[str], mark_failed_as_stale: bool = True - ) -> Dict[str, Optional[JsonDict]]: + ) -> Dict[str, Optional[JsonMapping]]: """ Like `user_device_resync` but operates on multiple users **from the same origin** at once. @@ -1273,9 +1337,11 @@ class DeviceListUpdater(DeviceListWorkerUpdater): failed = set() # TODO(Perf): Actually batch these up for user_id in user_ids: - user_result, user_failed = await self._user_device_resync_returning_failed( - user_id - ) + async with self._resync_linearizer.queue(user_id): + ( + user_result, + user_failed, + ) = await self._user_device_resync_returning_failed(user_id) result[user_id] = user_result if user_failed: failed.add(user_id) @@ -1287,7 +1353,7 @@ class DeviceListUpdater(DeviceListWorkerUpdater): async def _user_device_resync_returning_failed( self, user_id: str - ) -> Tuple[Optional[JsonDict], bool]: + ) -> Tuple[Optional[JsonMapping], bool]: """Fetches all devices for a user and updates the device cache with them. Args: @@ -1300,6 +1366,12 @@ class DeviceListUpdater(DeviceListWorkerUpdater): e.g. due to a connection problem. - True iff the resync failed and the device list should be marked as stale. """ + # Check that we haven't gone and fetched the devices since we last + # checked if we needed to resync these device lists. + if await self.store.get_users_whose_devices_are_cached([user_id]): + cached = await self.store.get_cached_devices_for_user(user_id) + return cached, False + logger.debug("Attempting to resync the device list for %s", user_id) log_kv({"message": "Doing resync to update device list."}) # Fetch all devices for the user. diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py
index d32d224d56..eedde97ab0 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py
@@ -723,12 +723,11 @@ class FederationEventHandler: if not prevs - seen: return - latest_list = await self._store.get_latest_event_ids_in_room(room_id) + latest_frozen = await self._store.get_latest_event_ids_in_room(room_id) # We add the prev events that we have seen to the latest # list to ensure the remote server doesn't give them to us - latest = set(latest_list) - latest |= seen + latest = seen | latest_frozen logger.info( "Requesting missing events between %s and %s", @@ -1976,8 +1975,7 @@ class FederationEventHandler: # partial and full state and may not be accurate. return - extrem_ids_list = await self._store.get_latest_event_ids_in_room(event.room_id) - extrem_ids = set(extrem_ids_list) + extrem_ids = await self._store.get_latest_event_ids_in_room(event.room_id) prev_event_ids = set(event.prev_event_ids()) if extrem_ids == prev_event_ids: diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py
index b3be7a86f0..5dc76ef588 100644 --- a/synapse/handlers/initial_sync.py +++ b/synapse/handlers/initial_sync.py
@@ -13,7 +13,7 @@ # limitations under the License. import logging -from typing import TYPE_CHECKING, List, Optional, Tuple, cast +from typing import TYPE_CHECKING, List, Optional, Tuple from synapse.api.constants import ( AccountDataTypes, @@ -23,7 +23,6 @@ from synapse.api.constants import ( Membership, ) from synapse.api.errors import SynapseError -from synapse.events import EventBase from synapse.events.utils import SerializeEventConfig from synapse.events.validator import EventValidator from synapse.handlers.presence import format_user_presence_state @@ -35,7 +34,6 @@ from synapse.types import ( JsonDict, Requester, RoomStreamToken, - StateMap, StreamKeyType, StreamToken, UserID, @@ -199,9 +197,7 @@ class InitialSyncHandler: deferred_room_state = run_in_background( self._state_storage_controller.get_state_for_events, [event.event_id], - ).addCallback( - lambda states: cast(StateMap[EventBase], states[event.event_id]) - ) + ).addCallback(lambda states: states[event.event_id]) (messages, token), current_state = await make_deferred_yieldable( gather_results( diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index d6be18cdef..c036578a3d 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py
@@ -828,13 +828,13 @@ class EventCreationHandler: u = await self.store.get_user_by_id(user_id) assert u is not None - if u["user_type"] in (UserTypes.SUPPORT, UserTypes.BOT): + if u.user_type in (UserTypes.SUPPORT, UserTypes.BOT): # support and bot users are not required to consent return - if u["appservice_id"] is not None: + if u.appservice_id is not None: # users registered by an appservice are exempt return - if u["consent_version"] == self.config.consent.user_consent_version: + if u.consent_version == self.config.consent.user_consent_version: return consent_uri = self._consent_uri_builder.build_user_consent_uri(user.localpart) diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py
index e5ac9096cc..878f267a4e 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py
@@ -13,9 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import TYPE_CHECKING, Dict, List, Optional, Set - -import attr +from typing import TYPE_CHECKING, List, Optional, Set, Tuple, cast from twisted.python.failure import Failure @@ -23,16 +21,22 @@ from synapse.api.constants import Direction, EventTypes, Membership from synapse.api.errors import SynapseError from synapse.api.filtering import Filter from synapse.events.utils import SerializeEventConfig -from synapse.handlers.room import ShutdownRoomResponse +from synapse.handlers.room import ShutdownRoomParams, ShutdownRoomResponse from synapse.handlers.worker_lock import NEW_EVENT_DURING_PURGE_LOCK_NAME from synapse.logging.opentracing import trace from synapse.metrics.background_process_metrics import run_as_background_process from synapse.rest.admin._base import assert_user_is_admin from synapse.streams.config import PaginationConfig -from synapse.types import JsonDict, Requester, StrCollection, StreamKeyType +from synapse.types import ( + JsonDict, + JsonMapping, + Requester, + ScheduledTask, + StreamKeyType, + TaskStatus, +) from synapse.types.state import StateFilter from synapse.util.async_helpers import ReadWriteLock -from synapse.util.stringutils import random_string from synapse.visibility import filter_events_for_client if TYPE_CHECKING: @@ -53,80 +57,11 @@ BACKFILL_BECAUSE_TOO_MANY_GAPS_THRESHOLD = 3 PURGE_PAGINATION_LOCK_NAME = "purge_pagination_lock" -@attr.s(slots=True, auto_attribs=True) -class PurgeStatus: - """Object tracking the status of a purge request - - This class contains information on the progress of a purge request, for - return by get_purge_status. - """ - - STATUS_ACTIVE = 0 - STATUS_COMPLETE = 1 - STATUS_FAILED = 2 - - STATUS_TEXT = { - STATUS_ACTIVE: "active", - STATUS_COMPLETE: "complete", - STATUS_FAILED: "failed", - } - - # Save the error message if an error occurs - error: str = "" - - # Tracks whether this request has completed. One of STATUS_{ACTIVE,COMPLETE,FAILED}. - status: int = STATUS_ACTIVE - - def asdict(self) -> JsonDict: - ret = {"status": PurgeStatus.STATUS_TEXT[self.status]} - if self.error: - ret["error"] = self.error - return ret - - -@attr.s(slots=True, auto_attribs=True) -class DeleteStatus: - """Object tracking the status of a delete room request +PURGE_HISTORY_ACTION_NAME = "purge_history" - This class contains information on the progress of a delete room request, for - return by get_delete_status. - """ +PURGE_ROOM_ACTION_NAME = "purge_room" - STATUS_PURGING = 0 - STATUS_COMPLETE = 1 - STATUS_FAILED = 2 - STATUS_SHUTTING_DOWN = 3 - - STATUS_TEXT = { - STATUS_PURGING: "purging", - STATUS_COMPLETE: "complete", - STATUS_FAILED: "failed", - STATUS_SHUTTING_DOWN: "shutting_down", - } - - # Tracks whether this request has completed. - # One of STATUS_{PURGING,COMPLETE,FAILED,SHUTTING_DOWN}. - status: int = STATUS_PURGING - - # Save the error message if an error occurs - error: str = "" - - # Saves the result of an action to give it back to REST API - shutdown_room: ShutdownRoomResponse = { - "kicked_users": [], - "failed_to_kick_users": [], - "local_aliases": [], - "new_room_id": None, - } - - def asdict(self) -> JsonDict: - ret = { - "status": DeleteStatus.STATUS_TEXT[self.status], - "shutdown_room": self.shutdown_room, - } - if self.error: - ret["error"] = self.error - return ret +SHUTDOWN_AND_PURGE_ROOM_ACTION_NAME = "shutdown_and_purge_room" class PaginationHandler: @@ -136,9 +71,6 @@ class PaginationHandler: paginating during a purge. """ - # when to remove a completed deletion/purge from the results map - CLEAR_PURGE_AFTER_MS = 1000 * 3600 * 24 # 24 hours - def __init__(self, hs: "HomeServer"): self.hs = hs self.auth = hs.get_auth() @@ -150,17 +82,11 @@ class PaginationHandler: self._room_shutdown_handler = hs.get_room_shutdown_handler() self._relations_handler = hs.get_relations_handler() self._worker_locks = hs.get_worker_locks_handler() + self._task_scheduler = hs.get_task_scheduler() self.pagination_lock = ReadWriteLock() # IDs of rooms in which there currently an active purge *or delete* operation. self._purges_in_progress_by_room: Set[str] = set() - # map from purge id to PurgeStatus - self._purges_by_id: Dict[str, PurgeStatus] = {} - # map from purge id to DeleteStatus - self._delete_by_id: Dict[str, DeleteStatus] = {} - # map from room id to delete ids - # Dict[`room_id`, List[`delete_id`]] - self._delete_by_room: Dict[str, List[str]] = {} self._event_serializer = hs.get_event_client_serializer() self._retention_default_max_lifetime = ( @@ -173,6 +99,9 @@ class PaginationHandler: self._retention_allowed_lifetime_max = ( hs.config.retention.retention_allowed_lifetime_max ) + self._forgotten_room_retention_period = ( + hs.config.server.forgotten_room_retention_period + ) self._is_master = hs.config.worker.worker_app is None if hs.config.retention.retention_enabled and self._is_master: @@ -189,6 +118,14 @@ class PaginationHandler: job.longest_max_lifetime, ) + self._task_scheduler.register_action( + self._purge_history, PURGE_HISTORY_ACTION_NAME + ) + self._task_scheduler.register_action(self._purge_room, PURGE_ROOM_ACTION_NAME) + self._task_scheduler.register_action( + self._shutdown_and_purge_room, SHUTDOWN_AND_PURGE_ROOM_ACTION_NAME + ) + async def purge_history_for_rooms_in_range( self, min_ms: Optional[int], max_ms: Optional[int] ) -> None: @@ -224,7 +161,7 @@ class PaginationHandler: include_null = False logger.info( - "[purge] Running purge job for %s < max_lifetime <= %s (include NULLs = %s)", + "[purge] Running retention purge job for %s < max_lifetime <= %s (include NULLs = %s)", min_ms, max_ms, include_null, @@ -239,10 +176,10 @@ class PaginationHandler: for room_id, retention_policy in rooms.items(): logger.info("[purge] Attempting to purge messages in room %s", room_id) - if room_id in self._purges_in_progress_by_room: + if len(await self.get_delete_tasks_by_room(room_id, only_active=True)) > 0: logger.warning( - "[purge] not purging room %s as there's an ongoing purge running" - " for this room", + "[purge] not purging room %s for retention as there's an ongoing purge" + " running for this room", room_id, ) continue @@ -295,27 +232,20 @@ class PaginationHandler: (stream, topo, _event_id) = r token = "t%d-%d" % (topo, stream) - purge_id = random_string(16) - - self._purges_by_id[purge_id] = PurgeStatus() - - logger.info( - "Starting purging events in room %s (purge_id %s)" % (room_id, purge_id) - ) + logger.info("Starting purging events in room %s", room_id) # We want to purge everything, including local events, and to run the purge in # the background so that it's not blocking any other operation apart from # other purges in the same room. run_as_background_process( - "_purge_history", - self._purge_history, - purge_id, + PURGE_HISTORY_ACTION_NAME, + self.purge_history, room_id, token, True, ) - def start_purge_history( + async def start_purge_history( self, room_id: str, token: str, delete_local_events: bool = False ) -> str: """Start off a history purge on a room. @@ -329,40 +259,58 @@ class PaginationHandler: Returns: unique ID for this purge transaction. """ - if room_id in self._purges_in_progress_by_room: - raise SynapseError( - 400, "History purge already in progress for %s" % (room_id,) - ) - - purge_id = random_string(16) + purge_id = await self._task_scheduler.schedule_task( + PURGE_HISTORY_ACTION_NAME, + resource_id=room_id, + params={"token": token, "delete_local_events": delete_local_events}, + ) # we log the purge_id here so that it can be tied back to the # request id in the log lines. logger.info("[purge] starting purge_id %s", purge_id) - self._purges_by_id[purge_id] = PurgeStatus() - run_as_background_process( - "purge_history", - self._purge_history, - purge_id, - room_id, - token, - delete_local_events, - ) return purge_id async def _purge_history( - self, purge_id: str, room_id: str, token: str, delete_local_events: bool - ) -> None: + self, + task: ScheduledTask, + ) -> Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]: + """ + Scheduler action to purge some history of a room. + """ + if ( + task.resource_id is None + or task.params is None + or "token" not in task.params + or "delete_local_events" not in task.params + ): + return ( + TaskStatus.FAILED, + None, + "Not enough parameters passed to _purge_history", + ) + err = await self.purge_history( + task.resource_id, + task.params["token"], + task.params["delete_local_events"], + ) + if err is not None: + return TaskStatus.FAILED, None, err + return TaskStatus.COMPLETE, None, None + + async def purge_history( + self, + room_id: str, + token: str, + delete_local_events: bool, + ) -> Optional[str]: """Carry out a history purge on a room. Args: - purge_id: The ID for this purge. room_id: The room to purge from token: topological token to delete events before delete_local_events: True to delete local events as well as remote ones """ - self._purges_in_progress_by_room.add(room_id) try: async with self._worker_locks.acquire_read_write_lock( PURGE_PAGINATION_LOCK_NAME, room_id, write=True @@ -371,57 +319,68 @@ class PaginationHandler: room_id, token, delete_local_events ) logger.info("[purge] complete") - self._purges_by_id[purge_id].status = PurgeStatus.STATUS_COMPLETE + return None except Exception: f = Failure() logger.error( "[purge] failed", exc_info=(f.type, f.value, f.getTracebackObject()) ) - self._purges_by_id[purge_id].status = PurgeStatus.STATUS_FAILED - self._purges_by_id[purge_id].error = f.getErrorMessage() - finally: - self._purges_in_progress_by_room.discard(room_id) - - # remove the purge from the list 24 hours after it completes - def clear_purge() -> None: - del self._purges_by_id[purge_id] - - self.hs.get_reactor().callLater( - PaginationHandler.CLEAR_PURGE_AFTER_MS / 1000, clear_purge - ) - - def get_purge_status(self, purge_id: str) -> Optional[PurgeStatus]: - """Get the current status of an active purge + return f.getErrorMessage() - Args: - purge_id: purge_id returned by start_purge_history - """ - return self._purges_by_id.get(purge_id) - - def get_delete_status(self, delete_id: str) -> Optional[DeleteStatus]: + async def get_delete_task(self, delete_id: str) -> Optional[ScheduledTask]: """Get the current status of an active deleting Args: delete_id: delete_id returned by start_shutdown_and_purge_room + or start_purge_history. """ - return self._delete_by_id.get(delete_id) + return await self._task_scheduler.get_task(delete_id) - def get_delete_ids_by_room(self, room_id: str) -> Optional[StrCollection]: - """Get all active delete ids by room + async def get_delete_tasks_by_room( + self, room_id: str, only_active: Optional[bool] = False + ) -> List[ScheduledTask]: + """Get complete, failed or active delete tasks by room Args: room_id: room_id that is deleted + only_active: if True, completed&failed tasks will be omitted + """ + statuses = [TaskStatus.ACTIVE] + if not only_active: + statuses += [TaskStatus.COMPLETE, TaskStatus.FAILED] + + return await self._task_scheduler.get_tasks( + actions=[PURGE_ROOM_ACTION_NAME, SHUTDOWN_AND_PURGE_ROOM_ACTION_NAME], + resource_id=room_id, + statuses=statuses, + ) + + async def _purge_room( + self, + task: ScheduledTask, + ) -> Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]: + """ + Scheduler action to purge a room. """ - return self._delete_by_room.get(room_id) + if not task.resource_id: + raise Exception("No room id passed to purge_room task") + params = task.params if task.params else {} + await self.purge_room(task.resource_id, params.get("force", False)) + return TaskStatus.COMPLETE, None, None - async def purge_room(self, room_id: str, force: bool = False) -> None: + async def purge_room( + self, + room_id: str, + force: bool, + ) -> None: """Purge the given room from the database. - This function is part the delete room v1 API. Args: room_id: room to be purged force: set true to skip checking for joined users. """ + logger.info("starting purge room_id=%s force=%s", room_id, force) + async with self._worker_locks.acquire_multi_read_write_lock( [ (PURGE_PAGINATION_LOCK_NAME, room_id), @@ -430,13 +389,20 @@ class PaginationHandler: write=True, ): # first check that we have no users in this room - if not force: - joined = await self.store.is_host_joined(room_id, self._server_name) - if joined: + joined = await self.store.is_host_joined(room_id, self._server_name) + if joined: + if force: + logger.info( + "force-purging room %s with some local users still joined", + room_id, + ) + else: raise SynapseError(400, "Users are still joined to this room") await self._storage_controllers.purge_events.purge_room(room_id) + logger.info("purge complete for room_id %s", room_id) + @trace async def get_messages( self, @@ -711,169 +677,72 @@ class PaginationHandler: async def _shutdown_and_purge_room( self, - delete_id: str, - room_id: str, - requester_user_id: str, - new_room_user_id: Optional[str] = None, - new_room_name: Optional[str] = None, - message: Optional[str] = None, - block: bool = False, - purge: bool = True, - force_purge: bool = False, - ) -> None: + task: ScheduledTask, + ) -> Tuple[TaskStatus, Optional[JsonMapping], Optional[str]]: """ - Shuts down and purges a room. - - See `RoomShutdownHandler.shutdown_room` for details of creation of the new room - - Args: - delete_id: The ID for this delete. - room_id: The ID of the room to shut down. - requester_user_id: - User who requested the action. Will be recorded as putting the room on the - blocking list. - new_room_user_id: - If set, a new room will be created with this user ID - as the creator and admin, and all users in the old room will be - moved into that room. If not set, no new room will be created - and the users will just be removed from the old room. - new_room_name: - A string representing the name of the room that new users will - be invited to. Defaults to `Content Violation Notification` - message: - A string containing the first message that will be sent as - `new_room_user_id` in the new room. Ideally this will clearly - convey why the original room was shut down. - Defaults to `Sharing illegal content on this server is not - permitted and rooms in violation will be blocked.` - block: - If set to `true`, this room will be added to a blocking list, - preventing future attempts to join the room. Defaults to `false`. - purge: - If set to `true`, purge the given room from the database. - force_purge: - If set to `true`, the room will be purged from database - also if it fails to remove some users from room. - - Saves a `RoomShutdownHandler.ShutdownRoomResponse` in `DeleteStatus`: + Scheduler action to shutdown and purge a room. """ + if task.resource_id is None or task.params is None: + raise Exception( + "No room id and/or no parameters passed to shutdown_and_purge_room task" + ) - self._purges_in_progress_by_room.add(room_id) - try: - async with self._worker_locks.acquire_read_write_lock( - PURGE_PAGINATION_LOCK_NAME, room_id, write=True - ): - self._delete_by_id[delete_id].status = DeleteStatus.STATUS_SHUTTING_DOWN - self._delete_by_id[ - delete_id - ].shutdown_room = await self._room_shutdown_handler.shutdown_room( - room_id=room_id, - requester_user_id=requester_user_id, - new_room_user_id=new_room_user_id, - new_room_name=new_room_name, - message=message, - block=block, - ) - self._delete_by_id[delete_id].status = DeleteStatus.STATUS_PURGING + room_id = task.resource_id - if purge: - logger.info("starting purge room_id %s", room_id) + async def update_result(result: Optional[JsonMapping]) -> None: + await self._task_scheduler.update_task(task.id, result=result) - # first check that we have no users in this room - if not force_purge: - joined = await self.store.is_host_joined( - room_id, self._server_name - ) - if joined: - raise SynapseError( - 400, "Users are still joined to this room" - ) + shutdown_result = ( + cast(ShutdownRoomResponse, task.result) if task.result else None + ) - await self._storage_controllers.purge_events.purge_room(room_id) + shutdown_result = await self._room_shutdown_handler.shutdown_room( + room_id, + cast(ShutdownRoomParams, task.params), + shutdown_result, + update_result, + ) - logger.info("purge complete for room_id %s", room_id) - self._delete_by_id[delete_id].status = DeleteStatus.STATUS_COMPLETE - except Exception: - f = Failure() - logger.error( - "failed", - exc_info=(f.type, f.value, f.getTracebackObject()), - ) - self._delete_by_id[delete_id].status = DeleteStatus.STATUS_FAILED - self._delete_by_id[delete_id].error = f.getErrorMessage() - finally: - self._purges_in_progress_by_room.discard(room_id) - - # remove the delete from the list 24 hours after it completes - def clear_delete() -> None: - del self._delete_by_id[delete_id] - self._delete_by_room[room_id].remove(delete_id) - if not self._delete_by_room[room_id]: - del self._delete_by_room[room_id] - - self.hs.get_reactor().callLater( - PaginationHandler.CLEAR_PURGE_AFTER_MS / 1000, clear_delete + if task.params.get("purge", False): + await self.purge_room( + room_id, + task.params.get("force_purge", False), ) - def start_shutdown_and_purge_room( + return (TaskStatus.COMPLETE, shutdown_result, None) + + async def start_shutdown_and_purge_room( self, room_id: str, - requester_user_id: str, - new_room_user_id: Optional[str] = None, - new_room_name: Optional[str] = None, - message: Optional[str] = None, - block: bool = False, - purge: bool = True, - force_purge: bool = False, + shutdown_params: ShutdownRoomParams, ) -> str: """Start off shut down and purge on a room. Args: room_id: The ID of the room to shut down. - requester_user_id: - User who requested the action and put the room on the - blocking list. - new_room_user_id: - If set, a new room will be created with this user ID - as the creator and admin, and all users in the old room will be - moved into that room. If not set, no new room will be created - and the users will just be removed from the old room. - new_room_name: - A string representing the name of the room that new users will - be invited to. Defaults to `Content Violation Notification` - message: - A string containing the first message that will be sent as - `new_room_user_id` in the new room. Ideally this will clearly - convey why the original room was shut down. - Defaults to `Sharing illegal content on this server is not - permitted and rooms in violation will be blocked.` - block: - If set to `true`, this room will be added to a blocking list, - preventing future attempts to join the room. Defaults to `false`. - purge: - If set to `true`, purge the given room from the database. - force_purge: - If set to `true`, the room will be purged from database - also if it fails to remove some users from room. + shutdown_params: parameters for the shutdown Returns: unique ID for this delete transaction. """ - if room_id in self._purges_in_progress_by_room: - raise SynapseError( - 400, "History purge already in progress for %s" % (room_id,) - ) + if len(await self.get_delete_tasks_by_room(room_id, only_active=True)) > 0: + raise SynapseError(400, "Purge already in progress for %s" % (room_id,)) # This check is double to `RoomShutdownHandler.shutdown_room` # But here the requester get a direct response / error with HTTP request # and do not have to check the purge status + new_room_user_id = shutdown_params["new_room_user_id"] if new_room_user_id is not None: if not self.hs.is_mine_id(new_room_user_id): raise SynapseError( 400, "User must be our own: %s" % (new_room_user_id,) ) - delete_id = random_string(16) + delete_id = await self._task_scheduler.schedule_task( + SHUTDOWN_AND_PURGE_ROOM_ACTION_NAME, + resource_id=room_id, + params=shutdown_params, + ) # we log the delete_id here so that it can be tied back to the # request id in the log lines. @@ -883,19 +752,4 @@ class PaginationHandler: delete_id, ) - self._delete_by_id[delete_id] = DeleteStatus() - self._delete_by_room.setdefault(room_id, []).append(delete_id) - run_as_background_process( - "shutdown_and_purge_room", - self._shutdown_and_purge_room, - delete_id, - room_id, - requester_user_id, - new_room_user_id, - new_room_name, - message, - block, - purge, - force_purge, - ) return delete_id diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py
index f31e18328b..375c7d0901 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py
@@ -13,13 +13,56 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""This module is responsible for keeping track of presence status of local +""" +This module is responsible for keeping track of presence status of local and remote users. The methods that define policy are: - PresenceHandler._update_states - PresenceHandler._handle_timeouts - should_notify + +# Tracking local presence + +For local users, presence is tracked on a per-device basis. When a user has multiple +devices the user presence state is derived by coalescing the presence from each +device: + + BUSY > ONLINE > UNAVAILABLE > OFFLINE + +The time that each device was last active and last synced is tracked in order to +automatically downgrade a device's presence state: + + A device may move from ONLINE -> UNAVAILABLE, if it has not been active for + a period of time. + + A device may go from any state -> OFFLINE, if it is not active and has not + synced for a period of time. + +The timeouts are handled using a wheel timer, which has coarse buckets. Timings +do not need to be exact. + +Generally a device's presence state is updated whenever a user syncs (via the +set_presence parameter), when the presence API is called, or if "pro-active" +events occur, including: + +* Sending an event, receipt, read marker. +* Updating typing status. + +The busy state has special status that it cannot is not downgraded by a call to +sync with a lower priority state *and* it takes a long period of time to transition +to offline. + +# Persisting (and restoring) presence + +For all users, presence is persisted on a per-user basis. Data is kept in-memory +and persisted periodically. When Synapse starts each worker loads the current +presence state and then tracks the presence stream to keep itself up-to-date. + +When restoring presence for local users a pseudo-device is created to match the +user state; this device follows the normal timeout logic (see above) and will +automatically be replaced with any information from currently available devices. + """ import abc import contextlib @@ -30,6 +73,7 @@ from contextlib import contextmanager from types import TracebackType from typing import ( TYPE_CHECKING, + AbstractSet, Any, Callable, Collection, @@ -49,7 +93,7 @@ from prometheus_client import Counter import synapse.metrics from synapse.api.constants import EduTypes, EventTypes, Membership, PresenceState from synapse.api.errors import SynapseError -from synapse.api.presence import UserPresenceState +from synapse.api.presence import UserDevicePresenceState, UserPresenceState from synapse.appservice import ApplicationService from synapse.events.presence_router import PresenceRouter from synapse.logging.context import run_in_background @@ -111,6 +155,8 @@ LAST_ACTIVE_GRANULARITY = 60 * 1000 # How long to wait until a new /events or /sync request before assuming # the client has gone. SYNC_ONLINE_TIMEOUT = 30 * 1000 +# Busy status waits longer, but does eventually go offline. +BUSY_ONLINE_TIMEOUT = 60 * 60 * 1000 # How long to wait before marking the user as idle. Compared against last active IDLE_TIMER = 5 * 60 * 1000 @@ -137,6 +183,7 @@ class BasePresenceHandler(abc.ABC): writer""" def __init__(self, hs: "HomeServer"): + self.hs = hs self.clock = hs.get_clock() self.store = hs.get_datastores().main self._storage_controllers = hs.get_storage_controllers() @@ -162,6 +209,7 @@ class BasePresenceHandler(abc.ABC): self.VALID_PRESENCE += (PresenceState.BUSY,) active_presence = self.store.take_presence_startup_info() + # The combined status across all user devices. self.user_to_current_state = {state.user_id: state for state in active_presence} @abc.abstractmethod @@ -426,8 +474,6 @@ class _NullContextManager(ContextManager[None]): class WorkerPresenceHandler(BasePresenceHandler): def __init__(self, hs: "HomeServer"): super().__init__(hs) - self.hs = hs - self._presence_writer_instance = hs.config.worker.writers.presence[0] # Route presence EDUs to the right worker @@ -691,7 +737,6 @@ class WorkerPresenceHandler(BasePresenceHandler): class PresenceHandler(BasePresenceHandler): def __init__(self, hs: "HomeServer"): super().__init__(hs) - self.hs = hs self.wheel_timer: WheelTimer[str] = WheelTimer() self.notifier = hs.get_notifier() @@ -708,9 +753,27 @@ class PresenceHandler(BasePresenceHandler): lambda: len(self.user_to_current_state), ) + # The per-device presence state, maps user to devices to per-device presence state. + self._user_to_device_to_current_state: Dict[ + str, Dict[Optional[str], UserDevicePresenceState] + ] = {} + now = self.clock.time_msec() if self._presence_enabled: for state in self.user_to_current_state.values(): + # Create a psuedo-device to properly handle time outs. This will + # be overridden by any "real" devices within SYNC_ONLINE_TIMEOUT. + pseudo_device_id = None + self._user_to_device_to_current_state[state.user_id] = { + pseudo_device_id: UserDevicePresenceState( + user_id=state.user_id, + device_id=pseudo_device_id, + state=state.state, + last_active_ts=state.last_active_ts, + last_sync_ts=state.last_user_sync_ts, + ) + } + self.wheel_timer.insert( now=now, obj=state.user_id, then=state.last_active_ts + IDLE_TIMER ) @@ -752,7 +815,7 @@ class PresenceHandler(BasePresenceHandler): # Keeps track of the number of *ongoing* syncs on other processes. # - # While any sync is ongoing on another process the user will never + # While any sync is ongoing on another process the user's device will never # go offline. # # Each process has a unique identifier and an update frequency. If @@ -981,22 +1044,21 @@ class PresenceHandler(BasePresenceHandler): timers_fired_counter.inc(len(states)) - syncing_user_ids = { - user_id - for (user_id, _), count in self._user_device_to_num_current_syncs.items() + # Set of user ID & device IDs which are currently syncing. + syncing_user_devices = { + user_id_device_id + for user_id_device_id, count in self._user_device_to_num_current_syncs.items() if count } - syncing_user_ids.update( - user_id - for user_id, _ in itertools.chain( - *self.external_process_to_current_syncs.values() - ) + syncing_user_devices.update( + itertools.chain(*self.external_process_to_current_syncs.values()) ) changes = handle_timeouts( states, is_mine_fn=self.is_mine_id, - syncing_user_ids=syncing_user_ids, + syncing_user_devices=syncing_user_devices, + user_to_devices=self._user_to_device_to_current_state, now=now, ) @@ -1016,11 +1078,26 @@ class PresenceHandler(BasePresenceHandler): bump_active_time_counter.inc() - prev_state = await self.current_state_for_user(user_id) + now = self.clock.time_msec() - new_fields: Dict[str, Any] = {"last_active_ts": self.clock.time_msec()} - if prev_state.state == PresenceState.UNAVAILABLE: - new_fields["state"] = PresenceState.ONLINE + # Update the device information & mark the device as online if it was + # unavailable. + devices = self._user_to_device_to_current_state.setdefault(user_id, {}) + device_state = devices.setdefault( + device_id, + UserDevicePresenceState.default(user_id, device_id), + ) + device_state.last_active_ts = now + if device_state.state == PresenceState.UNAVAILABLE: + device_state.state = PresenceState.ONLINE + + # Update the user state, this will always update last_active_ts and + # might update the presence state. + prev_state = await self.current_state_for_user(user_id) + new_fields: Dict[str, Any] = { + "last_active_ts": now, + "state": _combine_device_states(devices.values()), + } await self._update_states([prev_state.copy_and_replace(**new_fields)]) @@ -1132,6 +1209,12 @@ class PresenceHandler(BasePresenceHandler): if is_syncing and (user_id, device_id) not in process_presence: process_presence.add((user_id, device_id)) elif not is_syncing and (user_id, device_id) in process_presence: + devices = self._user_to_device_to_current_state.setdefault(user_id, {}) + device_state = devices.setdefault( + device_id, UserDevicePresenceState.default(user_id, device_id) + ) + device_state.last_sync_ts = sync_time_msec + new_state = prev_state.copy_and_replace( last_user_sync_ts=sync_time_msec ) @@ -1151,11 +1234,24 @@ class PresenceHandler(BasePresenceHandler): process_presence = self.external_process_to_current_syncs.pop( process_id, set() ) - prev_states = await self.current_state_for_users( - {user_id for user_id, device_id in process_presence} - ) + time_now_ms = self.clock.time_msec() + # Mark each device as having a last sync time. + updated_users = set() + for user_id, device_id in process_presence: + device_state = self._user_to_device_to_current_state.setdefault( + user_id, {} + ).setdefault( + device_id, UserDevicePresenceState.default(user_id, device_id) + ) + + device_state.last_sync_ts = time_now_ms + updated_users.add(user_id) + + # Update each user (and insert into the appropriate timers to check if + # they've gone offline). + prev_states = await self.current_state_for_users(updated_users) await self._update_states( [ prev_state.copy_and_replace(last_user_sync_ts=time_now_ms) @@ -1277,6 +1373,20 @@ class PresenceHandler(BasePresenceHandler): if prev_state.state == PresenceState.BUSY and is_sync: presence = PresenceState.BUSY + # Update the device specific information. + devices = self._user_to_device_to_current_state.setdefault(user_id, {}) + device_state = devices.setdefault( + device_id, + UserDevicePresenceState.default(user_id, device_id), + ) + device_state.state = presence + device_state.last_active_ts = now + if is_sync: + device_state.last_sync_ts = now + + # Based on the state of each user's device calculate the new presence state. + presence = _combine_device_states(devices.values()) + new_fields = {"state": presence} if presence == PresenceState.ONLINE or presence == PresenceState.BUSY: @@ -1873,7 +1983,8 @@ class PresenceEventSource(EventSource[int, UserPresenceState]): def handle_timeouts( user_states: List[UserPresenceState], is_mine_fn: Callable[[str], bool], - syncing_user_ids: Set[str], + syncing_user_devices: AbstractSet[Tuple[str, Optional[str]]], + user_to_devices: Dict[str, Dict[Optional[str], UserDevicePresenceState]], now: int, ) -> List[UserPresenceState]: """Checks the presence of users that have timed out and updates as @@ -1882,7 +1993,8 @@ def handle_timeouts( Args: user_states: List of UserPresenceState's to check. is_mine_fn: Function that returns if a user_id is ours - syncing_user_ids: Set of user_ids with active syncs. + syncing_user_devices: A set of (user ID, device ID) tuples with active syncs.. + user_to_devices: A map of user ID to device ID to UserDevicePresenceState. now: Current time in ms. Returns: @@ -1891,9 +2003,16 @@ def handle_timeouts( changes = {} # Actual changes we need to notify people about for state in user_states: - is_mine = is_mine_fn(state.user_id) - - new_state = handle_timeout(state, is_mine, syncing_user_ids, now) + user_id = state.user_id + is_mine = is_mine_fn(user_id) + + new_state = handle_timeout( + state, + is_mine, + syncing_user_devices, + user_to_devices.get(user_id, {}), + now, + ) if new_state: changes[state.user_id] = new_state @@ -1901,14 +2020,19 @@ def handle_timeouts( def handle_timeout( - state: UserPresenceState, is_mine: bool, syncing_user_ids: Set[str], now: int + state: UserPresenceState, + is_mine: bool, + syncing_device_ids: AbstractSet[Tuple[str, Optional[str]]], + user_devices: Dict[Optional[str], UserDevicePresenceState], + now: int, ) -> Optional[UserPresenceState]: """Checks the presence of the user to see if any of the timers have elapsed Args: - state + state: UserPresenceState to check. is_mine: Whether the user is ours - syncing_user_ids: Set of user_ids with active syncs. + syncing_user_devices: A set of (user ID, device ID) tuples with active syncs.. + user_devices: A map of device ID to UserDevicePresenceState. now: Current time in ms. Returns: @@ -1919,34 +2043,63 @@ def handle_timeout( return None changed = False - user_id = state.user_id if is_mine: - if state.state == PresenceState.ONLINE: - if now - state.last_active_ts > IDLE_TIMER: - # Currently online, but last activity ages ago so auto - # idle - state = state.copy_and_replace(state=PresenceState.UNAVAILABLE) - changed = True - elif now - state.last_active_ts > LAST_ACTIVE_GRANULARITY: - # So that we send down a notification that we've - # stopped updating. + # Check per-device whether the device should be considered idle or offline + # due to timeouts. + device_changed = False + offline_devices = [] + for device_id, device_state in user_devices.items(): + if device_state.state == PresenceState.ONLINE: + if now - device_state.last_active_ts > IDLE_TIMER: + # Currently online, but last activity ages ago so auto + # idle + device_state.state = PresenceState.UNAVAILABLE + device_changed = True + + # If there are have been no sync for a while (and none ongoing), + # set presence to offline. + if (state.user_id, device_id) not in syncing_device_ids: + # If the user has done something recently but hasn't synced, + # don't set them as offline. + sync_or_active = max( + device_state.last_sync_ts, device_state.last_active_ts + ) + + # Implementations aren't meant to timeout a device with a busy + # state, but it needs to timeout *eventually* or else the user + # will be stuck in that state. + online_timeout = ( + BUSY_ONLINE_TIMEOUT + if device_state.state == PresenceState.BUSY + else SYNC_ONLINE_TIMEOUT + ) + if now - sync_or_active > online_timeout: + # Mark the device as going offline. + offline_devices.append(device_id) + device_changed = True + + # Offline devices are not needed and do not add information. + for device_id in offline_devices: + user_devices.pop(device_id) + + # If the presence state of the devices changed, then (maybe) update + # the user's overall presence state. + if device_changed: + new_presence = _combine_device_states(user_devices.values()) + if new_presence != state.state: + state = state.copy_and_replace(state=new_presence) changed = True + if now - state.last_active_ts > LAST_ACTIVE_GRANULARITY: + # So that we send down a notification that we've + # stopped updating. + changed = True + if now - state.last_federation_update_ts > FEDERATION_PING_INTERVAL: # Need to send ping to other servers to ensure they don't # timeout and set us to offline changed = True - - # If there are have been no sync for a while (and none ongoing), - # set presence to offline - if user_id not in syncing_user_ids: - # If the user has done something recently but hasn't synced, - # don't set them as offline. - sync_or_active = max(state.last_user_sync_ts, state.last_active_ts) - if now - sync_or_active > SYNC_ONLINE_TIMEOUT: - state = state.copy_and_replace(state=PresenceState.OFFLINE) - changed = True else: # We expect to be poked occasionally by the other side. # This is to protect against forgetful/buggy servers, so that @@ -2021,6 +2174,13 @@ def handle_update( new_state = new_state.copy_and_replace(last_federation_update_ts=now) federation_ping = True + if new_state.state == PresenceState.BUSY: + wheel_timer.insert( + now=now, + obj=user_id, + then=new_state.last_user_sync_ts + BUSY_ONLINE_TIMEOUT, + ) + else: wheel_timer.insert( now=now, @@ -2036,6 +2196,46 @@ def handle_update( return new_state, persist_and_notify, federation_ping +PRESENCE_BY_PRIORITY = { + PresenceState.BUSY: 4, + PresenceState.ONLINE: 3, + PresenceState.UNAVAILABLE: 2, + PresenceState.OFFLINE: 1, +} + + +def _combine_device_states( + device_states: Iterable[UserDevicePresenceState], +) -> str: + """ + Find the device to use presence information from. + + Orders devices by priority, then last_active_ts. + + Args: + device_states: An iterable of device presence states + + Return: + The combined presence state. + """ + + # Based on (all) the user's devices calculate the new presence state. + presence = PresenceState.OFFLINE + last_active_ts = -1 + + # Find the device to use the presence state of based on the presence priority, + # but tie-break with how recently the device has been seen. + for device_state in device_states: + if (PRESENCE_BY_PRIORITY[device_state.state], device_state.last_active_ts) > ( + PRESENCE_BY_PRIORITY[presence], + last_active_ts, + ): + presence = device_state.state + last_active_ts = device_state.last_active_ts + + return presence + + async def get_interested_parties( store: DataStore, presence_router: PresenceRouter, states: List[UserPresenceState] ) -> Tuple[Dict[str, List[UserPresenceState]], Dict[str, List[UserPresenceState]]]: diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py
index 2bacdebfb5..c7edada353 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py
@@ -37,6 +37,8 @@ class ReceiptsHandler: self.server_name = hs.config.server.server_name self.store = hs.get_datastores().main self.event_auth_handler = hs.get_event_auth_handler() + self.event_handler = hs.get_event_handler() + self._storage_controllers = hs.get_storage_controllers() self.hs = hs @@ -81,6 +83,20 @@ class ReceiptsHandler: ) continue + # Let's check that the origin server is in the room before accepting the receipt. + # We don't want to block waiting on a partial state so take an + # approximation if needed. + domains = await self._storage_controllers.state.get_current_hosts_in_room_or_partial_state_approximation( + room_id + ) + if origin not in domains: + logger.info( + "Ignoring receipt for room %r from server %s as they're not in the room", + room_id, + origin, + ) + continue + for receipt_type, users in room_values.items(): for user_id, user_values in users.items(): if get_domain_from_id(user_id) != origin: @@ -158,17 +174,23 @@ class ReceiptsHandler: self, room_id: str, receipt_type: str, - user_id: str, + user_id: UserID, event_id: str, thread_id: Optional[str], ) -> None: """Called when a client tells us a local user has read up to the given event_id in the room. """ + + # Ensure the room/event exists, this will raise an error if the user + # cannot view the event. + if not await self.event_handler.get_event(user_id, room_id, event_id): + return + receipt = ReadReceipt( room_id=room_id, receipt_type=receipt_type, - user_id=user_id, + user_id=user_id.to_string(), event_ids=[event_id], thread_id=thread_id, data={"ts": int(self.clock.time_msec())}, diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py
index 0513e28aab..a0c3b16819 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py
@@ -20,7 +20,7 @@ import random import string from collections import OrderedDict from http import HTTPStatus -from typing import TYPE_CHECKING, Any, Awaitable, Dict, List, Optional, Tuple +from typing import TYPE_CHECKING, Any, Awaitable, Callable, Dict, List, Optional, Tuple import attr from typing_extensions import TypedDict @@ -54,11 +54,11 @@ from synapse.events import EventBase from synapse.events.snapshot import UnpersistedEventContext from synapse.events.utils import copy_and_fixup_power_levels_contents from synapse.handlers.relations import BundledAggregations -from synapse.module_api import NOT_SPAM from synapse.rest.admin._base import assert_user_is_admin from synapse.streams import EventSource from synapse.types import ( JsonDict, + JsonMapping, MutableStateMap, Requester, RoomAlias, @@ -454,7 +454,7 @@ class RoomCreationHandler: spam_check = await self._spam_checker_module_callbacks.user_may_create_room( user_id ) - if spam_check != NOT_SPAM: + if spam_check != self._spam_checker_module_callbacks.NOT_SPAM: raise SynapseError( 403, "You are not permitted to create rooms", @@ -768,7 +768,7 @@ class RoomCreationHandler: spam_check = await self._spam_checker_module_callbacks.user_may_create_room( user_id ) - if spam_check != NOT_SPAM: + if spam_check != self._spam_checker_module_callbacks.NOT_SPAM: raise SynapseError( 403, "You are not permitted to create rooms", @@ -1750,6 +1750,45 @@ class RoomEventSource(EventSource[RoomStreamToken, EventBase]): return self.store.get_current_room_stream_token_for_room_id(room_id) +class ShutdownRoomParams(TypedDict): + """ + Attributes: + requester_user_id: + User who requested the action. Will be recorded as putting the room on the + blocking list. + new_room_user_id: + If set, a new room will be created with this user ID + as the creator and admin, and all users in the old room will be + moved into that room. If not set, no new room will be created + and the users will just be removed from the old room. + new_room_name: + A string representing the name of the room that new users will + be invited to. Defaults to `Content Violation Notification` + message: + A string containing the first message that will be sent as + `new_room_user_id` in the new room. Ideally this will clearly + convey why the original room was shut down. + Defaults to `Sharing illegal content on this server is not + permitted and rooms in violation will be blocked.` + block: + If set to `true`, this room will be added to a blocking list, + preventing future attempts to join the room. Defaults to `false`. + purge: + If set to `true`, purge the given room from the database. + force_purge: + If set to `true`, the room will be purged from database + even if there are still users joined to the room. + """ + + requester_user_id: Optional[str] + new_room_user_id: Optional[str] + new_room_name: Optional[str] + message: Optional[str] + block: bool + purge: bool + force_purge: bool + + class ShutdownRoomResponse(TypedDict): """ Attributes: @@ -1787,12 +1826,12 @@ class RoomShutdownHandler: async def shutdown_room( self, room_id: str, - requester_user_id: str, - new_room_user_id: Optional[str] = None, - new_room_name: Optional[str] = None, - message: Optional[str] = None, - block: bool = False, - ) -> ShutdownRoomResponse: + params: ShutdownRoomParams, + result: Optional[ShutdownRoomResponse] = None, + update_result_fct: Optional[ + Callable[[Optional[JsonMapping]], Awaitable[None]] + ] = None, + ) -> Optional[ShutdownRoomResponse]: """ Shuts down a room. Moves all local users and room aliases automatically to a new room if `new_room_user_id` is set. Otherwise local users only @@ -1808,48 +1847,23 @@ class RoomShutdownHandler: Args: room_id: The ID of the room to shut down. - requester_user_id: - User who requested the action and put the room on the - blocking list. - new_room_user_id: - If set, a new room will be created with this user ID - as the creator and admin, and all users in the old room will be - moved into that room. If not set, no new room will be created - and the users will just be removed from the old room. - new_room_name: - A string representing the name of the room that new users will - be invited to. Defaults to `Content Violation Notification` - message: - A string containing the first message that will be sent as - `new_room_user_id` in the new room. Ideally this will clearly - convey why the original room was shut down. - Defaults to `Sharing illegal content on this server is not - permitted and rooms in violation will be blocked.` - block: - If set to `True`, users will be prevented from joining the old - room. This option can also be used to pre-emptively block a room, - even if it's unknown to this homeserver. In this case, the room - will be blocked, and no further action will be taken. If `False`, - attempting to delete an unknown room is invalid. - - Defaults to `False`. - - Returns: a dict containing the following keys: - kicked_users: An array of users (`user_id`) that were kicked. - failed_to_kick_users: - An array of users (`user_id`) that that were not kicked. - local_aliases: - An array of strings representing the local aliases that were - migrated from the old room to the new. - new_room_id: - A string representing the room ID of the new room, or None if - no such room was created. - """ + delete_id: The delete ID identifying this delete request + params: parameters for the shutdown, cf `ShutdownRoomParams` + result: current status of the shutdown, if it was interrupted + update_result_fct: function called when `result` is updated locally - if not new_room_name: - new_room_name = self.DEFAULT_ROOM_NAME - if not message: - message = self.DEFAULT_MESSAGE + Returns: a dict matching `ShutdownRoomResponse`. + """ + requester_user_id = params["requester_user_id"] + new_room_user_id = params["new_room_user_id"] + block = params["block"] + + new_room_name = ( + params["new_room_name"] + if params["new_room_name"] + else self.DEFAULT_ROOM_NAME + ) + message = params["message"] if params["message"] else self.DEFAULT_MESSAGE if not RoomID.is_valid(room_id): raise SynapseError(400, "%s is not a legal room ID" % (room_id,)) @@ -1861,22 +1875,33 @@ class RoomShutdownHandler: 403, "Shutdown of this room is forbidden", Codes.FORBIDDEN ) + result = ( + result + if result + else { + "kicked_users": [], + "failed_to_kick_users": [], + "local_aliases": [], + "new_room_id": None, + } + ) + # Action the block first (even if the room doesn't exist yet) if block: + if requester_user_id is None: + raise ValueError( + "shutdown_room: block=True not allowed when requester_user_id is None." + ) # This will work even if the room is already blocked, but that is # desirable in case the first attempt at blocking the room failed below. await self.store.block_room(room_id, requester_user_id) if not await self.store.get_room(room_id): # if we don't know about the room, there is nothing left to do. - return { - "kicked_users": [], - "failed_to_kick_users": [], - "local_aliases": [], - "new_room_id": None, - } + return result - if new_room_user_id is not None: + new_room_id = result.get("new_room_id") + if new_room_user_id is not None and new_room_id is None: if not self.hs.is_mine_id(new_room_user_id): raise SynapseError( 400, "User must be our own: %s" % (new_room_user_id,) @@ -1896,6 +1921,10 @@ class RoomShutdownHandler: ratelimit=False, ) + result["new_room_id"] = new_room_id + if update_result_fct: + await update_result_fct(result) + logger.info( "Shutting down room %r, joining to new room: %r", room_id, new_room_id ) @@ -1909,12 +1938,9 @@ class RoomShutdownHandler: stream_id, ) else: - new_room_id = None logger.info("Shutting down room %r", room_id) users = await self.store.get_users_in_room(room_id) - kicked_users = [] - failed_to_kick_users = [] for user_id in users: if not self.hs.is_mine_id(user_id): continue @@ -1943,7 +1969,9 @@ class RoomShutdownHandler: stream_id, ) - await self.room_member_handler.forget(target_requester.user, room_id) + await self.room_member_handler.forget( + target_requester.user, room_id, do_not_schedule_purge=True + ) # Join users to new room if new_room_user_id: @@ -1958,15 +1986,23 @@ class RoomShutdownHandler: require_consent=False, ) - kicked_users.append(user_id) + result["kicked_users"].append(user_id) + if update_result_fct: + await update_result_fct(result) except Exception: logger.exception( "Failed to leave old room and join new room for %r", user_id ) - failed_to_kick_users.append(user_id) + result["failed_to_kick_users"].append(user_id) + if update_result_fct: + await update_result_fct(result) # Send message in new room and move aliases if new_room_user_id: + room_creator_requester = create_requester( + new_room_user_id, authenticated_entity=requester_user_id + ) + await self.event_creation_handler.create_and_send_nonmember_event( room_creator_requester, { @@ -1978,18 +2014,15 @@ class RoomShutdownHandler: ratelimit=False, ) - aliases_for_room = await self.store.get_aliases_for_room(room_id) + result["local_aliases"] = list( + await self.store.get_aliases_for_room(room_id) + ) assert new_room_id is not None await self.store.update_aliases_for_room( room_id, new_room_id, requester_user_id ) else: - aliases_for_room = [] + result["local_aliases"] = [] - return { - "kicked_users": kicked_users, - "failed_to_kick_users": failed_to_kick_users, - "local_aliases": list(aliases_for_room), - "new_room_id": new_room_id, - } + return result diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py
index de0f04e3fe..90343c2306 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py
@@ -37,13 +37,13 @@ from synapse.api.ratelimiting import Ratelimiter from synapse.event_auth import get_named_level, get_power_level_event from synapse.events import EventBase from synapse.events.snapshot import EventContext +from synapse.handlers.pagination import PURGE_ROOM_ACTION_NAME from synapse.handlers.profile import MAX_AVATAR_URL_LEN, MAX_DISPLAYNAME_LEN from synapse.handlers.state_deltas import MatchChange, StateDeltasHandler from synapse.handlers.worker_lock import NEW_EVENT_DURING_PURGE_LOCK_NAME from synapse.logging import opentracing from synapse.metrics import event_processing_positions from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.module_api import NOT_SPAM from synapse.types import ( JsonDict, Requester, @@ -169,6 +169,10 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): self.request_ratelimiter = hs.get_request_ratelimiter() hs.get_notifier().add_new_join_in_room_callback(self._on_user_joined_room) + self._forgotten_room_retention_period = ( + hs.config.server.forgotten_room_retention_period + ) + def _on_user_joined_room(self, event_id: str, room_id: str) -> None: """Notify the rate limiter that a room join has occurred. @@ -278,7 +282,9 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): """ raise NotImplementedError() - async def forget(self, user: UserID, room_id: str) -> None: + async def forget( + self, user: UserID, room_id: str, do_not_schedule_purge: bool = False + ) -> None: user_id = user.to_string() member = await self._storage_controllers.state.get_current_state_event( @@ -298,6 +304,20 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): # the table `current_state_events` and `get_current_state_events` is `None`. await self.store.forget(user_id, room_id) + # If everyone locally has left the room, then there is no reason for us to keep the + # room around and we automatically purge room after a little bit + if ( + not do_not_schedule_purge + and self._forgotten_room_retention_period + and await self.store.is_locally_forgotten_room(room_id) + ): + await self.hs.get_task_scheduler().schedule_task( + PURGE_ROOM_ACTION_NAME, + resource_id=room_id, + timestamp=self.clock.time_msec() + + self._forgotten_room_retention_period, + ) + async def ratelimit_multiple_invites( self, requester: Optional[Requester], @@ -804,7 +824,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): spam_check = await self._spam_checker_module_callbacks.user_may_invite( requester.user.to_string(), target_id, room_id ) - if spam_check != NOT_SPAM: + if spam_check != self._spam_checker_module_callbacks.NOT_SPAM: logger.info("Blocking invite due to spam checker") block_invite_result = spam_check @@ -939,7 +959,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): target.to_string(), room_id, is_invited=inviter is not None ) ) - if spam_check != NOT_SPAM: + if spam_check != self._spam_checker_module_callbacks.NOT_SPAM: raise SynapseError( 403, "Not allowed to join this room", @@ -1557,7 +1577,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): room_id=room_id, ) ) - if spam_check != NOT_SPAM: + if spam_check != self._spam_checker_module_callbacks.NOT_SPAM: raise SynapseError( 403, "Cannot send threepid invite", diff --git a/synapse/handlers/send_email.py b/synapse/handlers/send_email.py
index 05e21509de..657d9b3559 100644 --- a/synapse/handlers/send_email.py +++ b/synapse/handlers/send_email.py
@@ -17,7 +17,7 @@ import logging from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText from io import BytesIO -from typing import TYPE_CHECKING, Any, Optional +from typing import TYPE_CHECKING, Any, Dict, Optional from pkg_resources import parse_version @@ -151,6 +151,7 @@ class SendEmailHandler: app_name: str, html: str, text: str, + additional_headers: Optional[Dict[str, str]] = None, ) -> None: """Send a multipart email with the given information. @@ -160,6 +161,7 @@ class SendEmailHandler: app_name: The app name to include in the From header. html: The HTML content to include in the email. text: The plain text content to include in the email. + additional_headers: A map of additional headers to include. """ try: from_string = self._from % {"app": app_name} @@ -172,8 +174,8 @@ class SendEmailHandler: if raw_to == "": raise RuntimeError("Invalid 'to' address") - html_part = MIMEText(html, "html", "utf8") - text_part = MIMEText(text, "plain", "utf8") + html_part = MIMEText(html, "html", "utf-8") + text_part = MIMEText(text, "plain", "utf-8") multipart_msg = MIMEMultipart("alternative") multipart_msg["Subject"] = subject @@ -181,6 +183,7 @@ class SendEmailHandler: multipart_msg["To"] = email_address multipart_msg["Date"] = email.utils.formatdate() multipart_msg["Message-ID"] = email.utils.make_msgid() + # Discourage automatic responses to Synapse's emails. # Per RFC 3834, automatic responses should not be sent if the "Auto-Submitted" # header is present with any value other than "no". See @@ -194,6 +197,11 @@ class SendEmailHandler: # https://stackoverflow.com/a/25324691/5252017 # https://stackoverflow.com/a/61646381/5252017 multipart_msg["X-Auto-Response-Suppress"] = "All" + + if additional_headers: + for header, value in additional_headers.items(): + multipart_msg[header] = value + multipart_msg.attach(text_part) multipart_msg.attach(html_part) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 60a9f341b5..1a4d394eda 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py
@@ -40,6 +40,7 @@ from synapse.api.filtering import FilterCollection from synapse.api.presence import UserPresenceState from synapse.api.room_versions import KNOWN_ROOM_VERSIONS from synapse.events import EventBase +from synapse.handlers.device import DELETE_DEVICE_MSGS_TASK_NAME from synapse.handlers.relations import BundledAggregations from synapse.logging import issue9533_logger from synapse.logging.context import current_context @@ -56,6 +57,7 @@ from synapse.storage.roommember import MemberSummary from synapse.types import ( DeviceListUpdates, JsonDict, + JsonMapping, MutableStateMap, Requester, RoomStreamToken, @@ -268,6 +270,7 @@ class SyncHandler: self._storage_controllers = hs.get_storage_controllers() self._state_storage_controller = self._storage_controllers.state self._device_handler = hs.get_device_handler() + self._task_scheduler = hs.get_task_scheduler() self.should_calculate_push_rules = hs.config.push.enable_push @@ -360,13 +363,36 @@ class SyncHandler: # (since we now know that the device has received them) if since_token is not None: since_stream_id = since_token.to_device_key + # Fast path: delete a limited number of to-device messages up front. + # We do this to avoid the overhead of scheduling a task for every + # sync. + device_deletion_limit = 100 deleted = await self.store.delete_messages_for_device( - sync_config.user.to_string(), sync_config.device_id, since_stream_id + sync_config.user.to_string(), + sync_config.device_id, + since_stream_id, + limit=device_deletion_limit, ) logger.debug( "Deleted %d to-device messages up to %d", deleted, since_stream_id ) + # If we hit the limit, schedule a background task to delete the rest. + if deleted >= device_deletion_limit: + await self._task_scheduler.schedule_task( + DELETE_DEVICE_MSGS_TASK_NAME, + resource_id=sync_config.device_id, + params={ + "user_id": sync_config.user.to_string(), + "device_id": sync_config.device_id, + "up_to_stream_id": since_stream_id, + }, + ) + logger.debug( + "Deletion of to-device messages up to %d scheduled", + since_stream_id, + ) + if timeout == 0 or since_token is None or full_state: # we are going to return immediately, so don't bother calling # notifier.wait_for_events. @@ -1768,19 +1794,23 @@ class SyncHandler: ) if push_rules_changed: - global_account_data = dict(global_account_data) - global_account_data[ - AccountDataTypes.PUSH_RULES - ] = await self._push_rules_handler.push_rules_for_user(sync_config.user) + global_account_data = { + AccountDataTypes.PUSH_RULES: await self._push_rules_handler.push_rules_for_user( + sync_config.user + ), + **global_account_data, + } else: all_global_account_data = await self.store.get_global_account_data_for_user( user_id ) - global_account_data = dict(all_global_account_data) - global_account_data[ - AccountDataTypes.PUSH_RULES - ] = await self._push_rules_handler.push_rules_for_user(sync_config.user) + global_account_data = { + AccountDataTypes.PUSH_RULES: await self._push_rules_handler.push_rules_for_user( + sync_config.user + ), + **all_global_account_data, + } account_data_for_user = ( await sync_config.filter_collection.filter_global_account_data( @@ -1884,7 +1914,7 @@ class SyncHandler: blocks_all_rooms or sync_result_builder.sync_config.filter_collection.blocks_all_room_account_data() ): - account_data_by_room: Mapping[str, Mapping[str, JsonDict]] = {} + account_data_by_room: Mapping[str, Mapping[str, JsonMapping]] = {} elif since_token and not sync_result_builder.full_state: account_data_by_room = ( await self.store.get_updated_room_account_data_for_user( @@ -2324,8 +2354,8 @@ class SyncHandler: sync_result_builder: "SyncResultBuilder", room_builder: "RoomSyncResultBuilder", ephemeral: List[JsonDict], - tags: Optional[Mapping[str, Mapping[str, Any]]], - account_data: Mapping[str, JsonDict], + tags: Optional[Mapping[str, JsonMapping]], + account_data: Mapping[str, JsonMapping], always_include: bool = False, ) -> None: """Populates the `joined` and `archived` section of `sync_result_builder` diff --git a/synapse/http/client.py b/synapse/http/client.py
index ca2cdbc6e2..c750e03b36 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py
@@ -78,7 +78,7 @@ from synapse.http.replicationagent import ReplicationAgent from synapse.http.types import QueryParams from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.logging.opentracing import set_tag, start_active_span, tags -from synapse.types import ISynapseReactor +from synapse.types import ISynapseReactor, StrSequence from synapse.util import json_decoder from synapse.util.async_helpers import timeout_deferred @@ -108,10 +108,9 @@ RawHeaders = Union[Mapping[str, "RawHeaderValue"], Mapping[bytes, "RawHeaderValu # the value actually has to be a List, but List is invariant so we can't specify that # the entries can either be Lists or bytes. RawHeaderValue = Union[ - List[str], + StrSequence, List[bytes], List[Union[str, bytes]], - Tuple[str, ...], Tuple[bytes, ...], Tuple[Union[str, bytes], ...], ] diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py
index 91a24efcd0..a3a396bb37 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py
@@ -399,15 +399,34 @@ class MatrixHostnameEndpoint: if port or _is_ip_literal(host): return [Server(host, port or 8448)] + # Check _matrix-fed._tcp SRV record. logger.debug("Looking up SRV record for %s", host.decode(errors="replace")) + server_list = await self._srv_resolver.resolve_service( + b"_matrix-fed._tcp." + host + ) + + if server_list: + if logger.isEnabledFor(logging.DEBUG): + logger.debug( + "Got %s from SRV lookup for %s", + ", ".join(map(str, server_list)), + host.decode(errors="replace"), + ) + return server_list + + # No _matrix-fed._tcp SRV record, fallback to legacy _matrix._tcp SRV record. + logger.debug( + "Looking up deprecated SRV record for %s", host.decode(errors="replace") + ) server_list = await self._srv_resolver.resolve_service(b"_matrix._tcp." + host) if server_list: - logger.debug( - "Got %s from SRV lookup for %s", - ", ".join(map(str, server_list)), - host.decode(errors="replace"), - ) + if logger.isEnabledFor(logging.DEBUG): + logger.debug( + "Got %s from deprecated SRV lookup for %s", + ", ".join(map(str, server_list)), + host.decode(errors="replace"), + ) return server_list # No SRV records, so we fallback to host and 8448 diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py
index fc62793628..5d79d31579 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py
@@ -18,7 +18,6 @@ import logging from http import HTTPStatus from typing import ( TYPE_CHECKING, - Iterable, List, Mapping, Optional, @@ -38,7 +37,7 @@ from twisted.web.server import Request from synapse.api.errors import Codes, SynapseError from synapse.http import redact_uri from synapse.http.server import HttpServer -from synapse.types import JsonDict, RoomAlias, RoomID +from synapse.types import JsonDict, RoomAlias, RoomID, StrCollection from synapse.util import json_decoder if TYPE_CHECKING: @@ -340,7 +339,7 @@ def parse_string( name: str, default: str, *, - allowed_values: Optional[Iterable[str]] = None, + allowed_values: Optional[StrCollection] = None, encoding: str = "ascii", ) -> str: ... @@ -352,7 +351,7 @@ def parse_string( name: str, *, required: Literal[True], - allowed_values: Optional[Iterable[str]] = None, + allowed_values: Optional[StrCollection] = None, encoding: str = "ascii", ) -> str: ... @@ -365,7 +364,7 @@ def parse_string( *, default: Optional[str] = None, required: bool = False, - allowed_values: Optional[Iterable[str]] = None, + allowed_values: Optional[StrCollection] = None, encoding: str = "ascii", ) -> Optional[str]: ... @@ -376,7 +375,7 @@ def parse_string( name: str, default: Optional[str] = None, required: bool = False, - allowed_values: Optional[Iterable[str]] = None, + allowed_values: Optional[StrCollection] = None, encoding: str = "ascii", ) -> Optional[str]: """ @@ -485,7 +484,7 @@ def parse_enum( def _parse_string_value( value: bytes, - allowed_values: Optional[Iterable[str]], + allowed_values: Optional[StrCollection], name: str, encoding: str, ) -> str: @@ -511,7 +510,7 @@ def parse_strings_from_args( args: Mapping[bytes, Sequence[bytes]], name: str, *, - allowed_values: Optional[Iterable[str]] = None, + allowed_values: Optional[StrCollection] = None, encoding: str = "ascii", ) -> Optional[List[str]]: ... @@ -523,7 +522,7 @@ def parse_strings_from_args( name: str, default: List[str], *, - allowed_values: Optional[Iterable[str]] = None, + allowed_values: Optional[StrCollection] = None, encoding: str = "ascii", ) -> List[str]: ... @@ -535,7 +534,7 @@ def parse_strings_from_args( name: str, *, required: Literal[True], - allowed_values: Optional[Iterable[str]] = None, + allowed_values: Optional[StrCollection] = None, encoding: str = "ascii", ) -> List[str]: ... @@ -548,7 +547,7 @@ def parse_strings_from_args( default: Optional[List[str]] = None, *, required: bool = False, - allowed_values: Optional[Iterable[str]] = None, + allowed_values: Optional[StrCollection] = None, encoding: str = "ascii", ) -> Optional[List[str]]: ... @@ -559,7 +558,7 @@ def parse_strings_from_args( name: str, default: Optional[List[str]] = None, required: bool = False, - allowed_values: Optional[Iterable[str]] = None, + allowed_values: Optional[StrCollection] = None, encoding: str = "ascii", ) -> Optional[List[str]]: """ @@ -610,7 +609,7 @@ def parse_string_from_args( name: str, default: Optional[str] = None, *, - allowed_values: Optional[Iterable[str]] = None, + allowed_values: Optional[StrCollection] = None, encoding: str = "ascii", ) -> Optional[str]: ... @@ -623,7 +622,7 @@ def parse_string_from_args( default: Optional[str] = None, *, required: Literal[True], - allowed_values: Optional[Iterable[str]] = None, + allowed_values: Optional[StrCollection] = None, encoding: str = "ascii", ) -> str: ... @@ -635,7 +634,7 @@ def parse_string_from_args( name: str, default: Optional[str] = None, required: bool = False, - allowed_values: Optional[Iterable[str]] = None, + allowed_values: Optional[StrCollection] = None, encoding: str = "ascii", ) -> Optional[str]: ... @@ -646,7 +645,7 @@ def parse_string_from_args( name: str, default: Optional[str] = None, required: bool = False, - allowed_values: Optional[Iterable[str]] = None, + allowed_values: Optional[StrCollection] = None, encoding: str = "ascii", ) -> Optional[str]: """ @@ -821,7 +820,7 @@ def parse_and_validate_json_object_from_request( return validate_json_object(content, model_type) -def assert_params_in_dict(body: JsonDict, required: Iterable[str]) -> None: +def assert_params_in_dict(body: JsonDict, required: StrCollection) -> None: absent = [] for k in required: if k not in body: diff --git a/synapse/logging/context.py b/synapse/logging/context.py
index 64c6ae4512..bf7e311026 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py
@@ -728,7 +728,7 @@ async def _unwrap_awaitable(awaitable: Awaitable[R]) -> R: @overload -def preserve_fn( # type: ignore[misc] +def preserve_fn( f: Callable[P, Awaitable[R]], ) -> Callable[P, "defer.Deferred[R]"]: # The `type: ignore[misc]` above suppresses @@ -756,7 +756,7 @@ def preserve_fn( @overload -def run_in_background( # type: ignore[misc] +def run_in_background( f: Callable[P, Awaitable[R]], *args: P.args, **kwargs: P.kwargs ) -> "defer.Deferred[R]": # The `type: ignore[misc]` above suppresses diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
index 5c3045e197..4454fe29a5 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py
@@ -991,11 +991,7 @@ def trace_with_opname( if not opentracing: return func - # type-ignore: mypy seems to be confused by the ParamSpecs here. - # I think the problem is https://github.com/python/mypy/issues/12909 - return _custom_sync_async_decorator( - func, _wrapping_logic # type: ignore[arg-type] - ) + return _custom_sync_async_decorator(func, _wrapping_logic) return _decorator @@ -1040,9 +1036,7 @@ def tag_args(func: Callable[P, R]) -> Callable[P, R]: set_tag(SynapseTags.FUNC_KWARGS, str(kwargs)) yield - # type-ignore: mypy seems to be confused by the ParamSpecs here. - # I think the problem is https://github.com/python/mypy/issues/12909 - return _custom_sync_async_decorator(func, _wrapping_logic) # type: ignore[arg-type] + return _custom_sync_async_decorator(func, _wrapping_logic) @contextlib.contextmanager diff --git a/synapse/media/url_previewer.py b/synapse/media/url_previewer.py
index 70b32cee17..9b5a3dd5f4 100644 --- a/synapse/media/url_previewer.py +++ b/synapse/media/url_previewer.py
@@ -846,9 +846,7 @@ def _is_media(content_type: str) -> bool: def _is_html(content_type: str) -> bool: content_type = content_type.lower() - return content_type.startswith("text/html") or content_type.startswith( - "application/xhtml" - ) + return content_type.startswith(("text/html", "application/xhtml")) def _is_json(content_type: str) -> bool: diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py
index 39fc629937..3cf2fbc3e2 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py
@@ -25,7 +25,6 @@ from typing import ( Iterable, Mapping, Optional, - Sequence, Set, Tuple, Type, @@ -49,6 +48,7 @@ import synapse.metrics._reactor_metrics # noqa: F401 from synapse.metrics._gc import MIN_TIME_BETWEEN_GCS, install_gc_manager from synapse.metrics._twisted_exposition import MetricsResource, generate_latest from synapse.metrics._types import Collector +from synapse.types import StrSequence from synapse.util import SYNAPSE_VERSION logger = logging.getLogger(__name__) @@ -81,7 +81,7 @@ class LaterGauge(Collector): name: str desc: str - labels: Optional[Sequence[str]] = attr.ib(hash=False) + labels: Optional[StrSequence] = attr.ib(hash=False) # callback: should either return a value (if there are no labels for this metric), # or dict mapping from a label tuple to a value caller: Callable[ @@ -143,8 +143,8 @@ class InFlightGauge(Generic[MetricsEntry], Collector): self, name: str, desc: str, - labels: Sequence[str], - sub_metrics: Sequence[str], + labels: StrSequence, + sub_metrics: StrSequence, ): self.name = name self.desc = desc diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py
index 9ea4e23b31..fceb7a9f3c 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py
@@ -48,6 +48,9 @@ from synapse.metrics._types import Collector if TYPE_CHECKING: import resource + # Old versions don't have `LiteralString` + from typing_extensions import LiteralString + logger = logging.getLogger(__name__) @@ -191,7 +194,7 @@ R = TypeVar("R") def run_as_background_process( - desc: str, + desc: "LiteralString", func: Callable[..., Awaitable[Optional[R]]], *args: Any, bg_start_span: bool = True, @@ -259,7 +262,7 @@ P = ParamSpec("P") def wrap_as_background_process( - desc: str, + desc: "LiteralString", ) -> Callable[ [Callable[P, Awaitable[Optional[R]]]], Callable[P, "defer.Deferred[Optional[R]]"], @@ -322,13 +325,21 @@ class BackgroundProcessLoggingContext(LoggingContext): if instance_id is None: instance_id = id(self) super().__init__("%s-%s" % (name, instance_id)) - self._proc = _BackgroundProcess(name, self) + self._proc: Optional[_BackgroundProcess] = _BackgroundProcess(name, self) def start(self, rusage: "Optional[resource.struct_rusage]") -> None: """Log context has started running (again).""" super().start(rusage) + if self._proc is None: + logger.error( + "Background process re-entered without a proc: %s", + self.name, + stack_info=True, + ) + return + # We've become active again so we make sure we're in the list of active # procs. (Note that "start" here means we've become active, as opposed # to starting for the first time.) @@ -345,6 +356,14 @@ class BackgroundProcessLoggingContext(LoggingContext): super().__exit__(type, value, traceback) + if self._proc is None: + logger.error( + "Background process exited without a proc: %s", + self.name, + stack_info=True, + ) + return + # The background process has finished. We explicitly remove and manually # update the metrics here so that if nothing is scraping metrics the set # doesn't infinitely grow. @@ -352,3 +371,6 @@ class BackgroundProcessLoggingContext(LoggingContext): _background_processes_active_since_last_scrape.discard(self._proc) self._proc.update_metrics() + + # Set proc to None to break the reference cycle. + self._proc = None diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py
index 2f00a7ba20..65e2aca456 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py
@@ -572,7 +572,7 @@ class ModuleApi: Returns: UserInfo object if a user was found, otherwise None """ - return await self._store.get_userinfo_by_id(user_id) + return await self._store.get_user_by_id(user_id) async def get_user_by_req( self, @@ -1730,6 +1730,30 @@ class ModuleApi: room_alias_str = room_alias.to_string() if room_alias else None return room_id, room_alias_str + async def delete_room(self, room_id: str) -> None: + """ + Schedules the deletion of a room from Synapse's database. + + If the room is already being deleted, this method does nothing. + This method does not wait for the room to be deleted. + + Added in Synapse v1.89.0. + """ + # Future extensions to this method might want to e.g. allow use of `force_purge`. + # TODO In the future we should make sure this is persistent. + await self._hs.get_pagination_handler().start_shutdown_and_purge_room( + room_id, + { + "new_room_user_id": None, + "new_room_name": None, + "message": None, + "requester_user_id": None, + "block": False, + "purge": True, + "force_purge": False, + }, + ) + async def set_displayname( self, user_id: UserID, @@ -1865,7 +1889,7 @@ class AccountDataManager: raise TypeError(f"new_data must be a dict; got {type(new_data).__name__}") # Ensure the user exists, so we don't just write to users that aren't there. - if await self._store.get_userinfo_by_id(user_id) is None: + if await self._store.get_user_by_id(user_id) is None: raise ValueError(f"User {user_id} does not exist on this server.") await self._handler.add_account_data_for_user(user_id, data_type, new_data) diff --git a/synapse/module_api/callbacks/third_party_event_rules_callbacks.py b/synapse/module_api/callbacks/third_party_event_rules_callbacks.py
index 911f37ba42..ecaeef3511 100644 --- a/synapse/module_api/callbacks/third_party_event_rules_callbacks.py +++ b/synapse/module_api/callbacks/third_party_event_rules_callbacks.py
@@ -40,7 +40,7 @@ CHECK_VISIBILITY_CAN_BE_MODIFIED_CALLBACK = Callable[ [str, StateMap[EventBase], str], Awaitable[bool] ] ON_NEW_EVENT_CALLBACK = Callable[[EventBase, StateMap[EventBase]], Awaitable] -CHECK_CAN_SHUTDOWN_ROOM_CALLBACK = Callable[[str, str], Awaitable[bool]] +CHECK_CAN_SHUTDOWN_ROOM_CALLBACK = Callable[[Optional[str], str], Awaitable[bool]] CHECK_CAN_DEACTIVATE_USER_CALLBACK = Callable[[str, bool], Awaitable[bool]] ON_PROFILE_UPDATE_CALLBACK = Callable[[str, ProfileInfo, bool, bool], Awaitable] ON_USER_DEACTIVATION_STATUS_CHANGED_CALLBACK = Callable[[str, bool, bool], Awaitable] @@ -429,12 +429,17 @@ class ThirdPartyEventRulesModuleApiCallbacks: "Failed to run module API callback %s: %s", callback, e ) - async def check_can_shutdown_room(self, user_id: str, room_id: str) -> bool: + async def check_can_shutdown_room( + self, user_id: Optional[str], room_id: str + ) -> bool: """Intercept requests to shutdown a room. If `False` is returned, the room must not be shut down. Args: - requester: The ID of the user requesting the shutdown. + user_id: The ID of the user requesting the shutdown. + If no user ID is supplied, then the room is being shut down through + some mechanism other than a user's request, e.g. through a module's + request. room_id: The ID of the room. """ for callback in self._check_can_shutdown_room_callbacks: diff --git a/synapse/notifier.py b/synapse/notifier.py
index 68115bca70..fc39e5c963 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py
@@ -104,7 +104,7 @@ class _NotifierUserStream: def __init__( self, user_id: str, - rooms: Collection[str], + rooms: StrCollection, current_token: StreamToken, time_now_ms: int, ): @@ -457,7 +457,7 @@ class Notifier: stream_key: str, new_token: Union[int, RoomStreamToken], users: Optional[Collection[Union[str, UserID]]] = None, - rooms: Optional[Collection[str]] = None, + rooms: Optional[StrCollection] = None, ) -> None: """Used to inform listeners that something has happened event wise. @@ -529,7 +529,7 @@ class Notifier: user_id: str, timeout: int, callback: Callable[[StreamToken, StreamToken], Awaitable[T]], - room_ids: Optional[Collection[str]] = None, + room_ids: Optional[StrCollection] = None, from_token: StreamToken = StreamToken.START, ) -> T: """Wait until the callback returns a non empty response or the diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py
index 79e0627b6a..b6cad18c2d 100644 --- a/synapse/push/mailer.py +++ b/synapse/push/mailer.py
@@ -298,20 +298,26 @@ class Mailer: notifs_by_room, state_by_room, notif_events, reason ) + unsubscribe_link = self._make_unsubscribe_link(user_id, app_id, email_address) + template_vars: TemplateVars = { "user_display_name": user_display_name, - "unsubscribe_link": self._make_unsubscribe_link( - user_id, app_id, email_address - ), + "unsubscribe_link": unsubscribe_link, "summary_text": summary_text, "rooms": rooms, "reason": reason, } - await self.send_email(email_address, summary_text, template_vars) + await self.send_email( + email_address, summary_text, template_vars, unsubscribe_link + ) async def send_email( - self, email_address: str, subject: str, extra_template_vars: TemplateVars + self, + email_address: str, + subject: str, + extra_template_vars: TemplateVars, + unsubscribe_link: Optional[str] = None, ) -> None: """Send an email with the given information and template text""" template_vars: TemplateVars = { @@ -330,6 +336,23 @@ class Mailer: app_name=self.app_name, html=html_text, text=plain_text, + # Include the List-Unsubscribe header which some clients render in the UI. + # Per RFC 2369, this can be a URL or mailto URL. See + # https://www.rfc-editor.org/rfc/rfc2369.html#section-3.2 + # + # It is preferred to use email, but Synapse doesn't support incoming email. + # + # Also include the List-Unsubscribe-Post header from RFC 8058. See + # https://www.rfc-editor.org/rfc/rfc8058.html#section-3.1 + # + # Note that many email clients will not render the unsubscribe link + # unless DKIM, etc. is properly setup. + additional_headers={ + "List-Unsubscribe-Post": "List-Unsubscribe=One-Click", + "List-Unsubscribe": f"<{unsubscribe_link}>", + } + if unsubscribe_link + else None, ) async def _get_room_vars( diff --git a/synapse/replication/http/devices.py b/synapse/replication/http/devices.py
index 209833d287..b8198e059c 100644 --- a/synapse/replication/http/devices.py +++ b/synapse/replication/http/devices.py
@@ -20,7 +20,7 @@ from twisted.web.server import Request from synapse.http.server import HttpServer from synapse.logging.opentracing import active_span from synapse.replication.http._base import ReplicationEndpoint -from synapse.types import JsonDict +from synapse.types import JsonDict, JsonMapping if TYPE_CHECKING: from synapse.server import HomeServer @@ -82,7 +82,7 @@ class ReplicationMultiUserDevicesResyncRestServlet(ReplicationEndpoint): async def _handle_request( # type: ignore[override] self, request: Request, content: JsonDict - ) -> Tuple[int, Dict[str, Optional[JsonDict]]]: + ) -> Tuple[int, Dict[str, Optional[JsonMapping]]]: user_ids: List[str] = content["user_ids"] logger.info("Resync for %r", user_ids) diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py
index d9045d7b73..b668bb5da1 100644 --- a/synapse/replication/tcp/handler.py +++ b/synapse/replication/tcp/handler.py
@@ -644,7 +644,7 @@ class ReplicationCommandHandler: [stream.parse_row(row) for row in rows], ) - logger.info("Caught up with stream '%s' to %i", stream_name, cmd.new_token) + logger.info("Caught up with stream '%s' to %i", stream_name, cmd.new_token) # We've now caught up to position sent to us, notify handler. await self._replication_data_handler.on_position( @@ -672,14 +672,12 @@ class ReplicationCommandHandler: cmd.instance_name, cmd.lock_name, cmd.lock_key ) - async def on_NEW_ACTIVE_TASK( + def on_NEW_ACTIVE_TASK( self, conn: IReplicationConnection, cmd: NewActiveTaskCommand ) -> None: """Called when get a new NEW_ACTIVE_TASK command.""" if self._task_scheduler: - task = await self._task_scheduler.get_task(cmd.data) - if task: - await self._task_scheduler._launch_task(task) + self._task_scheduler.launch_task_by_id(cmd.data) def new_connection(self, connection: IReplicationConnection) -> None: """Called when we have a new connection.""" diff --git a/synapse/replication/tcp/resource.py b/synapse/replication/tcp/resource.py
index 347467d863..1d9a29d22e 100644 --- a/synapse/replication/tcp/resource.py +++ b/synapse/replication/tcp/resource.py
@@ -191,7 +191,12 @@ class ReplicationStreamer: if updates: logger.info( - "Streaming: %s -> %s", stream.NAME, updates[-1][0] + "Streaming: %s -> %s (limited: %s, updates: %s, max token: %s)", + stream.NAME, + updates[-1][0], + limited, + len(updates), + current_token, ) stream_updates_counter.labels(stream.NAME).inc(len(updates)) diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py
index df0845edb2..1be9c47c61 100644 --- a/synapse/rest/__init__.py +++ b/synapse/rest/__init__.py
@@ -123,7 +123,7 @@ class ClientRestResource(JsonResource): if is_main_process: report_event.register_servlets(hs, client_resource) openid.register_servlets(hs, client_resource) - notifications.register_servlets(hs, client_resource) + notifications.register_servlets(hs, client_resource) devices.register_servlets(hs, client_resource) if is_main_process: thirdparty.register_servlets(hs, client_resource) diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py
index 0d42c89ff7..7d0b4b55a0 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py
@@ -21,6 +21,7 @@ from http import HTTPStatus from typing import TYPE_CHECKING, Optional, Tuple from synapse.api.errors import Codes, NotFoundError, SynapseError +from synapse.handlers.pagination import PURGE_HISTORY_ACTION_NAME from synapse.http.server import HttpServer, JsonResource from synapse.http.servlet import RestServlet, parse_json_object_from_request from synapse.http.site import SynapseRequest @@ -93,7 +94,7 @@ from synapse.rest.admin.users import ( UserTokenRestServlet, WhoisRestServlet, ) -from synapse.types import JsonDict, RoomStreamToken +from synapse.types import JsonDict, RoomStreamToken, TaskStatus from synapse.util import SYNAPSE_VERSION if TYPE_CHECKING: @@ -196,7 +197,7 @@ class PurgeHistoryRestServlet(RestServlet): errcode=Codes.BAD_JSON, ) - purge_id = self.pagination_handler.start_purge_history( + purge_id = await self.pagination_handler.start_purge_history( room_id, token, delete_local_events=delete_local_events ) @@ -215,11 +216,20 @@ class PurgeHistoryStatusRestServlet(RestServlet): ) -> Tuple[int, JsonDict]: await assert_requester_is_admin(self.auth, request) - purge_status = self.pagination_handler.get_purge_status(purge_id) - if purge_status is None: + purge_task = await self.pagination_handler.get_delete_task(purge_id) + if purge_task is None or purge_task.action != PURGE_HISTORY_ACTION_NAME: raise NotFoundError("purge id '%s' not found" % purge_id) - return HTTPStatus.OK, purge_status.asdict() + result: JsonDict = { + "status": purge_task.status + if purge_task.status == TaskStatus.COMPLETE + or purge_task.status == TaskStatus.FAILED + else "active", + } + if purge_task.error: + result["error"] = purge_task.error + + return HTTPStatus.OK, result ######################################################################################## diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py
index 1d65560265..436718c8b2 100644 --- a/synapse/rest/admin/rooms.py +++ b/synapse/rest/admin/rooms.py
@@ -19,6 +19,10 @@ from urllib import parse as urlparse from synapse.api.constants import Direction, EventTypes, JoinRules, Membership from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError from synapse.api.filtering import Filter +from synapse.handlers.pagination import ( + PURGE_ROOM_ACTION_NAME, + SHUTDOWN_AND_PURGE_ROOM_ACTION_NAME, +) from synapse.http.servlet import ( ResolveRoomIdMixin, RestServlet, @@ -36,7 +40,7 @@ from synapse.rest.admin._base import ( ) from synapse.storage.databases.main.room import RoomSortOrder from synapse.streams.config import PaginationConfig -from synapse.types import JsonDict, RoomID, UserID, create_requester +from synapse.types import JsonDict, RoomID, ScheduledTask, UserID, create_requester from synapse.types.state import StateFilter from synapse.util import json_decoder @@ -117,20 +121,30 @@ class RoomRestV2Servlet(RestServlet): 403, "Shutdown of this room is forbidden", Codes.FORBIDDEN ) - delete_id = self._pagination_handler.start_shutdown_and_purge_room( + delete_id = await self._pagination_handler.start_shutdown_and_purge_room( room_id=room_id, - new_room_user_id=content.get("new_room_user_id"), - new_room_name=content.get("room_name"), - message=content.get("message"), - requester_user_id=requester.user.to_string(), - block=block, - purge=purge, - force_purge=force_purge, + shutdown_params={ + "new_room_user_id": content.get("new_room_user_id"), + "new_room_name": content.get("room_name"), + "message": content.get("message"), + "requester_user_id": requester.user.to_string(), + "block": block, + "purge": purge, + "force_purge": force_purge, + }, ) return HTTPStatus.OK, {"delete_id": delete_id} +def _convert_delete_task_to_response(task: ScheduledTask) -> JsonDict: + return { + "delete_id": task.id, + "status": task.status, + "shutdown_room": task.result, + } + + class DeleteRoomStatusByRoomIdRestServlet(RestServlet): """Get the status of the delete room background task.""" @@ -150,21 +164,16 @@ class DeleteRoomStatusByRoomIdRestServlet(RestServlet): HTTPStatus.BAD_REQUEST, "%s is not a legal room ID" % (room_id,) ) - delete_ids = self._pagination_handler.get_delete_ids_by_room(room_id) - if delete_ids is None: - raise NotFoundError("No delete task for room_id '%s' found" % room_id) + delete_tasks = await self._pagination_handler.get_delete_tasks_by_room(room_id) - response = [] - for delete_id in delete_ids: - delete = self._pagination_handler.get_delete_status(delete_id) - if delete: - response += [ - { - "delete_id": delete_id, - **delete.asdict(), - } - ] - return HTTPStatus.OK, {"results": cast(JsonDict, response)} + if delete_tasks: + return HTTPStatus.OK, { + "results": [ + _convert_delete_task_to_response(task) for task in delete_tasks + ], + } + else: + raise NotFoundError("No delete task for room_id '%s' found" % room_id) class DeleteRoomStatusByDeleteIdRestServlet(RestServlet): @@ -181,11 +190,14 @@ class DeleteRoomStatusByDeleteIdRestServlet(RestServlet): ) -> Tuple[int, JsonDict]: await assert_requester_is_admin(self._auth, request) - delete_status = self._pagination_handler.get_delete_status(delete_id) - if delete_status is None: + delete_task = await self._pagination_handler.get_delete_task(delete_id) + if delete_task is None or ( + delete_task.action != PURGE_ROOM_ACTION_NAME + and delete_task.action != SHUTDOWN_AND_PURGE_ROOM_ACTION_NAME + ): raise NotFoundError("delete id '%s' not found" % delete_id) - return HTTPStatus.OK, cast(JsonDict, delete_status.asdict()) + return HTTPStatus.OK, _convert_delete_task_to_response(delete_task) class ListRoomRestServlet(RestServlet): @@ -349,11 +361,15 @@ class RoomRestServlet(RestServlet): ret = await room_shutdown_handler.shutdown_room( room_id=room_id, - new_room_user_id=content.get("new_room_user_id"), - new_room_name=content.get("room_name"), - message=content.get("message"), - requester_user_id=requester.user.to_string(), - block=block, + params={ + "new_room_user_id": content.get("new_room_user_id"), + "new_room_name": content.get("room_name"), + "message": content.get("message"), + "requester_user_id": requester.user.to_string(), + "block": block, + "purge": purge, + "force_purge": force_purge, + }, ) # Purge room diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py
index 91898a5c13..5b743a1d03 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py
@@ -39,7 +39,7 @@ from synapse.rest.admin._base import ( from synapse.rest.client._base import client_patterns from synapse.storage.databases.main.registration import ExternalIDReuseException from synapse.storage.databases.main.stats import UserSortOrder -from synapse.types import JsonDict, UserID +from synapse.types import JsonDict, JsonMapping, UserID if TYPE_CHECKING: from synapse.server import HomeServer @@ -66,6 +66,7 @@ class UsersRestServletV2(RestServlet): The parameter `deactivated` can be used to include deactivated users. The parameter `order_by` can be used to order the result. The parameter `not_user_type` can be used to exclude certain user types. + The parameter `locked` can be used to include locked users. Possible values are `bot`, `support` or "empty string". "empty string" here means to exclude users without a type. """ @@ -107,8 +108,9 @@ class UsersRestServletV2(RestServlet): "The guests parameter is not supported when MSC3861 is enabled.", errcode=Codes.INVALID_PARAM, ) - deactivated = parse_boolean(request, "deactivated", default=False) + deactivated = parse_boolean(request, "deactivated", default=False) + locked = parse_boolean(request, "locked", default=False) admins = parse_boolean(request, "admins") # If support for MSC3866 is not enabled, apply no filtering based on the @@ -133,6 +135,7 @@ class UsersRestServletV2(RestServlet): UserSortOrder.SHADOW_BANNED.value, UserSortOrder.CREATION_TS.value, UserSortOrder.LAST_SEEN_TS.value, + UserSortOrder.LOCKED.value, ), ) @@ -154,6 +157,7 @@ class UsersRestServletV2(RestServlet): direction, approved, not_user_types, + locked, ) # If support for MSC3866 is not enabled, don't show the approval flag. @@ -211,7 +215,7 @@ class UserRestServletV2(RestServlet): async def on_GET( self, request: SynapseRequest, user_id: str - ) -> Tuple[int, JsonDict]: + ) -> Tuple[int, JsonMapping]: await assert_requester_is_admin(self.auth, request) target_user = UserID.from_string(user_id) @@ -226,7 +230,7 @@ class UserRestServletV2(RestServlet): async def on_PUT( self, request: SynapseRequest, user_id: str - ) -> Tuple[int, JsonDict]: + ) -> Tuple[int, JsonMapping]: requester = await self.auth.get_user_by_req(request) await assert_user_is_admin(self.auth, requester) @@ -658,7 +662,7 @@ class WhoisRestServlet(RestServlet): async def on_GET( self, request: SynapseRequest, user_id: str - ) -> Tuple[int, JsonDict]: + ) -> Tuple[int, JsonMapping]: target_user = UserID.from_string(user_id) requester = await self.auth.get_user_by_req(request) diff --git a/synapse/rest/client/_base.py b/synapse/rest/client/_base.py
index 5c1c19e1f3..73c568ef75 100644 --- a/synapse/rest/client/_base.py +++ b/synapse/rest/client/_base.py
@@ -20,14 +20,14 @@ from typing import Any, Awaitable, Callable, Iterable, Pattern, Tuple, TypeVar, from synapse.api.errors import InteractiveAuthIncompleteError from synapse.api.urls import CLIENT_API_PREFIX -from synapse.types import JsonDict +from synapse.types import JsonDict, StrCollection logger = logging.getLogger(__name__) def client_patterns( path_regex: str, - releases: Iterable[str] = ("r0", "v3"), + releases: StrCollection = ("r0", "v3"), unstable: bool = True, v1: bool = False, ) -> Iterable[Pattern]: diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py
index 679ab9f266..49cd0805fd 100644 --- a/synapse/rest/client/account.py +++ b/synapse/rest/client/account.py
@@ -179,85 +179,81 @@ class PasswordRestServlet(RestServlet): # # In the second case, we require a password to confirm their identity. - requester = None - if self.auth.has_access_token(request): - requester = await self.auth.get_user_by_req(request) - try: + try: + requester = None + if self.auth.has_access_token(request): + requester = await self.auth.get_user_by_req(request) params, session_id = await self.auth_handler.validate_user_via_ui_auth( requester, request, - body.dict(exclude_unset=True), + body.dict(exclude_unset=True, exclude={"new_password"}), "modify your account password", ) - except InteractiveAuthIncompleteError as e: - # The user needs to provide more steps to complete auth, but - # they're not required to provide the password again. - # - # If a password is available now, hash the provided password and - # store it for later. - if new_password: - new_password_hash = await self.auth_handler.hash(new_password) - await self.auth_handler.set_session_data( - e.session_id, - UIAuthSessionDataConstants.PASSWORD_HASH, - new_password_hash, - ) - raise - user_id = requester.user.to_string() - else: - try: + user_id = requester.user.to_string() + else: result, params, session_id = await self.auth_handler.check_ui_auth( [[LoginType.EMAIL_IDENTITY]], request, - body.dict(exclude_unset=True), + body.dict(exclude_unset=True, exclude={"new_password"}), "modify your account password", ) - except InteractiveAuthIncompleteError as e: - # The user needs to provide more steps to complete auth, but - # they're not required to provide the password again. - # - # If a password is available now, hash the provided password and - # store it for later. - if new_password: - new_password_hash = await self.auth_handler.hash(new_password) - await self.auth_handler.set_session_data( - e.session_id, - UIAuthSessionDataConstants.PASSWORD_HASH, - new_password_hash, + + if LoginType.EMAIL_IDENTITY in result: + threepid = result[LoginType.EMAIL_IDENTITY] + if "medium" not in threepid or "address" not in threepid: + raise SynapseError(500, "Malformed threepid") + if threepid["medium"] == "email": + # For emails, canonicalise the address. + # We store all email addresses canonicalised in the DB. + # (See add_threepid in synapse/handlers/auth.py) + try: + threepid["address"] = validate_email(threepid["address"]) + except ValueError as e: + raise SynapseError(400, str(e)) + # if using email, we must know about the email they're authing with! + threepid_user_id = await self.datastore.get_user_id_by_threepid( + threepid["medium"], threepid["address"] ) + if not threepid_user_id: + raise SynapseError( + 404, "Email address not found", Codes.NOT_FOUND + ) + user_id = threepid_user_id + else: + logger.error("Auth succeeded but no known type! %r", result.keys()) + raise SynapseError(500, "", Codes.UNKNOWN) + + except InteractiveAuthIncompleteError as e: + # The user needs to provide more steps to complete auth, but + # they're not required to provide the password again. + # + # If a password is available now, hash the provided password and + # store it for later. We only do this if we don't already have the + # password hash stored, to avoid repeatedly hashing the password. + + if not new_password: raise - if LoginType.EMAIL_IDENTITY in result: - threepid = result[LoginType.EMAIL_IDENTITY] - if "medium" not in threepid or "address" not in threepid: - raise SynapseError(500, "Malformed threepid") - if threepid["medium"] == "email": - # For emails, canonicalise the address. - # We store all email addresses canonicalised in the DB. - # (See add_threepid in synapse/handlers/auth.py) - try: - threepid["address"] = validate_email(threepid["address"]) - except ValueError as e: - raise SynapseError(400, str(e)) - # if using email, we must know about the email they're authing with! - threepid_user_id = await self.datastore.get_user_id_by_threepid( - threepid["medium"], threepid["address"] - ) - if not threepid_user_id: - raise SynapseError(404, "Email address not found", Codes.NOT_FOUND) - user_id = threepid_user_id - else: - logger.error("Auth succeeded but no known type! %r", result.keys()) - raise SynapseError(500, "", Codes.UNKNOWN) + existing_session_password_hash = await self.auth_handler.get_session_data( + e.session_id, UIAuthSessionDataConstants.PASSWORD_HASH, None + ) + if existing_session_password_hash: + raise + + new_password_hash = await self.auth_handler.hash(new_password) + await self.auth_handler.set_session_data( + e.session_id, + UIAuthSessionDataConstants.PASSWORD_HASH, + new_password_hash, + ) + raise # If we have a password in this request, prefer it. Otherwise, use the # password hash from an earlier request. if new_password: password_hash: Optional[str] = await self.auth_handler.hash(new_password) elif session_id is not None: - password_hash = await self.auth_handler.get_session_data( - session_id, UIAuthSessionDataConstants.PASSWORD_HASH, None - ) + password_hash = existing_session_password_hash else: # UI validation was skipped, but the request did not include a new # password. diff --git a/synapse/rest/client/account_data.py b/synapse/rest/client/account_data.py
index b1f9e9dc9b..ce0c4e7742 100644 --- a/synapse/rest/client/account_data.py +++ b/synapse/rest/client/account_data.py
@@ -20,7 +20,7 @@ from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_json_object_from_request from synapse.http.site import SynapseRequest -from synapse.types import JsonDict, RoomID +from synapse.types import JsonDict, JsonMapping, RoomID from ._base import client_patterns @@ -95,7 +95,7 @@ class AccountDataServlet(RestServlet): async def on_GET( self, request: SynapseRequest, user_id: str, account_data_type: str - ) -> Tuple[int, JsonDict]: + ) -> Tuple[int, JsonMapping]: requester = await self.auth.get_user_by_req(request) if user_id != requester.user.to_string(): raise AuthError(403, "Cannot get account data for other users.") @@ -106,7 +106,7 @@ class AccountDataServlet(RestServlet): and account_data_type == AccountDataTypes.PUSH_RULES ): account_data: Optional[ - JsonDict + JsonMapping ] = await self._push_rules_handler.push_rules_for_user(requester.user) else: account_data = await self.store.get_global_account_data_by_type_for_user( @@ -236,7 +236,7 @@ class RoomAccountDataServlet(RestServlet): user_id: str, room_id: str, account_data_type: str, - ) -> Tuple[int, JsonDict]: + ) -> Tuple[int, JsonMapping]: requester = await self.auth.get_user_by_req(request) if user_id != requester.user.to_string(): raise AuthError(403, "Cannot get account data for other users.") @@ -253,7 +253,7 @@ class RoomAccountDataServlet(RestServlet): self._hs.config.experimental.msc4010_push_rules_account_data and account_data_type == AccountDataTypes.PUSH_RULES ): - account_data: Optional[JsonDict] = {} + account_data: Optional[JsonMapping] = {} else: account_data = await self.store.get_account_data_for_room_and_type( user_id, room_id, account_data_type diff --git a/synapse/rest/client/notifications.py b/synapse/rest/client/notifications.py
index ea10042569..e7fe1332e7 100644 --- a/synapse/rest/client/notifications.py +++ b/synapse/rest/client/notifications.py
@@ -36,6 +36,8 @@ logger = logging.getLogger(__name__) class NotificationsServlet(RestServlet): PATTERNS = client_patterns("/notifications$") + CATEGORY = "Client API requests" + def __init__(self, hs: "HomeServer"): super().__init__() self.store = hs.get_datastores().main diff --git a/synapse/rest/client/read_marker.py b/synapse/rest/client/read_marker.py
index 1707e51972..15e4d56cdb 100644 --- a/synapse/rest/client/read_marker.py +++ b/synapse/rest/client/read_marker.py
@@ -84,7 +84,7 @@ class ReadMarkerRestServlet(RestServlet): await self.receipts_handler.received_client_receipt( room_id, receipt_type, - user_id=requester.user.to_string(), + user_id=requester.user, event_id=event_id, # Setting the thread ID is not possible with the /read_markers endpoint. thread_id=None, diff --git a/synapse/rest/client/receipts.py b/synapse/rest/client/receipts.py
index 869a374459..814d075faf 100644 --- a/synapse/rest/client/receipts.py +++ b/synapse/rest/client/receipts.py
@@ -108,7 +108,7 @@ class ReceiptRestServlet(RestServlet): await self.receipts_handler.received_client_receipt( room_id, receipt_type, - user_id=requester.user.to_string(), + user_id=requester.user, event_id=event_id, thread_id=thread_id, ) diff --git a/synapse/rest/consent/consent_resource.py b/synapse/rest/consent/consent_resource.py
index 25f9ea285b..88d3ec1baf 100644 --- a/synapse/rest/consent/consent_resource.py +++ b/synapse/rest/consent/consent_resource.py
@@ -129,7 +129,7 @@ class ConsentResource(DirectServeHtmlResource): if u is None: raise NotFoundError("Unknown user") - has_consented = u["consent_version"] == version + has_consented = u.consent_version == version userhmac = userhmac_bytes.decode("ascii") try: diff --git a/synapse/rest/synapse/client/unsubscribe.py b/synapse/rest/synapse/client/unsubscribe.py
index 60321018f9..050fd7bba1 100644 --- a/synapse/rest/synapse/client/unsubscribe.py +++ b/synapse/rest/synapse/client/unsubscribe.py
@@ -38,6 +38,10 @@ class UnsubscribeResource(DirectServeHtmlResource): self.macaroon_generator = hs.get_macaroon_generator() async def _async_render_GET(self, request: SynapseRequest) -> None: + """ + Handle a user opening an unsubscribe link in the browser, either via an + HTML/Text email or via the List-Unsubscribe header. + """ token = parse_string(request, "access_token", required=True) app_id = parse_string(request, "app_id", required=True) pushkey = parse_string(request, "pushkey", required=True) @@ -62,3 +66,16 @@ class UnsubscribeResource(DirectServeHtmlResource): 200, UnsubscribeResource.SUCCESS_HTML, ) + + async def _async_render_POST(self, request: SynapseRequest) -> None: + """ + Handle a mail user agent POSTing to the unsubscribe URL via the + List-Unsubscribe & List-Unsubscribe-Post headers. + """ + + # TODO Assert that the body has a single field + + # Assert the body has form encoded key/value pair of + # List-Unsubscribe=One-Click. + + await self._async_render_GET(request) diff --git a/synapse/server_notices/consent_server_notices.py b/synapse/server_notices/consent_server_notices.py
index 94025ba41f..a879b6505e 100644 --- a/synapse/server_notices/consent_server_notices.py +++ b/synapse/server_notices/consent_server_notices.py
@@ -79,15 +79,15 @@ class ConsentServerNotices: if u is None: return - if u["is_guest"] and not self._send_to_guests: + if u.is_guest and not self._send_to_guests: # don't send to guests return - if u["consent_version"] == self._current_consent_version: + if u.consent_version == self._current_consent_version: # user has already consented return - if u["consent_server_notice_sent"] == self._current_consent_version: + if u.consent_server_notice_sent == self._current_consent_version: # we've already sent a notice to the user return diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py
index 1b91cf5eaa..e977ed1044 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py
@@ -20,7 +20,6 @@ from typing import ( Any, Awaitable, Callable, - Collection, DefaultDict, Dict, FrozenSet, @@ -49,7 +48,7 @@ from synapse.logging.opentracing import tag_args, trace from synapse.replication.http.state import ReplicationUpdateCurrentStateRestServlet from synapse.state import v1, v2 from synapse.storage.databases.main.events_worker import EventRedactBehaviour -from synapse.types import StateMap +from synapse.types import StateMap, StrCollection from synapse.types.state import StateFilter from synapse.util.async_helpers import Linearizer from synapse.util.caches.expiringcache import ExpiringCache @@ -197,7 +196,7 @@ class StateHandler: async def compute_state_after_events( self, room_id: str, - event_ids: Collection[str], + event_ids: StrCollection, state_filter: Optional[StateFilter] = None, await_full_state: bool = True, ) -> StateMap[str]: @@ -231,7 +230,7 @@ class StateHandler: return await ret.get_state(self._state_storage_controller, state_filter) async def get_current_user_ids_in_room( - self, room_id: str, latest_event_ids: Collection[str] + self, room_id: str, latest_event_ids: StrCollection ) -> Set[str]: """ Get the users IDs who are currently in a room. @@ -256,7 +255,7 @@ class StateHandler: return await self.store.get_joined_user_ids_from_state(room_id, state) async def get_hosts_in_room_at_events( - self, room_id: str, event_ids: Collection[str] + self, room_id: str, event_ids: StrCollection ) -> FrozenSet[str]: """Get the hosts that were in a room at the given event ids @@ -470,7 +469,7 @@ class StateHandler: @trace @measure_func() async def resolve_state_groups_for_events( - self, room_id: str, event_ids: Collection[str], await_full_state: bool = True + self, room_id: str, event_ids: StrCollection, await_full_state: bool = True ) -> _StateCacheEntry: """Given a list of event_ids this method fetches the state at each event, resolves conflicts between them and returns them. @@ -882,7 +881,7 @@ class StateResolutionStore: store: "DataStore" def get_events( - self, event_ids: Collection[str], allow_rejected: bool = False + self, event_ids: StrCollection, allow_rejected: bool = False ) -> Awaitable[Dict[str, EventBase]]: """Get events from the database diff --git a/synapse/state/v1.py b/synapse/state/v1.py
index 500e384695..c76a2f082e 100644 --- a/synapse/state/v1.py +++ b/synapse/state/v1.py
@@ -17,7 +17,6 @@ import logging from typing import ( Awaitable, Callable, - Collection, Dict, Iterable, List, @@ -32,7 +31,7 @@ from synapse.api.constants import EventTypes from synapse.api.errors import AuthError from synapse.api.room_versions import RoomVersion from synapse.events import EventBase -from synapse.types import MutableStateMap, StateMap +from synapse.types import MutableStateMap, StateMap, StrCollection logger = logging.getLogger(__name__) @@ -45,7 +44,7 @@ async def resolve_events_with_store( room_version: RoomVersion, state_sets: Sequence[StateMap[str]], event_map: Optional[Dict[str, EventBase]], - state_map_factory: Callable[[Collection[str]], Awaitable[Dict[str, EventBase]]], + state_map_factory: Callable[[StrCollection], Awaitable[Dict[str, EventBase]]], ) -> StateMap[str]: """ Args: diff --git a/synapse/state/v2.py b/synapse/state/v2.py
index 44c49274a9..1752f95db8 100644 --- a/synapse/state/v2.py +++ b/synapse/state/v2.py
@@ -19,7 +19,6 @@ from typing import ( Any, Awaitable, Callable, - Collection, Dict, Generator, Iterable, @@ -39,7 +38,7 @@ from synapse.api.constants import EventTypes from synapse.api.errors import AuthError from synapse.api.room_versions import RoomVersion from synapse.events import EventBase -from synapse.types import MutableStateMap, StateMap +from synapse.types import MutableStateMap, StateMap, StrCollection logger = logging.getLogger(__name__) @@ -56,7 +55,7 @@ class StateResolutionStore(Protocol): # This is usually synapse.state.StateResolutionStore, but it's replaced with a # TestStateResolutionStore in tests. def get_events( - self, event_ids: Collection[str], allow_rejected: bool = False + self, event_ids: StrCollection, allow_rejected: bool = False ) -> Awaitable[Dict[str, EventBase]]: ... @@ -366,7 +365,7 @@ async def _get_auth_chain_difference( union = unpersisted_set_ids[0].union(*unpersisted_set_ids[1:]) intersection = unpersisted_set_ids[0].intersection(*unpersisted_set_ids[1:]) - auth_difference_unpersisted_part: Collection[str] = union - intersection + auth_difference_unpersisted_part: StrCollection = union - intersection else: auth_difference_unpersisted_part = () state_sets_ids = [set(state_set.values()) for state_set in state_sets] diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py
index 7619f405fa..99ebd96f84 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py
@@ -62,7 +62,6 @@ class Constraint(metaclass=abc.ABCMeta): @abc.abstractmethod def make_check_clause(self, table: str) -> str: """Returns an SQL expression that checks the row passes the constraint.""" - pass @abc.abstractmethod def make_constraint_clause_postgres(self) -> str: @@ -70,7 +69,6 @@ class Constraint(metaclass=abc.ABCMeta): Only used on Postgres DBs """ - pass @attr.s(auto_attribs=True) diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py
index abd1d149db..f39ae2d635 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py
@@ -19,6 +19,7 @@ import logging from collections import deque from typing import ( TYPE_CHECKING, + AbstractSet, Any, Awaitable, Callable, @@ -154,12 +155,13 @@ class _UpdateCurrentStateTask: _EventPersistQueueTask = Union[_PersistEventsTask, _UpdateCurrentStateTask] +_PersistResult = TypeVar("_PersistResult") @attr.s(auto_attribs=True, slots=True) -class _EventPersistQueueItem: +class _EventPersistQueueItem(Generic[_PersistResult]): task: _EventPersistQueueTask - deferred: ObservableDeferred + deferred: ObservableDeferred[_PersistResult] parent_opentracing_span_contexts: List = attr.ib(factory=list) """A list of opentracing spans waiting for this batch""" @@ -168,9 +170,6 @@ class _EventPersistQueueItem: """The opentracing span under which the persistence actually happened""" -_PersistResult = TypeVar("_PersistResult") - - class _EventPeristenceQueue(Generic[_PersistResult]): """Queues up tasks so that they can be processed with only one concurrent transaction per room. @@ -620,7 +619,7 @@ class EventsPersistenceStorageController: ) for room_id, ev_ctx_rm in events_by_room.items(): - latest_event_ids = set( + latest_event_ids = ( await self.main_store.get_latest_event_ids_in_room(room_id) ) new_latest_event_ids = await self._calculate_new_extremities( @@ -742,7 +741,7 @@ class EventsPersistenceStorageController: self, room_id: str, event_contexts: List[Tuple[EventBase, EventContext]], - latest_event_ids: Collection[str], + latest_event_ids: AbstractSet[str], ) -> Set[str]: """Calculates the new forward extremities for a room given events to persist. @@ -760,8 +759,6 @@ class EventsPersistenceStorageController: and not event.internal_metadata.is_soft_failed() ] - latest_event_ids = set(latest_event_ids) - # start with the existing forward extremities result = set(latest_event_ids) @@ -800,7 +797,7 @@ class EventsPersistenceStorageController: self, room_id: str, events_context: List[Tuple[EventBase, EventContext]], - old_latest_event_ids: Set[str], + old_latest_event_ids: AbstractSet[str], new_latest_event_ids: Set[str], ) -> Tuple[Optional[StateMap[str]], Optional[StateMap[str]], Set[str]]: """Calculate the current state dict after adding some new events to diff --git a/synapse/storage/database.py b/synapse/storage/database.py
index 55ac313f33..697bc5651c 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py
@@ -422,10 +422,11 @@ class LoggingTransaction: return self._do_execute( # TODO: is it safe for values to be Iterable[Iterable[Any]] here? # https://www.psycopg.org/docs/extras.html?highlight=execute_batch#psycopg2.extras.execute_values says values should be Sequence[Sequence] - lambda the_sql: execute_values( - self.txn, the_sql, values, template=template, fetch=fetch + lambda the_sql, the_values: execute_values( + self.txn, the_sql, the_values, template=template, fetch=fetch ), sql, + values, ) def execute(self, sql: str, parameters: SQLQueryParameters = ()) -> None: @@ -1192,6 +1193,7 @@ class DatabasePool: keyvalues: Dict[str, Any], values: Dict[str, Any], insertion_values: Optional[Dict[str, Any]] = None, + where_clause: Optional[str] = None, desc: str = "simple_upsert", ) -> bool: """Insert a row with values + insertion_values; on conflict, update with values. @@ -1242,6 +1244,7 @@ class DatabasePool: keyvalues: The unique key columns and their new values values: The nonunique columns and their new values insertion_values: additional key/values to use only when inserting + where_clause: An index predicate to apply to the upsert. desc: description of the transaction, for logging and metrics Returns: Returns True if a row was inserted or updated (i.e. if `values` is @@ -1262,6 +1265,7 @@ class DatabasePool: keyvalues, values, insertion_values, + where_clause, db_autocommit=autocommit, ) except self.engine.module.IntegrityError as e: diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py
index 0836e247ef..101403578c 100644 --- a/synapse/storage/databases/main/__init__.py +++ b/synapse/storage/databases/main/__init__.py
@@ -175,6 +175,7 @@ class DataStore( direction: Direction = Direction.FORWARDS, approved: bool = True, not_user_types: Optional[List[str]] = None, + locked: bool = False, ) -> Tuple[List[JsonDict], int]: """Function to retrieve a paginated list of users from users list. This will return a json list of users and the @@ -194,6 +195,7 @@ class DataStore( direction: sort ascending or descending approved: whether to include approved users not_user_types: list of user types to exclude + locked: whether to include locked users Returns: A tuple of a list of mappings from user to information and a count of total users. """ @@ -226,6 +228,9 @@ class DataStore( if not deactivated: filters.append("deactivated = 0") + if not locked: + filters.append("locked IS FALSE") + if admins is not None: if admins: filters.append("admin = 1") @@ -290,7 +295,7 @@ class DataStore( sql = f""" SELECT name, user_type, is_guest, admin, deactivated, shadow_banned, displayname, avatar_url, creation_ts * 1000 as creation_ts, approved, - eu.user_id is not null as erased, last_seen_ts + eu.user_id is not null as erased, last_seen_ts, locked {sql_base} ORDER BY {order_by_column} {order}, u.name ASC LIMIT ? OFFSET ? diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py
index 8f7bdbc61a..80f146dd53 100644 --- a/synapse/storage/databases/main/account_data.py +++ b/synapse/storage/databases/main/account_data.py
@@ -43,7 +43,7 @@ from synapse.storage.util.id_generators import ( MultiWriterIdGenerator, StreamIdGenerator, ) -from synapse.types import JsonDict +from synapse.types import JsonDict, JsonMapping from synapse.util import json_encoder from synapse.util.caches.descriptors import cached from synapse.util.caches.stream_change_cache import StreamChangeCache @@ -119,7 +119,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) @cached() async def get_global_account_data_for_user( self, user_id: str - ) -> Mapping[str, JsonDict]: + ) -> Mapping[str, JsonMapping]: """ Get all the global client account_data for a user. @@ -164,7 +164,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) @cached() async def get_room_account_data_for_user( self, user_id: str - ) -> Mapping[str, Mapping[str, JsonDict]]: + ) -> Mapping[str, Mapping[str, JsonMapping]]: """ Get all of the per-room client account_data for a user. @@ -213,7 +213,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) @cached(num_args=2, max_entries=5000, tree=True) async def get_global_account_data_by_type_for_user( self, user_id: str, data_type: str - ) -> Optional[JsonDict]: + ) -> Optional[JsonMapping]: """ Returns: The account data. @@ -265,7 +265,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) @cached(num_args=2, tree=True) async def get_account_data_for_room( self, user_id: str, room_id: str - ) -> Mapping[str, JsonDict]: + ) -> Mapping[str, JsonMapping]: """Get all the client account_data for a user for a room. Args: @@ -296,7 +296,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) @cached(num_args=3, max_entries=5000, tree=True) async def get_account_data_for_room_and_type( self, user_id: str, room_id: str, account_data_type: str - ) -> Optional[JsonDict]: + ) -> Optional[JsonMapping]: """Get the client account_data of given type for a user for a room. Args: @@ -394,7 +394,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) async def get_updated_global_account_data_for_user( self, user_id: str, stream_id: int - ) -> Dict[str, JsonDict]: + ) -> Mapping[str, JsonMapping]: """Get all the global account_data that's changed for a user. Args: diff --git a/synapse/storage/databases/main/client_ips.py b/synapse/storage/databases/main/client_ips.py
index d8d333e11d..7da47c3dd7 100644 --- a/synapse/storage/databases/main/client_ips.py +++ b/synapse/storage/databases/main/client_ips.py
@@ -764,3 +764,14 @@ class ClientIpWorkerStore(ClientIpBackgroundUpdateStore, MonthlyActiveUsersWorke } return list(results.values()) + + async def get_last_seen_for_user_id(self, user_id: str) -> Optional[int]: + """Get the last seen timestamp for a user, if we have it.""" + + return await self.db_pool.simple_select_one_onecol( + table="user_ips", + keyvalues={"user_id": user_id}, + retcol="MAX(last_seen)", + allow_none=True, + desc="get_last_seen_for_user_id", + ) diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py
index b471fcb064..744e98c6d0 100644 --- a/synapse/storage/databases/main/deviceinbox.py +++ b/synapse/storage/databases/main/deviceinbox.py
@@ -349,7 +349,7 @@ class DeviceInboxWorkerStore(SQLBaseStore): table="devices", column="user_id", iterable=user_ids_to_query, - keyvalues={"user_id": user_id, "hidden": False}, + keyvalues={"hidden": False}, retcols=("device_id",), ) @@ -445,13 +445,18 @@ class DeviceInboxWorkerStore(SQLBaseStore): @trace async def delete_messages_for_device( - self, user_id: str, device_id: Optional[str], up_to_stream_id: int + self, + user_id: str, + device_id: Optional[str], + up_to_stream_id: int, + limit: int, ) -> int: """ Args: user_id: The recipient user_id. device_id: The recipient device_id. up_to_stream_id: Where to delete messages up to. + limit: maximum number of messages to delete Returns: The number of messages deleted. @@ -472,12 +477,16 @@ class DeviceInboxWorkerStore(SQLBaseStore): log_kv({"message": "No changes in cache since last check"}) return 0 + ROW_ID_NAME = self.database_engine.row_id_name + def delete_messages_for_device_txn(txn: LoggingTransaction) -> int: - sql = ( - "DELETE FROM device_inbox" - " WHERE user_id = ? AND device_id = ?" - " AND stream_id <= ?" - ) + sql = f""" + DELETE FROM device_inbox WHERE {ROW_ID_NAME} IN ( + SELECT {ROW_ID_NAME} FROM device_inbox + WHERE user_id = ? AND device_id = ? AND stream_id <= ? + LIMIT {limit} + ) + """ txn.execute(sql, (user_id, device_id, up_to_stream_id)) return txn.rowcount @@ -487,6 +496,11 @@ class DeviceInboxWorkerStore(SQLBaseStore): log_kv({"message": f"deleted {count} messages for device", "count": count}) + # In this case we don't know if we hit the limit or the delete is complete + # so let's not update the cache. + if count == limit: + return count + # Update the cache, ensuring that we only ever increase the value updated_last_deleted_stream_id = self._last_device_delete_cache.get( (user_id, device_id), 0 diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py
index e4162f846b..70faf4b1ec 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py
@@ -759,18 +759,10 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): mapping of user_id -> device_id -> device_info. """ unique_user_ids = user_ids | {user_id for user_id, _ in user_and_device_ids} - user_map = await self.get_device_list_last_stream_id_for_remotes( - list(unique_user_ids) - ) - # We go and check if any of the users need to have their device lists - # resynced. If they do then we remove them from the cached list. - users_needing_resync = await self.get_user_ids_requiring_device_list_resync( + user_ids_in_cache = await self.get_users_whose_devices_are_cached( unique_user_ids ) - user_ids_in_cache = { - user_id for user_id, stream_id in user_map.items() if stream_id - } - users_needing_resync user_ids_not_in_cache = unique_user_ids - user_ids_in_cache # First fetch all the users which all devices are to be returned. @@ -792,6 +784,22 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): return user_ids_not_in_cache, results + async def get_users_whose_devices_are_cached( + self, user_ids: StrCollection + ) -> Set[str]: + """Checks which of the given users we have cached the devices for.""" + user_map = await self.get_device_list_last_stream_id_for_remotes(user_ids) + + # We go and check if any of the users need to have their device lists + # resynced. If they do then we remove them from the cached list. + users_needing_resync = await self.get_user_ids_requiring_device_list_resync( + user_ids + ) + user_ids_in_cache = { + user_id for user_id, stream_id in user_map.items() if stream_id + } - users_needing_resync + return user_ids_in_cache + @cached(num_args=2, tree=True) async def _get_cached_user_device(self, user_id: str, device_id: str) -> JsonDict: content = await self.db_pool.simple_select_one_onecol( @@ -1766,14 +1774,6 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): self.db_pool.simple_delete_many_txn( txn, - table="device_inbox", - column="device_id", - values=device_ids, - keyvalues={"user_id": user_id}, - ) - - self.db_pool.simple_delete_many_txn( - txn, table="device_auth_providers", column="device_id", values=device_ids, diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index fab7008a8f..afffa54985 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py
@@ -19,6 +19,7 @@ from typing import ( TYPE_CHECKING, Collection, Dict, + FrozenSet, Iterable, List, Optional, @@ -1179,13 +1180,14 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas ) @cached(max_entries=5000, iterable=True) - async def get_latest_event_ids_in_room(self, room_id: str) -> Sequence[str]: - return await self.db_pool.simple_select_onecol( + async def get_latest_event_ids_in_room(self, room_id: str) -> FrozenSet[str]: + event_ids = await self.db_pool.simple_select_onecol( table="event_forward_extremities", keyvalues={"room_id": room_id}, retcol="event_id", desc="get_latest_event_ids_in_room", ) + return frozenset(event_ids) async def get_min_depth(self, room_id: str) -> Optional[int]: """For the given room, get the minimum depth we have seen for it.""" diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index 07bda7d6be..b958a39aeb 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py
@@ -1740,42 +1740,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas # We sleep to ensure that we don't overwhelm the DB. await self._clock.sleep(1.0) - -class EventPushActionsStore(EventPushActionsWorkerStore): - EPA_HIGHLIGHT_INDEX = "epa_highlight_index" - - def __init__( - self, - database: DatabasePool, - db_conn: LoggingDatabaseConnection, - hs: "HomeServer", - ): - super().__init__(database, db_conn, hs) - - self.db_pool.updates.register_background_index_update( - self.EPA_HIGHLIGHT_INDEX, - index_name="event_push_actions_u_highlight", - table="event_push_actions", - columns=["user_id", "stream_ordering"], - ) - - self.db_pool.updates.register_background_index_update( - "event_push_actions_highlights_index", - index_name="event_push_actions_highlights_index", - table="event_push_actions", - columns=["user_id", "room_id", "topological_ordering", "stream_ordering"], - where_clause="highlight=1", - ) - - # Add index to make deleting old push actions faster. - self.db_pool.updates.register_background_index_update( - "event_push_actions_stream_highlight_index", - index_name="event_push_actions_stream_highlight_index", - table="event_push_actions", - columns=["highlight", "stream_ordering"], - where_clause="highlight=0", - ) - async def get_push_actions_for_user( self, user_id: str, @@ -1834,6 +1798,42 @@ class EventPushActionsStore(EventPushActionsWorkerStore): ] +class EventPushActionsStore(EventPushActionsWorkerStore): + EPA_HIGHLIGHT_INDEX = "epa_highlight_index" + + def __init__( + self, + database: DatabasePool, + db_conn: LoggingDatabaseConnection, + hs: "HomeServer", + ): + super().__init__(database, db_conn, hs) + + self.db_pool.updates.register_background_index_update( + self.EPA_HIGHLIGHT_INDEX, + index_name="event_push_actions_u_highlight", + table="event_push_actions", + columns=["user_id", "stream_ordering"], + ) + + self.db_pool.updates.register_background_index_update( + "event_push_actions_highlights_index", + index_name="event_push_actions_highlights_index", + table="event_push_actions", + columns=["user_id", "room_id", "topological_ordering", "stream_ordering"], + where_clause="highlight=1", + ) + + # Add index to make deleting old push actions faster. + self.db_pool.updates.register_background_index_update( + "event_push_actions_stream_highlight_index", + index_name="event_push_actions_stream_highlight_index", + table="event_push_actions", + columns=["highlight", "stream_ordering"], + where_clause="highlight=0", + ) + + def _action_has_highlight(actions: Collection[Union[Mapping, str]]) -> bool: for action in actions: if not isinstance(action, dict): diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 0c1ed75240..bc8474a589 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py
@@ -222,7 +222,7 @@ class PersistEventsStore: for room_id, latest_event_ids in new_forward_extremities.items(): self.store.get_latest_event_ids_in_room.prefill( - (room_id,), list(latest_event_ids) + (room_id,), frozenset(latest_event_ids) ) async def _get_events_which_are_prevs(self, event_ids: Iterable[str]) -> List[str]: diff --git a/synapse/storage/databases/main/experimental_features.py b/synapse/storage/databases/main/experimental_features.py
index cf3226ae5a..654f924019 100644 --- a/synapse/storage/databases/main/experimental_features.py +++ b/synapse/storage/databases/main/experimental_features.py
@@ -12,11 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import TYPE_CHECKING, Dict +from typing import TYPE_CHECKING, Dict, FrozenSet from synapse.storage.database import DatabasePool, LoggingDatabaseConnection from synapse.storage.databases.main import CacheInvalidationWorkerStore -from synapse.types import StrCollection from synapse.util.caches.descriptors import cached if TYPE_CHECKING: @@ -34,7 +33,7 @@ class ExperimentalFeaturesStore(CacheInvalidationWorkerStore): super().__init__(database, db_conn, hs) @cached() - async def list_enabled_features(self, user_id: str) -> StrCollection: + async def list_enabled_features(self, user_id: str) -> FrozenSet[str]: """ Checks to see what features are enabled for a given user Args: @@ -49,7 +48,7 @@ class ExperimentalFeaturesStore(CacheInvalidationWorkerStore): ["feature"], ) - return [feature["feature"] for feature in enabled] + return frozenset(feature["feature"] for feature in enabled) async def set_features_for_user( self, diff --git a/synapse/storage/databases/main/keys.py b/synapse/storage/databases/main/keys.py
index a3b4744855..41563371dc 100644 --- a/synapse/storage/databases/main/keys.py +++ b/synapse/storage/databases/main/keys.py
@@ -16,14 +16,17 @@ import itertools import json import logging -from typing import Dict, Iterable, Mapping, Optional, Tuple +from typing import Dict, Iterable, Optional, Tuple +from canonicaljson import encode_canonical_json from signedjson.key import decode_verify_key_bytes from unpaddedbase64 import decode_base64 +from synapse.storage.database import LoggingTransaction from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore from synapse.storage.keys import FetchKeyResult, FetchKeyResultForRemote from synapse.storage.types import Cursor +from synapse.types import JsonDict from synapse.util.caches.descriptors import cached, cachedList from synapse.util.iterutils import batch_iter @@ -36,162 +39,84 @@ db_binary_type = memoryview class KeyStore(CacheInvalidationWorkerStore): """Persistence for signature verification keys""" - @cached() - def _get_server_signature_key( - self, server_name_and_key_id: Tuple[str, str] - ) -> FetchKeyResult: - raise NotImplementedError() - - @cachedList( - cached_method_name="_get_server_signature_key", - list_name="server_name_and_key_ids", - ) - async def get_server_signature_keys( - self, server_name_and_key_ids: Iterable[Tuple[str, str]] - ) -> Dict[Tuple[str, str], FetchKeyResult]: - """ - Args: - server_name_and_key_ids: - iterable of (server_name, key-id) tuples to fetch keys for - - Returns: - A map from (server_name, key_id) -> FetchKeyResult, or None if the - key is unknown - """ - keys = {} - - def _get_keys(txn: Cursor, batch: Tuple[Tuple[str, str], ...]) -> None: - """Processes a batch of keys to fetch, and adds the result to `keys`.""" - - # batch_iter always returns tuples so it's safe to do len(batch) - sql = """ - SELECT server_name, key_id, verify_key, ts_valid_until_ms - FROM server_signature_keys WHERE 1=0 - """ + " OR (server_name=? AND key_id=?)" * len( - batch - ) - - txn.execute(sql, tuple(itertools.chain.from_iterable(batch))) - - for row in txn: - server_name, key_id, key_bytes, ts_valid_until_ms = row - - if ts_valid_until_ms is None: - # Old keys may be stored with a ts_valid_until_ms of null, - # in which case we treat this as if it was set to `0`, i.e. - # it won't match key requests that define a minimum - # `ts_valid_until_ms`. - ts_valid_until_ms = 0 - - keys[(server_name, key_id)] = FetchKeyResult( - verify_key=decode_verify_key_bytes(key_id, bytes(key_bytes)), - valid_until_ts=ts_valid_until_ms, - ) - - def _txn(txn: Cursor) -> Dict[Tuple[str, str], FetchKeyResult]: - for batch in batch_iter(server_name_and_key_ids, 50): - _get_keys(txn, batch) - return keys - - return await self.db_pool.runInteraction("get_server_signature_keys", _txn) - - async def store_server_signature_keys( + async def store_server_keys_response( self, + server_name: str, from_server: str, ts_added_ms: int, - verify_keys: Mapping[Tuple[str, str], FetchKeyResult], + verify_keys: Dict[str, FetchKeyResult], + response_json: JsonDict, ) -> None: - """Stores NACL verification keys for remote servers. + """Stores the keys for the given server that we got from `from_server`. + Args: - from_server: Where the verification keys were looked up - ts_added_ms: The time to record that the key was added - verify_keys: - keys to be stored. Each entry is a triplet of - (server_name, key_id, key). + server_name: The owner of the keys + from_server: Which server we got the keys from + ts_added_ms: When we're adding the keys + verify_keys: The decoded keys + response_json: The full *signed* response JSON that contains the keys. """ - key_values = [] - value_values = [] - invalidations = [] - for (server_name, key_id), fetch_result in verify_keys.items(): - key_values.append((server_name, key_id)) - value_values.append( - ( - from_server, - ts_added_ms, - fetch_result.valid_until_ts, - db_binary_type(fetch_result.verify_key.encode()), - ) - ) - # invalidate takes a tuple corresponding to the params of - # _get_server_signature_key. _get_server_signature_key only takes one - # param, which is itself the 2-tuple (server_name, key_id). - invalidations.append((server_name, key_id)) - await self.db_pool.simple_upsert_many( - table="server_signature_keys", - key_names=("server_name", "key_id"), - key_values=key_values, - value_names=( - "from_server", - "ts_added_ms", - "ts_valid_until_ms", - "verify_key", - ), - value_values=value_values, - desc="store_server_signature_keys", - ) + key_json_bytes = encode_canonical_json(response_json) + + def store_server_keys_response_txn(txn: LoggingTransaction) -> None: + self.db_pool.simple_upsert_many_txn( + txn, + table="server_signature_keys", + key_names=("server_name", "key_id"), + key_values=[(server_name, key_id) for key_id in verify_keys], + value_names=( + "from_server", + "ts_added_ms", + "ts_valid_until_ms", + "verify_key", + ), + value_values=[ + ( + from_server, + ts_added_ms, + fetch_result.valid_until_ts, + db_binary_type(fetch_result.verify_key.encode()), + ) + for fetch_result in verify_keys.values() + ], + ) - invalidate = self._get_server_signature_key.invalidate - for i in invalidations: - invalidate((i,)) + self.db_pool.simple_upsert_many_txn( + txn, + table="server_keys_json", + key_names=("server_name", "key_id", "from_server"), + key_values=[ + (server_name, key_id, from_server) for key_id in verify_keys + ], + value_names=( + "ts_added_ms", + "ts_valid_until_ms", + "key_json", + ), + value_values=[ + ( + ts_added_ms, + fetch_result.valid_until_ts, + db_binary_type(key_json_bytes), + ) + for fetch_result in verify_keys.values() + ], + ) - async def store_server_keys_json( - self, - server_name: str, - key_id: str, - from_server: str, - ts_now_ms: int, - ts_expires_ms: int, - key_json_bytes: bytes, - ) -> None: - """Stores the JSON bytes for a set of keys from a server - The JSON should be signed by the originating server, the intermediate - server, and by this server. Updates the value for the - (server_name, key_id, from_server) triplet if one already existed. - Args: - server_name: The name of the server. - key_id: The identifier of the key this JSON is for. - from_server: The server this JSON was fetched from. - ts_now_ms: The time now in milliseconds. - ts_valid_until_ms: The time when this json stops being valid. - key_json_bytes: The encoded JSON. - """ - await self.db_pool.simple_upsert( - table="server_keys_json", - keyvalues={ - "server_name": server_name, - "key_id": key_id, - "from_server": from_server, - }, - values={ - "server_name": server_name, - "key_id": key_id, - "from_server": from_server, - "ts_added_ms": ts_now_ms, - "ts_valid_until_ms": ts_expires_ms, - "key_json": db_binary_type(key_json_bytes), - }, - desc="store_server_keys_json", - ) + # invalidate takes a tuple corresponding to the params of + # _get_server_keys_json. _get_server_keys_json only takes one + # param, which is itself the 2-tuple (server_name, key_id). + for key_id in verify_keys: + self._invalidate_cache_and_stream( + txn, self._get_server_keys_json, ((server_name, key_id),) + ) + self._invalidate_cache_and_stream( + txn, self.get_server_key_json_for_remote, (server_name, key_id) + ) - # invalidate takes a tuple corresponding to the params of - # _get_server_keys_json. _get_server_keys_json only takes one - # param, which is itself the 2-tuple (server_name, key_id). - await self.invalidate_cache_and_stream( - "_get_server_keys_json", ((server_name, key_id),) - ) - await self.invalidate_cache_and_stream( - "get_server_key_json_for_remote", (server_name, key_id) + await self.db_pool.runInteraction( + "store_server_keys_response", store_server_keys_response_txn ) @cached() @@ -221,12 +146,17 @@ class KeyStore(CacheInvalidationWorkerStore): """Processes a batch of keys to fetch, and adds the result to `keys`.""" # batch_iter always returns tuples so it's safe to do len(batch) - sql = """ - SELECT server_name, key_id, key_json, ts_valid_until_ms - FROM server_keys_json WHERE 1=0 - """ + " OR (server_name=? AND key_id=?)" * len( - batch - ) + where_clause = " OR (server_name=? AND key_id=?)" * len(batch) + + # `server_keys_json` can have multiple entries per server (one per + # remote server we fetched from, if using perspectives). Order by + # `ts_added_ms` so the most recently fetched one always wins. + sql = f""" + SELECT server_name, key_id, key_json, ts_valid_until_ms + FROM server_keys_json WHERE 1=0 + {where_clause} + ORDER BY ts_added_ms + """ txn.execute(sql, tuple(itertools.chain.from_iterable(batch))) diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py
index b52f48cf04..dea0e0458c 100644 --- a/synapse/storage/databases/main/purge_events.py +++ b/synapse/storage/databases/main/purge_events.py
@@ -450,10 +450,6 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore): "e2e_room_keys", "event_push_summary", "pusher_throttle", - "insertion_events", - "insertion_event_extremities", - "insertion_event_edges", - "batch_events", "room_account_data", "room_tags", # "rooms" happens last, to keep the foreign keys in the other tables diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py
index 5ee5c7ad9f..a074c43989 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py
@@ -795,9 +795,7 @@ class ReceiptsWorkerStore(SQLBaseStore): now - event_ts, ) - await self.db_pool.runInteraction( - "insert_graph_receipt", - self._insert_graph_receipt_txn, + await self._insert_graph_receipt( room_id, receipt_type, user_id, @@ -810,9 +808,8 @@ class ReceiptsWorkerStore(SQLBaseStore): return stream_id, max_persisted_id - def _insert_graph_receipt_txn( + async def _insert_graph_receipt( self, - txn: LoggingTransaction, room_id: str, receipt_type: str, user_id: str, @@ -822,13 +819,6 @@ class ReceiptsWorkerStore(SQLBaseStore): ) -> None: assert self._can_write_to_receipts - txn.call_after( - self._get_receipts_for_user_with_orderings.invalidate, - (user_id, receipt_type), - ) - # FIXME: This shouldn't invalidate the whole cache - txn.call_after(self._get_linearized_receipts_for_room.invalidate, (room_id,)) - keyvalues = { "room_id": room_id, "receipt_type": receipt_type, @@ -840,8 +830,8 @@ class ReceiptsWorkerStore(SQLBaseStore): else: keyvalues["thread_id"] = thread_id - self.db_pool.simple_upsert_txn( - txn, + await self.db_pool.simple_upsert( + desc="insert_graph_receipt", table="receipts_graph", keyvalues=keyvalues, values={ @@ -851,6 +841,11 @@ class ReceiptsWorkerStore(SQLBaseStore): where_clause=where_clause, ) + self._get_receipts_for_user_with_orderings.invalidate((user_id, receipt_type)) + + # FIXME: This shouldn't invalidate the whole cache + self._get_linearized_receipts_for_room.invalidate((room_id,)) + class ReceiptsBackgroundUpdateStore(SQLBaseStore): POPULATE_RECEIPT_EVENT_STREAM_ORDERING = "populate_event_stream_ordering" @@ -939,11 +934,7 @@ class ReceiptsBackgroundUpdateStore(SQLBaseStore): receipts.""" def _remote_duplicate_receipts_txn(txn: LoggingTransaction) -> None: - if isinstance(self.database_engine, PostgresEngine): - ROW_ID_NAME = "ctid" - else: - ROW_ID_NAME = "rowid" - + ROW_ID_NAME = self.database_engine.row_id_name # Identify any duplicate receipts arising from # https://github.com/matrix-org/synapse/issues/14406. # The following query takes less than a minute on matrix.org. diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py
index 7e85b73e8e..cc964604e2 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py
@@ -16,7 +16,7 @@ import logging import random import re -from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Tuple, Union, cast +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast import attr @@ -192,8 +192,8 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): ) @cached() - async def get_user_by_id(self, user_id: str) -> Optional[Mapping[str, Any]]: - """Deprecated: use get_userinfo_by_id instead""" + async def get_user_by_id(self, user_id: str) -> Optional[UserInfo]: + """Returns info about the user account, if it exists.""" def get_user_by_id_txn(txn: LoggingTransaction) -> Optional[Dict[str, Any]]: # We could technically use simple_select_one here, but it would not perform @@ -202,16 +202,12 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): txn.execute( """ SELECT - name, password_hash, is_guest, admin, consent_version, consent_ts, + name, is_guest, admin, consent_version, consent_ts, consent_server_notice_sent, appservice_id, creation_ts, user_type, deactivated, COALESCE(shadow_banned, FALSE) AS shadow_banned, COALESCE(approved, TRUE) AS approved, - COALESCE(locked, FALSE) AS locked, last_seen_ts + COALESCE(locked, FALSE) AS locked FROM users - LEFT JOIN ( - SELECT user_id, MAX(last_seen) AS last_seen_ts - FROM user_ips GROUP BY user_id - ) ls ON users.name = ls.user_id WHERE name = ? """, (user_id,), @@ -228,51 +224,23 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): desc="get_user_by_id", func=get_user_by_id_txn, ) - - if row is not None: - # If we're using SQLite our boolean values will be integers. Because we - # present some of this data as is to e.g. server admins via REST APIs, we - # want to make sure we're returning the right type of data. - # Note: when adding a column name to this list, be wary of NULLable columns, - # since NULL values will be turned into False. - boolean_columns = [ - "admin", - "deactivated", - "shadow_banned", - "approved", - "locked", - ] - for column in boolean_columns: - row[column] = bool(row[column]) - - return row - - async def get_userinfo_by_id(self, user_id: str) -> Optional[UserInfo]: - """Get a UserInfo object for a user by user ID. - - Note! Currently uses the cache of `get_user_by_id`. Once that deprecated method is removed, - this method should be cached. - - Args: - user_id: The user to fetch user info for. - Returns: - `UserInfo` object if user found, otherwise `None`. - """ - user_data = await self.get_user_by_id(user_id) - if not user_data: + if row is None: return None + return UserInfo( - appservice_id=user_data["appservice_id"], - consent_server_notice_sent=user_data["consent_server_notice_sent"], - consent_version=user_data["consent_version"], - creation_ts=user_data["creation_ts"], - is_admin=bool(user_data["admin"]), - is_deactivated=bool(user_data["deactivated"]), - is_guest=bool(user_data["is_guest"]), - is_shadow_banned=bool(user_data["shadow_banned"]), - user_id=UserID.from_string(user_data["name"]), - user_type=user_data["user_type"], - last_seen_ts=user_data["last_seen_ts"], + appservice_id=row["appservice_id"], + consent_server_notice_sent=row["consent_server_notice_sent"], + consent_version=row["consent_version"], + consent_ts=row["consent_ts"], + creation_ts=row["creation_ts"], + is_admin=bool(row["admin"]), + is_deactivated=bool(row["deactivated"]), + is_guest=bool(row["is_guest"]), + is_shadow_banned=bool(row["shadow_banned"]), + user_id=UserID.from_string(row["name"]), + user_type=row["user_type"], + approved=bool(row["approved"]), + locked=bool(row["locked"]), ) async def is_trial_user(self, user_id: str) -> bool: @@ -290,10 +258,10 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): now = self._clock.time_msec() days = self.config.server.mau_appservice_trial_days.get( - info["appservice_id"], self.config.server.mau_trial_days + info.appservice_id, self.config.server.mau_trial_days ) trial_duration_ms = days * 24 * 60 * 60 * 1000 - is_trial = (now - info["creation_ts"] * 1000) < trial_duration_ms + is_trial = (now - info.creation_ts * 1000) < trial_duration_ms return is_trial @cached() @@ -2312,6 +2280,26 @@ class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore): return next_id + async def set_device_for_refresh_token( + self, user_id: str, old_device_id: str, device_id: str + ) -> None: + """Moves refresh tokens from old device to current device + + Args: + user_id: The user of the devices. + old_device_id: The old device. + device_id: The new device ID. + Returns: + None + """ + + await self.db_pool.simple_update( + "refresh_tokens", + keyvalues={"user_id": user_id, "device_id": old_device_id}, + updatevalues={"device_id": device_id}, + desc="set_device_for_refresh_token", + ) + def _set_device_for_access_token_txn( self, txn: LoggingTransaction, token: str, device_id: str ) -> str: diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py
index 3a2966b9e4..9d403919e4 100644 --- a/synapse/storage/databases/main/stats.py +++ b/synapse/storage/databases/main/stats.py
@@ -108,6 +108,7 @@ class UserSortOrder(Enum): SHADOW_BANNED = "shadow_banned" CREATION_TS = "creation_ts" LAST_SEEN_TS = "last_seen_ts" + LOCKED = "locked" class StatsStore(StateDeltasStore): diff --git a/synapse/storage/databases/main/tags.py b/synapse/storage/databases/main/tags.py
index c149a9eacb..61403a98cf 100644 --- a/synapse/storage/databases/main/tags.py +++ b/synapse/storage/databases/main/tags.py
@@ -23,7 +23,7 @@ from synapse.storage._base import db_to_json from synapse.storage.database import LoggingTransaction from synapse.storage.databases.main.account_data import AccountDataWorkerStore from synapse.storage.util.id_generators import AbstractStreamIdGenerator -from synapse.types import JsonDict +from synapse.types import JsonDict, JsonMapping from synapse.util import json_encoder from synapse.util.caches.descriptors import cached @@ -34,7 +34,7 @@ class TagsWorkerStore(AccountDataWorkerStore): @cached() async def get_tags_for_user( self, user_id: str - ) -> Mapping[str, Mapping[str, JsonDict]]: + ) -> Mapping[str, Mapping[str, JsonMapping]]: """Get all the tags for a user. @@ -109,7 +109,7 @@ class TagsWorkerStore(AccountDataWorkerStore): async def get_updated_tags( self, user_id: str, stream_id: int - ) -> Mapping[str, Mapping[str, JsonDict]]: + ) -> Mapping[str, Mapping[str, JsonMapping]]: """Get all the tags for the rooms where the tags have changed since the given version diff --git a/synapse/storage/databases/main/task_scheduler.py b/synapse/storage/databases/main/task_scheduler.py
index 9ab120eea9..5c5372a825 100644 --- a/synapse/storage/databases/main/task_scheduler.py +++ b/synapse/storage/databases/main/task_scheduler.py
@@ -53,6 +53,7 @@ class TaskSchedulerWorkerStore(SQLBaseStore): resource_id: Optional[str] = None, statuses: Optional[List[TaskStatus]] = None, max_timestamp: Optional[int] = None, + limit: Optional[int] = None, ) -> List[ScheduledTask]: """Get a list of scheduled tasks from the DB. @@ -62,6 +63,7 @@ class TaskSchedulerWorkerStore(SQLBaseStore): statuses: Limit the returned tasks to the specific statuses max_timestamp: Limit the returned tasks to the ones that have a timestamp inferior to the specified one + limit: Only return `limit` number of rows if set. Returns: a list of `ScheduledTask`, ordered by increasing timestamps """ @@ -94,6 +96,10 @@ class TaskSchedulerWorkerStore(SQLBaseStore): sql = sql + " ORDER BY timestamp" + if limit is not None: + sql += " LIMIT ?" + args.append(limit) + txn.execute(sql, args) return self.db_pool.cursor_to_dict(txn) diff --git a/synapse/storage/engines/_base.py b/synapse/storage/engines/_base.py
index 0b5b3bf03e..b1a2418cbd 100644 --- a/synapse/storage/engines/_base.py +++ b/synapse/storage/engines/_base.py
@@ -100,6 +100,12 @@ class BaseDatabaseEngine(Generic[ConnectionType, CursorType], metaclass=abc.ABCM """Gets a string giving the server version. For example: '3.22.0'""" ... + @property + @abc.abstractmethod + def row_id_name(self) -> str: + """Gets the literal name representing a row id for this engine.""" + ... + @abc.abstractmethod def in_transaction(self, conn: ConnectionType) -> bool: """Whether the connection is currently in a transaction.""" diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py
index 05a72dc554..6309363217 100644 --- a/synapse/storage/engines/postgres.py +++ b/synapse/storage/engines/postgres.py
@@ -211,6 +211,10 @@ class PostgresEngine( else: return "%i.%i.%i" % (numver / 10000, (numver % 10000) / 100, numver % 100) + @property + def row_id_name(self) -> str: + return "ctid" + def in_transaction(self, conn: psycopg2.extensions.connection) -> bool: return conn.status != psycopg2.extensions.STATUS_READY diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py
index ca8c59297c..802069e1e1 100644 --- a/synapse/storage/engines/sqlite.py +++ b/synapse/storage/engines/sqlite.py
@@ -123,6 +123,10 @@ class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection, sqlite3.Cursor]): """Gets a string giving the server version. For example: '3.22.0'.""" return "%i.%i.%i" % sqlite3.sqlite_version_info + @property + def row_id_name(self) -> str: + return "rowid" + def in_transaction(self, conn: sqlite3.Connection) -> bool: return conn.in_transaction diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index 422f11f59e..5b50bd66bc 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py
@@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -SCHEMA_VERSION = 81 # remember to update the list below when updating +SCHEMA_VERSION = 82 # remember to update the list below when updating """Represents the expectations made by the codebase about the database schema This should be incremented whenever the codebase changes its requirements on the @@ -117,6 +117,10 @@ Changes in SCHEMA_VERSION = 80 Changes in SCHEMA_VERSION = 81 - The event_txn_id is no longer written to for new events. + +Changes in SCHEMA_VERSION = 82 + - The insertion_events, insertion_event_extremities, insertion_event_edges, and + batch_events tables are no longer purged in preparation for their removal. """ diff --git a/synapse/storage/schema/main/delta/48/group_unique_indexes.py b/synapse/storage/schema/main/delta/48/group_unique_indexes.py
index ad2da4c8af..622686d28f 100644 --- a/synapse/storage/schema/main/delta/48/group_unique_indexes.py +++ b/synapse/storage/schema/main/delta/48/group_unique_indexes.py
@@ -14,7 +14,7 @@ from synapse.storage.database import LoggingTransaction -from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine +from synapse.storage.engines import BaseDatabaseEngine from synapse.storage.prepare_database import get_statements FIX_INDEXES = """ @@ -37,7 +37,7 @@ CREATE INDEX group_rooms_r_idx ON group_rooms(room_id); def run_create(cur: LoggingTransaction, database_engine: BaseDatabaseEngine) -> None: - rowid = "ctid" if isinstance(database_engine, PostgresEngine) else "rowid" + rowid = database_engine.row_id_name # remove duplicates from group_users & group_invites tables cur.execute( diff --git a/synapse/storage/schema/main/delta/82/02_scheduled_tasks_index.sql b/synapse/storage/schema/main/delta/82/02_scheduled_tasks_index.sql new file mode 100644
index 0000000000..6b90275139 --- /dev/null +++ b/synapse/storage/schema/main/delta/82/02_scheduled_tasks_index.sql
@@ -0,0 +1,16 @@ +/* Copyright 2023 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +CREATE INDEX IF NOT EXISTS scheduled_tasks_timestamp ON scheduled_tasks(timestamp); diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py
index 488714f60c..76b0e3e694 100644 --- a/synapse/types/__init__.py +++ b/synapse/types/__init__.py
@@ -933,33 +933,37 @@ def get_verify_key_from_cross_signing_key( @attr.s(auto_attribs=True, frozen=True, slots=True) class UserInfo: - """Holds information about a user. Result of get_userinfo_by_id. + """Holds information about a user. Result of get_user_by_id. Attributes: user_id: ID of the user. appservice_id: Application service ID that created this user. consent_server_notice_sent: Version of policy documents the user has been sent. consent_version: Version of policy documents the user has consented to. + consent_ts: Time the user consented creation_ts: Creation timestamp of the user. is_admin: True if the user is an admin. is_deactivated: True if the user has been deactivated. is_guest: True if the user is a guest user. is_shadow_banned: True if the user has been shadow-banned. user_type: User type (None for normal user, 'support' and 'bot' other options). - last_seen_ts: Last activity timestamp of the user. + approved: If the user has been "approved" to register on the server. + locked: Whether the user's account has been locked """ user_id: UserID appservice_id: Optional[int] consent_server_notice_sent: Optional[str] consent_version: Optional[str] + consent_ts: Optional[int] user_type: Optional[str] creation_ts: int is_admin: bool is_deactivated: bool is_guest: bool is_shadow_banned: bool - last_seen_ts: Optional[int] + approved: bool + locked: bool class UserProfile(TypedDict): diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py
index 943ad54456..0cbeb0c365 100644 --- a/synapse/util/async_helpers.py +++ b/synapse/util/async_helpers.py
@@ -19,6 +19,7 @@ import collections import inspect import itertools import logging +import typing from contextlib import asynccontextmanager from typing import ( Any, @@ -29,6 +30,7 @@ from typing import ( Collection, Coroutine, Dict, + Generator, Generic, Hashable, Iterable, @@ -398,7 +400,7 @@ class _LinearizerEntry: # The number of things executing. count: int # Deferreds for the things blocked from executing. - deferreds: collections.OrderedDict + deferreds: typing.OrderedDict["defer.Deferred[None]", Literal[1]] class Linearizer: @@ -717,30 +719,25 @@ def timeout_deferred( return new_d -# This class can't be generic because it uses slots with attrs. -# See: https://github.com/python-attrs/attrs/issues/313 @attr.s(slots=True, frozen=True, auto_attribs=True) -class DoneAwaitable: # should be: Generic[R] +class DoneAwaitable(Awaitable[R]): """Simple awaitable that returns the provided value.""" - value: Any # should be: R + value: R - def __await__(self) -> Any: - return self - - def __iter__(self) -> "DoneAwaitable": - return self - - def __next__(self) -> None: - raise StopIteration(self.value) + def __await__(self) -> Generator[Any, None, R]: + yield None + return self.value def maybe_awaitable(value: Union[Awaitable[R], R]) -> Awaitable[R]: """Convert a value to an awaitable if not already an awaitable.""" if inspect.isawaitable(value): - assert isinstance(value, Awaitable) return value + # For some reason mypy doesn't deduce that value is not Awaitable here, even though + # inspect.isawaitable returns a TypeGuard. + assert not isinstance(value, Awaitable) return DoneAwaitable(value) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py
index 5eaf70c7ab..2fbc7b1e6c 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py
@@ -14,7 +14,7 @@ import enum import logging import threading -from typing import Any, Dict, Generic, Iterable, Optional, Set, Tuple, TypeVar, Union +from typing import Dict, Generic, Iterable, Optional, Set, Tuple, TypeVar, Union import attr from typing_extensions import Literal @@ -33,10 +33,8 @@ DKT = TypeVar("DKT") DV = TypeVar("DV") -# This class can't be generic because it uses slots with attrs. -# See: https://github.com/python-attrs/attrs/issues/313 @attr.s(slots=True, frozen=True, auto_attribs=True) -class DictionaryEntry: # should be: Generic[DKT, DV]. +class DictionaryEntry(Generic[DKT, DV]): """Returned when getting an entry from the cache If `full` is true then `known_absent` will be the empty set. @@ -50,8 +48,8 @@ class DictionaryEntry: # should be: Generic[DKT, DV]. """ full: bool - known_absent: Set[Any] # should be: Set[DKT] - value: Dict[Any, Any] # should be: Dict[DKT, DV] + known_absent: Set[DKT] + value: Dict[DKT, DV] def __len__(self) -> int: return len(self.value) diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py
index 01ad02af67..e73cf66080 100644 --- a/synapse/util/caches/expiringcache.py +++ b/synapse/util/caches/expiringcache.py
@@ -14,7 +14,7 @@ import logging from collections import OrderedDict -from typing import Any, Generic, Optional, TypeVar, Union, overload +from typing import Any, Generic, Iterable, Optional, TypeVar, Union, overload import attr from typing_extensions import Literal @@ -73,7 +73,7 @@ class ExpiringCache(Generic[KT, VT]): self._expiry_ms = expiry_ms self._reset_expiry_on_get = reset_expiry_on_get - self._cache: OrderedDict[KT, _CacheEntry] = OrderedDict() + self._cache: OrderedDict[KT, _CacheEntry[VT]] = OrderedDict() self.iterable = iterable @@ -84,9 +84,7 @@ class ExpiringCache(Generic[KT, VT]): return def f() -> "defer.Deferred[None]": - return run_as_background_process( - "prune_cache_%s" % self._cache_name, self._prune_cache - ) + return run_as_background_process("prune_cache", self._prune_cache) self._clock.looping_call(f, self._expiry_ms / 2) @@ -100,7 +98,10 @@ class ExpiringCache(Generic[KT, VT]): while self._max_size and len(self) > self._max_size: _key, value = self._cache.popitem(last=False) if self.iterable: - self.metrics.inc_evictions(EvictionReason.size, len(value.value)) + # type-ignore, here and below: if self.iterable is true, then the value + # type VT should be Sized (i.e. have a __len__ method). We don't enforce + # this via the type system at present. + self.metrics.inc_evictions(EvictionReason.size, len(value.value)) # type: ignore[arg-type] else: self.metrics.inc_evictions(EvictionReason.size) @@ -134,7 +135,7 @@ class ExpiringCache(Generic[KT, VT]): return default if self.iterable: - self.metrics.inc_evictions(EvictionReason.invalidation, len(value.value)) + self.metrics.inc_evictions(EvictionReason.invalidation, len(value.value)) # type: ignore[arg-type] else: self.metrics.inc_evictions(EvictionReason.invalidation) @@ -182,7 +183,7 @@ class ExpiringCache(Generic[KT, VT]): for k in keys_to_delete: value = self._cache.pop(k) if self.iterable: - self.metrics.inc_evictions(EvictionReason.time, len(value.value)) + self.metrics.inc_evictions(EvictionReason.time, len(value.value)) # type: ignore[arg-type] else: self.metrics.inc_evictions(EvictionReason.time) @@ -195,7 +196,8 @@ class ExpiringCache(Generic[KT, VT]): def __len__(self) -> int: if self.iterable: - return sum(len(entry.value) for entry in self._cache.values()) + g: Iterable[int] = (len(entry.value) for entry in self._cache.values()) # type: ignore[arg-type] + return sum(g) else: return len(self._cache) @@ -218,6 +220,6 @@ class ExpiringCache(Generic[KT, VT]): @attr.s(slots=True, auto_attribs=True) -class _CacheEntry: +class _CacheEntry(Generic[VT]): time: int - value: Any + value: VT diff --git a/synapse/util/caches/ttlcache.py b/synapse/util/caches/ttlcache.py
index f6b3ee31e4..48a6e4a906 100644 --- a/synapse/util/caches/ttlcache.py +++ b/synapse/util/caches/ttlcache.py
@@ -35,10 +35,10 @@ class TTLCache(Generic[KT, VT]): def __init__(self, cache_name: str, timer: Callable[[], float] = time.time): # map from key to _CacheEntry - self._data: Dict[KT, _CacheEntry] = {} + self._data: Dict[KT, _CacheEntry[KT, VT]] = {} # the _CacheEntries, sorted by expiry time - self._expiry_list: SortedList[_CacheEntry] = SortedList() + self._expiry_list: SortedList[_CacheEntry[KT, VT]] = SortedList() self._timer = timer @@ -160,11 +160,11 @@ class TTLCache(Generic[KT, VT]): @attr.s(frozen=True, slots=True, auto_attribs=True) -class _CacheEntry: # Should be Generic[KT, VT]. See python-attrs/attrs#313 +class _CacheEntry(Generic[KT, VT]): """TTLCache entry""" # expiry_time is the first attribute, so that entries are sorted by expiry. expiry_time: float ttl: float - key: Any # should be KT - value: Any # should be VT + key: KT + value: VT diff --git a/synapse/util/gai_resolver.py b/synapse/util/gai_resolver.py
index 214eb17fbc..fecf829ade 100644 --- a/synapse/util/gai_resolver.py +++ b/synapse/util/gai_resolver.py
@@ -136,7 +136,7 @@ class GAIResolver: # The types on IHostnameResolver is incorrect in Twisted, see # https://twistedmatrix.com/trac/ticket/10276 - def resolveHostName( # type: ignore[override] + def resolveHostName( self, resolutionReceiver: IResolutionReceiver, hostName: str, diff --git a/synapse/util/task_scheduler.py b/synapse/util/task_scheduler.py
index 9e89aeb748..caf13b3474 100644 --- a/synapse/util/task_scheduler.py +++ b/synapse/util/task_scheduler.py
@@ -15,11 +15,14 @@ import logging from typing import TYPE_CHECKING, Awaitable, Callable, Dict, List, Optional, Set, Tuple -from prometheus_client import Gauge - from twisted.python.failure import Failure -from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.logging.context import nested_logging_context +from synapse.metrics import LaterGauge +from synapse.metrics.background_process_metrics import ( + run_as_background_process, + wrap_as_background_process, +) from synapse.types import JsonMapping, ScheduledTask, TaskStatus from synapse.util.stringutils import random_string @@ -29,12 +32,6 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -running_tasks_gauge = Gauge( - "synapse_scheduler_running_tasks", - "The number of concurrent running tasks handled by the TaskScheduler", -) - - class TaskScheduler: """ This is a simple task sheduler aimed at resumable tasks: usually we use `run_in_background` @@ -69,6 +66,8 @@ class TaskScheduler: # Precision of the scheduler, evaluation of tasks to run will only happen # every `SCHEDULE_INTERVAL_MS` ms SCHEDULE_INTERVAL_MS = 1 * 60 * 1000 # 1mn + # How often to clean up old tasks. + CLEANUP_INTERVAL_MS = 30 * 60 * 1000 # Time before a complete or failed task is deleted from the DB KEEP_TASKS_FOR_MS = 7 * 24 * 60 * 60 * 1000 # 1 week # Maximum number of tasks that can run at the same time @@ -77,6 +76,7 @@ class TaskScheduler: LAST_UPDATE_BEFORE_WARNING_MS = 24 * 60 * 60 * 1000 # 24hrs def __init__(self, hs: "HomeServer"): + self._hs = hs self._store = hs.get_datastores().main self._clock = hs.get_clock() self._running_tasks: Set[str] = set() @@ -90,15 +90,25 @@ class TaskScheduler: ] = {} self._run_background_tasks = hs.config.worker.run_background_tasks + # Flag to make sure we only try and launch new tasks once at a time. + self._launching_new_tasks = False + if self._run_background_tasks: self._clock.looping_call( - run_as_background_process, + self._launch_scheduled_tasks, + TaskScheduler.SCHEDULE_INTERVAL_MS, + ) + self._clock.looping_call( + self._clean_scheduled_tasks, TaskScheduler.SCHEDULE_INTERVAL_MS, - "handle_scheduled_tasks", - self._handle_scheduled_tasks, ) - else: - self.replication_client = hs.get_replication_command_handler() + + LaterGauge( + "synapse_scheduler_running_tasks", + "The number of concurrent running tasks handled by the TaskScheduler", + labels=None, + caller=lambda: len(self._running_tasks), + ) def register_action( self, @@ -133,7 +143,7 @@ class TaskScheduler: params: Optional[JsonMapping] = None, ) -> str: """Schedule a new potentially resumable task. A function matching the specified - `action` should have been previously registered with `register_action`. + `action` should have be registered with `register_action` before the task is run. Args: action: the name of a previously registered action @@ -149,11 +159,6 @@ class TaskScheduler: Returns: The id of the scheduled task """ - if action not in self._actions: - raise Exception( - f"No function associated with action {action} of the scheduled task" - ) - status = TaskStatus.SCHEDULED if timestamp is None or timestamp < self._clock.time_msec(): timestamp = self._clock.time_msec() @@ -175,7 +180,7 @@ class TaskScheduler: if self._run_background_tasks: await self._launch_task(task) else: - self.replication_client.send_new_active_task(task.id) + self._hs.get_replication_command_handler().send_new_active_task(task.id) return task.id @@ -239,6 +244,7 @@ class TaskScheduler: resource_id: Optional[str] = None, statuses: Optional[List[TaskStatus]] = None, max_timestamp: Optional[int] = None, + limit: Optional[int] = None, ) -> List[ScheduledTask]: """Get a list of tasks. Returns all the tasks if no args is provided. @@ -252,6 +258,7 @@ class TaskScheduler: statuses: Limit the returned tasks to the specific statuses max_timestamp: Limit the returned tasks to the ones that have a timestamp inferior to the specified one + limit: Only return `limit` number of rows if set. Returns A list of `ScheduledTask`, ordered by increasing timestamps @@ -261,6 +268,7 @@ class TaskScheduler: resource_id=resource_id, statuses=statuses, max_timestamp=max_timestamp, + limit=limit, ) async def delete_task(self, id: str) -> None: @@ -278,34 +286,58 @@ class TaskScheduler: raise Exception(f"Task {id} is currently ACTIVE and can't be deleted") await self._store.delete_scheduled_task(id) - async def _handle_scheduled_tasks(self) -> None: - """Main loop taking care of launching tasks and cleaning up old ones.""" - await self._launch_scheduled_tasks() - await self._clean_scheduled_tasks() + def launch_task_by_id(self, id: str) -> None: + """Try launching the task with the given ID.""" + # Don't bother trying to launch new tasks if we're already at capacity. + if len(self._running_tasks) >= TaskScheduler.MAX_CONCURRENT_RUNNING_TASKS: + return + + run_as_background_process("launch_task_by_id", self._launch_task_by_id, id) + + async def _launch_task_by_id(self, id: str) -> None: + """Helper async function for `launch_task_by_id`.""" + task = await self.get_task(id) + if task: + await self._launch_task(task) + @wrap_as_background_process("launch_scheduled_tasks") async def _launch_scheduled_tasks(self) -> None: """Retrieve and launch scheduled tasks that should be running at that time.""" - for task in await self.get_tasks(statuses=[TaskStatus.ACTIVE]): - await self._launch_task(task) - for task in await self.get_tasks( - statuses=[TaskStatus.SCHEDULED], max_timestamp=self._clock.time_msec() - ): - await self._launch_task(task) + # Don't bother trying to launch new tasks if we're already at capacity. + if len(self._running_tasks) >= TaskScheduler.MAX_CONCURRENT_RUNNING_TASKS: + return + + if self._launching_new_tasks: + return - running_tasks_gauge.set(len(self._running_tasks)) + self._launching_new_tasks = True + try: + for task in await self.get_tasks( + statuses=[TaskStatus.ACTIVE], limit=self.MAX_CONCURRENT_RUNNING_TASKS + ): + await self._launch_task(task) + for task in await self.get_tasks( + statuses=[TaskStatus.SCHEDULED], + max_timestamp=self._clock.time_msec(), + limit=self.MAX_CONCURRENT_RUNNING_TASKS, + ): + await self._launch_task(task) + + finally: + self._launching_new_tasks = False + + @wrap_as_background_process("clean_scheduled_tasks") async def _clean_scheduled_tasks(self) -> None: """Clean old complete or failed jobs to avoid clutter the DB.""" + now = self._clock.time_msec() for task in await self._store.get_scheduled_tasks( - statuses=[TaskStatus.FAILED, TaskStatus.COMPLETE] + statuses=[TaskStatus.FAILED, TaskStatus.COMPLETE], + max_timestamp=now - TaskScheduler.KEEP_TASKS_FOR_MS, ): # FAILED and COMPLETE tasks should never be running assert task.id not in self._running_tasks - if ( - self._clock.time_msec() - > task.timestamp + TaskScheduler.KEEP_TASKS_FOR_MS - ): - await self._store.delete_scheduled_task(task.id) + await self._store.delete_scheduled_task(task.id) async def _launch_task(self, task: ScheduledTask) -> None: """Launch a scheduled task now. @@ -315,30 +347,37 @@ class TaskScheduler: """ assert self._run_background_tasks - assert task.action in self._actions + if task.action not in self._actions: + raise Exception( + f"No function associated with action {task.action} of the scheduled task {task.id}" + ) function = self._actions[task.action] async def wrapper() -> None: - try: - (status, result, error) = await function(task) - except Exception: - f = Failure() - logger.error( - f"scheduled task {task.id} failed", - exc_info=(f.type, f.value, f.getTracebackObject()), + with nested_logging_context(task.id): + try: + (status, result, error) = await function(task) + except Exception: + f = Failure() + logger.error( + f"scheduled task {task.id} failed", + exc_info=(f.type, f.value, f.getTracebackObject()), + ) + status = TaskStatus.FAILED + result = None + error = f.getErrorMessage() + + await self._store.update_scheduled_task( + task.id, + self._clock.time_msec(), + status=status, + result=result, + error=error, ) - status = TaskStatus.FAILED - result = None - error = f.getErrorMessage() - - await self._store.update_scheduled_task( - task.id, - self._clock.time_msec(), - status=status, - result=result, - error=error, - ) - self._running_tasks.remove(task.id) + self._running_tasks.remove(task.id) + + # Try launch a new task since we've finished with this one. + self._clock.call_later(1, self._launch_scheduled_tasks) if len(self._running_tasks) >= TaskScheduler.MAX_CONCURRENT_RUNNING_TASKS: return @@ -356,5 +395,4 @@ class TaskScheduler: self._running_tasks.add(task.id) await self.update_task(task.id, status=TaskStatus.ACTIVE) - description = f"{task.id}-{task.action}" - run_as_background_process(description, wrapper) + run_as_background_process(f"task-{task.action}", wrapper) diff --git a/synapse/visibility.py b/synapse/visibility.py
index eac10f6438..f15fdd8314 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py
@@ -36,7 +36,7 @@ from synapse.events.utils import prune_event from synapse.logging.opentracing import trace from synapse.storage.controllers import StorageControllers from synapse.storage.databases.main import DataStore -from synapse.types import RetentionPolicy, StateMap, get_domain_from_id +from synapse.types import RetentionPolicy, StateMap, StrCollection, get_domain_from_id from synapse.types.state import StateFilter from synapse.util import Clock @@ -150,12 +150,12 @@ async def filter_events_for_client( async def filter_event_for_clients_with_state( store: DataStore, - user_ids: Collection[str], + user_ids: StrCollection, event: EventBase, context: EventContext, is_peeking: bool = False, filter_send_to_client: bool = True, -) -> Collection[str]: +) -> StrCollection: """ Checks to see if an event is visible to the users in the list at the time of the event.