From 3b0083c92adf76daf4161908565de9e5efc08074 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 22 Mar 2023 17:15:34 +0000 Subject: Use immutabledict instead of frozendict (#15113) Additionally: * Consistently use `freeze()` in tests --------- Co-authored-by: Patrick Cloke Co-authored-by: 6543 <6543@obermui.de> --- tests/api/test_filtering.py | 6 ++--- tests/config/test_workers.py | 6 ++--- tests/push/test_push_rule_evaluator.py | 18 +++++++-------- tests/storage/test_state.py | 40 +++++++++++++++++++--------------- tests/types/test_state.py | 14 ++++++------ 5 files changed, 43 insertions(+), 41 deletions(-) diff --git a/tests/api/test_filtering.py b/tests/api/test_filtering.py index 0f45615160..6c6a9ab4b4 100644 --- a/tests/api/test_filtering.py +++ b/tests/api/test_filtering.py @@ -18,7 +18,6 @@ from typing import List from unittest.mock import patch import jsonschema -from frozendict import frozendict from twisted.test.proto_helpers import MemoryReactor @@ -29,6 +28,7 @@ from synapse.api.presence import UserPresenceState from synapse.server import HomeServer from synapse.types import JsonDict from synapse.util import Clock +from synapse.util.frozenutils import freeze from tests import unittest from tests.events.test_utils import MockEvent @@ -343,12 +343,12 @@ class FilteringTestCase(unittest.HomeserverTestCase): self.assertFalse(Filter(self.hs, definition)._check(event)) - # check it works with frozendicts too + # check it works with frozen dictionaries too event = MockEvent( sender="@foo:bar", type="m.room.message", room_id="!secretbase:unknown", - content=frozendict({EventContentFields.LABELS: ["#fun"]}), + content=freeze({EventContentFields.LABELS: ["#fun"]}), ) self.assertTrue(Filter(self.hs, definition)._check(event)) diff --git a/tests/config/test_workers.py b/tests/config/test_workers.py index ef6294ecb2..49a6bdf408 100644 --- a/tests/config/test_workers.py +++ b/tests/config/test_workers.py @@ -14,14 +14,14 @@ from typing import Any, Mapping, Optional from unittest.mock import Mock -from frozendict import frozendict +from immutabledict import immutabledict from synapse.config import ConfigError from synapse.config.workers import WorkerConfig from tests.unittest import TestCase -_EMPTY_FROZENDICT: Mapping[str, Any] = frozendict() +_EMPTY_IMMUTABLEDICT: Mapping[str, Any] = immutabledict() class WorkerDutyConfigTestCase(TestCase): def _make_worker_config( self, worker_app: str, worker_name: Optional[str], - extras: Mapping[str, Any] = _EMPTY_FROZENDICT, + extras: Mapping[str, Any] = _EMPTY_IMMUTABLEDICT, ) -> WorkerConfig: root_config = Mock() root_config.worker_app = worker_app diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py index 52c4aafea6..b2536562e0 100644 --- a/tests/push/test_push_rule_evaluator.py +++ b/tests/push/test_push_rule_evaluator.py @@ -14,8 +14,6 @@ from typing import Any, Dict, List, Optional, Union, cast -import frozendict - from twisted.test.proto_helpers import MemoryReactor import synapse.rest.admin @@ -318,11 +316,11 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): "pattern should only match at the start/end of the value", ) - # it should work on frozendicts too + # it should work on frozen dictionaries too self._assert_matches( condition, - frozendict.frozendict({"value": "FoobaZ"}), - "patterns should match on frozendicts", + freeze({"value": "FoobaZ"}), + "patterns should match on frozen dictionaries", ) # wildcards should match @@ -425,11
+423,11 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): "incorrect types should not match", ) - # it should work on frozendicts too + # it should work on frozen dictionaries too self._assert_matches( condition, - frozendict.frozendict({"value": "foobaz"}), - "values should match on frozendicts", + freeze({"value": "foobaz"}), + "values should match on frozen dictionaries", ) def test_exact_event_match_boolean(self) -> None: @@ -546,11 +544,11 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): "does not search in a string", ) - # it should work on frozendicts too + # it should work on frozen dictionaries too self._assert_matches( condition, freeze({"value": ["foobaz"]}), - "values should match on frozendicts", + "values should match on frozen dictionaries", ) def test_no_body(self) -> None: diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py index 62aed6af0a..0b9446c36c 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -14,7 +14,7 @@ import logging -from frozendict import frozendict +from immutabledict import immutabledict from twisted.test.proto_helpers import MemoryReactor @@ -198,7 +198,7 @@ class StateStoreTestCase(HomeserverTestCase): self.storage.state.get_state_for_event( e5.event_id, state_filter=StateFilter( - types=frozendict( + types=immutabledict( {EventTypes.Member: frozenset({self.u_alice.to_string()})} ), include_others=True, @@ -220,7 +220,7 @@ class StateStoreTestCase(HomeserverTestCase): self.storage.state.get_state_for_event( e5.event_id, state_filter=StateFilter( - types=frozendict({EventTypes.Member: frozenset()}), + types=immutabledict({EventTypes.Member: frozenset()}), include_others=True, ), ) @@ -246,7 +246,8 @@ class StateStoreTestCase(HomeserverTestCase): self.state_datastore._state_group_cache, group, state_filter=StateFilter( - types=frozendict({EventTypes.Member: frozenset()}), include_others=True + types=immutabledict({EventTypes.Member: frozenset()}), + include_others=True, ), ) @@ -263,7 +264,8 @@ class StateStoreTestCase(HomeserverTestCase): self.state_datastore._state_group_members_cache, group, state_filter=StateFilter( - types=frozendict({EventTypes.Member: frozenset()}), include_others=True + types=immutabledict({EventTypes.Member: frozenset()}), + include_others=True, ), ) @@ -276,7 +278,7 @@ class StateStoreTestCase(HomeserverTestCase): self.state_datastore._state_group_cache, group, state_filter=StateFilter( - types=frozendict({EventTypes.Member: None}), include_others=True + types=immutabledict({EventTypes.Member: None}), include_others=True ), ) @@ -293,7 +295,7 @@ class StateStoreTestCase(HomeserverTestCase): self.state_datastore._state_group_members_cache, group, state_filter=StateFilter( - types=frozendict({EventTypes.Member: None}), include_others=True + types=immutabledict({EventTypes.Member: None}), include_others=True ), ) @@ -313,7 +315,7 @@ class StateStoreTestCase(HomeserverTestCase): self.state_datastore._state_group_cache, group, state_filter=StateFilter( - types=frozendict({EventTypes.Member: frozenset({e5.state_key})}), + types=immutabledict({EventTypes.Member: frozenset({e5.state_key})}), include_others=True, ), ) @@ -331,7 +333,7 @@ class StateStoreTestCase(HomeserverTestCase): self.state_datastore._state_group_members_cache, group, state_filter=StateFilter( - types=frozendict({EventTypes.Member: frozenset({e5.state_key})}), + types=immutabledict({EventTypes.Member: frozenset({e5.state_key})}), include_others=True, ), ) @@ -345,7 +347,7 @@ class 
StateStoreTestCase(HomeserverTestCase): self.state_datastore._state_group_members_cache, group, state_filter=StateFilter( - types=frozendict({EventTypes.Member: frozenset({e5.state_key})}), + types=immutabledict({EventTypes.Member: frozenset({e5.state_key})}), include_others=False, ), ) @@ -396,7 +398,8 @@ class StateStoreTestCase(HomeserverTestCase): self.state_datastore._state_group_cache, group, state_filter=StateFilter( - types=frozendict({EventTypes.Member: frozenset()}), include_others=True + types=immutabledict({EventTypes.Member: frozenset()}), + include_others=True, ), ) @@ -408,7 +411,8 @@ class StateStoreTestCase(HomeserverTestCase): self.state_datastore._state_group_members_cache, group, state_filter=StateFilter( - types=frozendict({EventTypes.Member: frozenset()}), include_others=True + types=immutabledict({EventTypes.Member: frozenset()}), + include_others=True, ), ) @@ -421,7 +425,7 @@ class StateStoreTestCase(HomeserverTestCase): self.state_datastore._state_group_cache, group, state_filter=StateFilter( - types=frozendict({EventTypes.Member: None}), include_others=True + types=immutabledict({EventTypes.Member: None}), include_others=True ), ) @@ -432,7 +436,7 @@ class StateStoreTestCase(HomeserverTestCase): self.state_datastore._state_group_members_cache, group, state_filter=StateFilter( - types=frozendict({EventTypes.Member: None}), include_others=True + types=immutabledict({EventTypes.Member: None}), include_others=True ), ) @@ -451,7 +455,7 @@ class StateStoreTestCase(HomeserverTestCase): self.state_datastore._state_group_cache, group, state_filter=StateFilter( - types=frozendict({EventTypes.Member: frozenset({e5.state_key})}), + types=immutabledict({EventTypes.Member: frozenset({e5.state_key})}), include_others=True, ), ) @@ -463,7 +467,7 @@ class StateStoreTestCase(HomeserverTestCase): self.state_datastore._state_group_members_cache, group, state_filter=StateFilter( - types=frozendict({EventTypes.Member: frozenset({e5.state_key})}), + types=immutabledict({EventTypes.Member: frozenset({e5.state_key})}), include_others=True, ), ) @@ -477,7 +481,7 @@ class StateStoreTestCase(HomeserverTestCase): self.state_datastore._state_group_cache, group, state_filter=StateFilter( - types=frozendict({EventTypes.Member: frozenset({e5.state_key})}), + types=immutabledict({EventTypes.Member: frozenset({e5.state_key})}), include_others=False, ), ) @@ -489,7 +493,7 @@ class StateStoreTestCase(HomeserverTestCase): self.state_datastore._state_group_members_cache, group, state_filter=StateFilter( - types=frozendict({EventTypes.Member: frozenset({e5.state_key})}), + types=immutabledict({EventTypes.Member: frozenset({e5.state_key})}), include_others=False, ), ) diff --git a/tests/types/test_state.py b/tests/types/test_state.py index eb809f9fb7..1d89582c44 100644 --- a/tests/types/test_state.py +++ b/tests/types/test_state.py @@ -1,4 +1,4 @@ -from frozendict import frozendict +from immutabledict import immutabledict from synapse.api.constants import EventTypes from synapse.types.state import StateFilter @@ -172,7 +172,7 @@ class StateFilterDifferenceTestCase(TestCase): }, include_others=False, ), - StateFilter(types=frozendict(), include_others=True), + StateFilter(types=immutabledict(), include_others=True), ) # (wildcard on state keys) - (no state keys) @@ -188,7 +188,7 @@ class StateFilterDifferenceTestCase(TestCase): include_others=False, ), StateFilter( - types=frozendict(), + types=immutabledict(), include_others=True, ), ) @@ -279,7 +279,7 @@ class StateFilterDifferenceTestCase(TestCase): 
{EventTypes.Member: None, EventTypes.CanonicalAlias: None}, include_others=True, ), - StateFilter(types=frozendict(), include_others=False), + StateFilter(types=immutabledict(), include_others=False), ) # (wildcard on state keys) - (specific state keys) @@ -332,7 +332,7 @@ class StateFilterDifferenceTestCase(TestCase): include_others=True, ), StateFilter( - types=frozendict(), + types=immutabledict(), include_others=False, ), ) @@ -403,7 +403,7 @@ class StateFilterDifferenceTestCase(TestCase): {EventTypes.Member: None, EventTypes.CanonicalAlias: None}, include_others=True, ), - StateFilter(types=frozendict(), include_others=False), + StateFilter(types=immutabledict(), include_others=False), ) # (wildcard on state keys) - (specific state keys) @@ -450,7 +450,7 @@ class StateFilterDifferenceTestCase(TestCase): include_others=True, ), StateFilter( - types=frozendict(), + types=immutabledict(), include_others=False, ), ) -- cgit 1.5.1 From 68a671731207645f693e4e48676781b9a1acb838 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 24 Mar 2023 08:31:14 -0400 Subject: Reject mentions on the C-S API which are invalid. (#15311) Invalid mentions data received over the Client-Server API should be rejected with a 400 error. This will hopefully stop clients from sending invalid data, although does not help with data received over federation. --- changelog.d/15311.misc | 1 + synapse/events/validator.py | 42 ++++++++++--- synapse/http/servlet.py | 22 +++++-- tests/push/test_bulk_push_rule_evaluator.py | 94 +++++++++++++++++------------ 4 files changed, 105 insertions(+), 54 deletions(-) create mode 100644 changelog.d/15311.misc diff --git a/changelog.d/15311.misc b/changelog.d/15311.misc new file mode 100644 index 0000000000..ce03cb9523 --- /dev/null +++ b/changelog.d/15311.misc @@ -0,0 +1 @@ +Reject events with an invalid "mentions" property per [MSC3952](https://github.com/matrix-org/matrix-spec-proposals/pull/3952). diff --git a/synapse/events/validator.py b/synapse/events/validator.py index 6f0e4386d3..47203209db 100644 --- a/synapse/events/validator.py +++ b/synapse/events/validator.py @@ -12,11 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License.
import collections.abc -from typing import Iterable, Type, Union, cast +from typing import Iterable, List, Type, Union, cast import jsonschema +from pydantic import Field, StrictBool, StrictStr -from synapse.api.constants import MAX_ALIAS_LENGTH, EventTypes, Membership +from synapse.api.constants import ( + MAX_ALIAS_LENGTH, + EventContentFields, + EventTypes, + Membership, +) from synapse.api.errors import Codes, SynapseError from synapse.api.room_versions import EventFormatVersions from synapse.config.homeserver import HomeServerConfig @@ -28,6 +34,8 @@ from synapse.events.utils import ( validate_canonicaljson, ) from synapse.federation.federation_server import server_matches_acl_event +from synapse.http.servlet import validate_json_object +from synapse.rest.models import RequestBodyModel from synapse.types import EventID, JsonDict, RoomID, UserID @@ -88,27 +96,27 @@ class EventValidator: Codes.INVALID_PARAM, ) - if event.type == EventTypes.Retention: + elif event.type == EventTypes.Retention: self._validate_retention(event) - if event.type == EventTypes.ServerACL: + elif event.type == EventTypes.ServerACL: if not server_matches_acl_event(config.server.server_name, event): raise SynapseError( 400, "Can't create an ACL event that denies the local server" ) - if event.type == EventTypes.PowerLevels: + elif event.type == EventTypes.PowerLevels: try: jsonschema.validate( instance=event.content, schema=POWER_LEVELS_SCHEMA, - cls=plValidator, + cls=POWER_LEVELS_VALIDATOR, ) except jsonschema.ValidationError as e: if e.path: # example: "users_default": '0' is not of type 'integer' # cast safety: path entries can be integers, if we fail to validate - # items in an array. However the POWER_LEVELS_SCHEMA doesn't expect + # items in an array. However, the POWER_LEVELS_SCHEMA doesn't expect # to see any arrays. message = ( '"' + cast(str, e.path[-1]) + '": ' + e.message # noqa: B306 @@ -125,6 +133,15 @@ class EventValidator: errcode=Codes.BAD_JSON, ) + # If the event contains a mentions key, validate it. + if ( + EventContentFields.MSC3952_MENTIONS in event.content + and config.experimental.msc3952_intentional_mentions + ): + validate_json_object( + event.content[EventContentFields.MSC3952_MENTIONS], Mentions + ) + def _validate_retention(self, event: EventBase) -> None: """Checks that an event that defines the retention policy for a room respects the format enforced by the spec. @@ -253,10 +270,15 @@ POWER_LEVELS_SCHEMA = { } +class Mentions(RequestBodyModel): + user_ids: List[StrictStr] = Field(default_factory=list) + room: StrictBool = False + + # This could return something newer than Draft 7, but that's the current "latest" # validator. 
-def _create_power_level_validator() -> Type[jsonschema.Draft7Validator]: - validator = jsonschema.validators.validator_for(POWER_LEVELS_SCHEMA) +def _create_validator(schema: JsonDict) -> Type[jsonschema.Draft7Validator]: + validator = jsonschema.validators.validator_for(schema) # by default jsonschema does not consider a immutabledict to be an object so # we need to use a custom type checker @@ -268,4 +290,4 @@ def _create_power_level_validator() -> Type[jsonschema.Draft7Validator]: return jsonschema.validators.extend(validator, type_checker=type_checker) -plValidator = _create_power_level_validator() +POWER_LEVELS_VALIDATOR = _create_validator(POWER_LEVELS_SCHEMA) diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index 0070bd2940..fc62793628 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -778,17 +778,13 @@ def parse_json_object_from_request( Model = TypeVar("Model", bound=BaseModel) -def parse_and_validate_json_object_from_request( - request: Request, model_type: Type[Model] -) -> Model: - """Parse a JSON object from the body of a twisted HTTP request, then deserialise and - validate using the given pydantic model. +def validate_json_object(content: JsonDict, model_type: Type[Model]) -> Model: + """Validate a deserialized JSON object using the given pydantic model. Raises: SynapseError if the request body couldn't be decoded as JSON or if it wasn't a JSON object. """ - content = parse_json_object_from_request(request, allow_empty_body=False) try: instance = model_type.parse_obj(content) except ValidationError as e: @@ -811,6 +807,20 @@ def parse_and_validate_json_object_from_request( return instance +def parse_and_validate_json_object_from_request( + request: Request, model_type: Type[Model] +) -> Model: + """Parse a JSON object from the body of a twisted HTTP request, then deserialise and + validate using the given pydantic model. + + Raises: + SynapseError if the request body couldn't be decoded as JSON or + if it wasn't a JSON object. + """ + content = parse_json_object_from_request(request, allow_empty_body=False) + return validate_json_object(content, model_type) + + def assert_params_in_dict(body: JsonDict, required: Iterable[str]) -> None: absent = [] for k in required: diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py index 46df0102f7..9501096a77 100644 --- a/tests/push/test_bulk_push_rule_evaluator.py +++ b/tests/push/test_bulk_push_rule_evaluator.py @@ -243,22 +243,28 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): ) # Non-dict mentions should be ignored. - mentions: Any - for mentions in (None, True, False, 1, "foo", []): - self.assertFalse( - self._create_and_process( - bulk_evaluator, {EventContentFields.MSC3952_MENTIONS: mentions} + # + # Avoid C-S validation as these aren't expected. + with patch( + "synapse.events.validator.EventValidator.validate_new", + new=lambda s, event, config: True, + ): + mentions: Any + for mentions in (None, True, False, 1, "foo", []): + self.assertFalse( + self._create_and_process( + bulk_evaluator, {EventContentFields.MSC3952_MENTIONS: mentions} + ) ) - ) - # A non-list should be ignored. - for mentions in (None, True, False, 1, "foo", {}): - self.assertFalse( - self._create_and_process( - bulk_evaluator, - {EventContentFields.MSC3952_MENTIONS: {"user_ids": mentions}}, + # A non-list should be ignored. 
+ for mentions in (None, True, False, 1, "foo", {}): + self.assertFalse( + self._create_and_process( + bulk_evaluator, + {EventContentFields.MSC3952_MENTIONS: {"user_ids": mentions}}, + ) ) - ) # The Matrix ID appearing anywhere in the list should notify. self.assertTrue( @@ -291,26 +297,32 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): ) # Invalid entries in the list are ignored. + # + # Avoid C-S validation as these aren't expected. + with patch( + "synapse.events.validator.EventValidator.validate_new", + new=lambda s, event, config: True, + ): + self.assertFalse( + self._create_and_process( + bulk_evaluator, + { + EventContentFields.MSC3952_MENTIONS: { + "user_ids": [None, True, False, {}, []] + } + }, + ) ) - ) self.assertTrue( + self._create_and_process( + bulk_evaluator, + { + EventContentFields.MSC3952_MENTIONS: { + "user_ids": [None, True, False, {}, [], self.alice] + } + }, + ) ) - ) # The legacy push rule should not mention if the mentions field exists. self.assertFalse( @@ -351,14 +363,20 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): ) # Invalid data should not notify. + # + # Avoid C-S validation as these aren't expected. + with patch( + "synapse.events.validator.EventValidator.validate_new", + new=lambda s, event, config: True, + ): + mentions: Any + for mentions in (None, False, 1, "foo", [], {}): + self.assertFalse( + self._create_and_process( + bulk_evaluator, + {EventContentFields.MSC3952_MENTIONS: {"room": mentions}}, + ) ) - ) # The legacy push rule should not mention if the mentions field exists. self.assertFalse( -- cgit 1.5.1 From 5b70f240cf70b390db7e74ab614ace108fc08d70 Mon Sep 17 00:00:00 2001 From: Quentin Gliech Date: Fri, 24 Mar 2023 16:09:39 +0100 Subject: Make cleaning up pushers depend on the device_id instead of the token_id (#15280) This makes it so that we rely on the `device_id` to delete pushers on logout, instead of relying on the `access_token_id`. This ensures we're not removing pushers on token refresh, and prepares for a world without access token IDs (i.e. the OIDC work). This actually runs the `set_device_id_for_pushers` background update, which was forgotten in #13831. Note that for backwards compatibility it still deletes pushers based on the `access_token` until the background update finishes.
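As a minimal, self-contained sketch of why keying pushers on the device survives a token refresh (the `Pusher` class below is an illustrative stand-in for Synapse's pusher rows, not its real types):

    from dataclasses import dataclass
    from typing import List, Optional

    @dataclass
    class Pusher:
        user_id: str
        device_id: Optional[str]     # stable across token refreshes
        access_token: Optional[int]  # rotates on every token refresh

    def swept_up_by_token_refresh(pushers: List[Pusher], old_token_id: int) -> List[Pusher]:
        # Old behaviour: refreshing invalidates old_token_id, so its pusher was
        # (wrongly) removed even though the device and session still exist.
        return [p for p in pushers if p.access_token == old_token_id]

    def removed_with_device(pushers: List[Pusher], device_id: str) -> List[Pusher]:
        # New behaviour: a pusher only goes away when its device does.
        return [p for p in pushers if p.device_id == device_id]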
--- changelog.d/15280.misc | 1 + synapse/_scripts/synapse_port_db.py | 6 ++- synapse/handlers/auth.py | 8 ++- synapse/handlers/device.py | 2 + synapse/handlers/register.py | 4 +- synapse/push/__init__.py | 7 ++- synapse/push/pusherpool.py | 58 ++++++++++++++++------ synapse/rest/admin/users.py | 1 - synapse/rest/client/pusher.py | 1 - synapse/storage/databases/main/pusher.py | 40 +++++++++++---- .../74/02_set_device_id_for_pushers_bg_update.sql | 19 +++++++ tests/push/test_email.py | 6 +-- tests/push/test_http.py | 46 ++++++++--------- tests/replication/test_pusher_shard.py | 4 +- tests/rest/admin/test_user.py | 4 +- 15 files changed, 142 insertions(+), 65 deletions(-) create mode 100644 changelog.d/15280.misc create mode 100644 synapse/storage/schema/main/delta/74/02_set_device_id_for_pushers_bg_update.sql diff --git a/changelog.d/15280.misc b/changelog.d/15280.misc new file mode 100644 index 0000000000..41d56b0cf0 --- /dev/null +++ b/changelog.d/15280.misc @@ -0,0 +1 @@ +Make the pushers rely on the `device_id` instead of the `access_token_id` for various operations. diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 94b86c1d6f..1dcb397ba4 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -68,7 +68,10 @@ from synapse.storage.databases.main.media_repository import ( MediaRepositoryBackgroundUpdateStore, ) from synapse.storage.databases.main.presence import PresenceBackgroundUpdateStore -from synapse.storage.databases.main.pusher import PusherWorkerStore +from synapse.storage.databases.main.pusher import ( + PusherBackgroundUpdatesStore, + PusherWorkerStore, +) from synapse.storage.databases.main.receipts import ReceiptsBackgroundUpdateStore from synapse.storage.databases.main.registration import ( RegistrationBackgroundUpdateStore, @@ -226,6 +229,7 @@ class Store( AccountDataWorkerStore, PushRuleStore, PusherWorkerStore, + PusherBackgroundUpdatesStore, PresenceBackgroundUpdateStore, ReceiptsBackgroundUpdateStore, RelationsWorkerStore, diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 308e38edea..1e89447044 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -1504,8 +1504,10 @@ class AuthHandler: ) # delete pushers associated with this access token + # XXX(quenting): This is only needed until the 'set_device_id_for_pushers' + # background update completes. if token.token_id is not None: - await self.hs.get_pusherpool().remove_pushers_by_access_token( + await self.hs.get_pusherpool().remove_pushers_by_access_tokens( token.user_id, (token.token_id,) ) @@ -1535,7 +1537,9 @@ class AuthHandler: ) # delete pushers associated with the access tokens - await self.hs.get_pusherpool().remove_pushers_by_access_token( + # XXX(quenting): This is only needed until the 'set_device_id_for_pushers' + # background update completes. + await self.hs.get_pusherpool().remove_pushers_by_access_tokens( user_id, (token_id for _, token_id, _ in tokens_and_devices) ) diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 6f7963df43..9ded6389ac 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -503,6 +503,8 @@ class DeviceHandler(DeviceWorkerHandler): else: raise + await self.hs.get_pusherpool().remove_pushers_by_devices(user_id, device_ids) + # Delete data specific to each device. Not optimised as it is not # considered as part of a critical path.
for device_id in device_ids: diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 6b110dcb6e..c8bf2439af 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -1013,11 +1013,11 @@ class RegistrationHandler: user_tuple = await self.store.get_user_by_access_token(token) # The token better still exist. assert user_tuple - token_id = user_tuple.token_id + device_id = user_tuple.device_id await self.pusher_pool.add_or_update_pusher( user_id=user_id, - access_token=token_id, + device_id=device_id, kind="email", app_id="m.email", app_display_name="Email Notifications", diff --git a/synapse/push/__init__.py b/synapse/push/__init__.py index a0c760239d..9e3a98741a 100644 --- a/synapse/push/__init__.py +++ b/synapse/push/__init__.py @@ -103,7 +103,7 @@ class PusherConfig: id: Optional[str] user_name: str - access_token: Optional[int] + profile_tag: str kind: str app_id: str @@ -119,6 +119,11 @@ class PusherConfig: enabled: bool device_id: Optional[str] + # XXX(quenting): The access_token is not persisted anymore for new pushers, but we + # keep it when reading from the database, so that we don't get stale pushers + # while the "set_device_id_for_pushers" background update is running. + access_token: Optional[int] + def as_dict(self) -> Dict[str, Any]: """Information that can be retrieved about a pusher after creation.""" return { diff --git a/synapse/push/pusherpool.py b/synapse/push/pusherpool.py index e2648cbc93..6517e3566f 100644 --- a/synapse/push/pusherpool.py +++ b/synapse/push/pusherpool.py @@ -25,7 +25,7 @@ from synapse.metrics.background_process_metrics import ( from synapse.push import Pusher, PusherConfig, PusherConfigException from synapse.push.pusher import PusherFactory from synapse.replication.http.push import ReplicationRemovePusherRestServlet -from synapse.types import JsonDict, RoomStreamToken +from synapse.types import JsonDict, RoomStreamToken, StrCollection from synapse.util.async_helpers import concurrently_execute from synapse.util.threepids import canonicalise_email @@ -97,7 +97,6 @@ class PusherPool: async def add_or_update_pusher( self, user_id: str, - access_token: Optional[int], kind: str, app_id: str, app_display_name: str, @@ -128,6 +127,22 @@ class PusherPool: # stream ordering, so it will process pushes from this point onwards. last_stream_ordering = self.store.get_room_max_stream_ordering() + # Before we actually persist the pusher, we check if the user already has one + # for this app ID and pushkey. If so, we want to keep the access token and + # device ID in place, since this could be one device modifying + # (e.g. enabling/disabling) another device's pusher. + # XXX(quenting): Even though we're not persisting the access_token_id for new + # pushers anymore, we still need to copy existing access_token_ids over when + # updating a pusher, in case the "set_device_id_for_pushers" background update + # hasn't run yet. 
+ access_token_id = None + existing_config = await self._get_pusher_config_for_user_by_app_id_and_pushkey( + user_id, app_id, pushkey + ) + if existing_config: + device_id = existing_config.device_id + access_token_id = existing_config.access_token + # we try to create the pusher just to validate the config: it # will then get pulled out of the database, # recreated, added and started: this means we have only one @@ -136,7 +151,6 @@ class PusherPool: PusherConfig( id=None, user_name=user_id, - access_token=access_token, profile_tag=profile_tag, kind=kind, app_id=app_id, @@ -151,23 +165,12 @@ class PusherPool: failing_since=None, enabled=enabled, device_id=device_id, + access_token=access_token_id, ) ) - # Before we actually persist the pusher, we check if the user already has one - # this app ID and pushkey. If so, we want to keep the access token and device ID - # in place, since this could be one device modifying (e.g. enabling/disabling) - # another device's pusher. - existing_config = await self._get_pusher_config_for_user_by_app_id_and_pushkey( - user_id, app_id, pushkey - ) - if existing_config: - access_token = existing_config.access_token - device_id = existing_config.device_id - await self.store.add_pusher( user_id=user_id, - access_token=access_token, kind=kind, app_id=app_id, app_display_name=app_display_name, @@ -180,6 +183,7 @@ class PusherPool: profile_tag=profile_tag, enabled=enabled, device_id=device_id, + access_token_id=access_token_id, ) pusher = await self.process_pusher_change_by_id(app_id, pushkey, user_id) @@ -199,7 +203,7 @@ class PusherPool: ) await self.remove_pusher(p.app_id, p.pushkey, p.user_name) - async def remove_pushers_by_access_token( + async def remove_pushers_by_access_tokens( self, user_id: str, access_tokens: Iterable[int] ) -> None: """Remove the pushers for a given user corresponding to a set of @@ -209,6 +213,8 @@ class PusherPool: user_id: user to remove pushers for access_tokens: access token *ids* to remove pushers for """ + # XXX(quenting): This is only needed until the "set_device_id_for_pushers" + # background update finishes tokens = set(access_tokens) for p in await self.store.get_pushers_by_user_id(user_id): if p.access_token in tokens: @@ -220,6 +226,26 @@ class PusherPool: ) await self.remove_pusher(p.app_id, p.pushkey, p.user_name) + async def remove_pushers_by_devices( + self, user_id: str, devices: StrCollection + ) -> None: + """Remove the pushers for a given user corresponding to a set of devices + + Args: + user_id: user to remove pushers for + devices: device IDs to remove pushers for + """ + device_ids = set(devices) + for p in await self.store.get_pushers_by_user_id(user_id): + if p.device_id in device_ids: + logger.info( + "Removing pusher for app id %s, pushkey %s, user %s", + p.app_id, + p.pushkey, + p.user_name, + ) + await self.remove_pusher(p.app_id, p.pushkey, p.user_name) + def on_new_notifications(self, max_token: RoomStreamToken) -> None: if not self.pushers: # nothing to do here. 
diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index 281e8fd0ad..331f225116 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -425,7 +425,6 @@ class UserRestServletV2(RestServlet): ): await self.pusher_pool.add_or_update_pusher( user_id=user_id, - access_token=None, kind="email", app_id="m.email", app_display_name="Email Notifications", diff --git a/synapse/rest/client/pusher.py b/synapse/rest/client/pusher.py index 975eef2144..1a8f5292ac 100644 --- a/synapse/rest/client/pusher.py +++ b/synapse/rest/client/pusher.py @@ -126,7 +126,6 @@ class PushersSetRestServlet(RestServlet): try: await self.pusher_pool.add_or_update_pusher( user_id=user.to_string(), - access_token=requester.access_token_id, kind=content["kind"], app_id=content["app_id"], app_display_name=content["app_display_name"], diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py index 9a24f7a655..ab76b754e0 100644 --- a/synapse/storage/databases/main/pusher.py +++ b/synapse/storage/databases/main/pusher.py @@ -509,19 +509,24 @@ class PusherBackgroundUpdatesStore(SQLBaseStore): async def _set_device_id_for_pushers( self, progress: JsonDict, batch_size: int ) -> int: - """Background update to populate the device_id column of the pushers table.""" + """ + Background update to populate the device_id column and clear the access_token + column for the pushers table. + """ last_pusher_id = progress.get("pusher_id", 0) def set_device_id_for_pushers_txn(txn: LoggingTransaction) -> int: txn.execute( """ - SELECT p.id, at.device_id + SELECT + p.id AS pusher_id, + p.device_id AS pusher_device_id, + at.device_id AS token_device_id FROM pushers AS p - INNER JOIN access_tokens AS at + LEFT JOIN access_tokens AS at ON p.access_token = at.id WHERE p.access_token IS NOT NULL - AND at.device_id IS NOT NULL AND p.id > ? ORDER BY p.id LIMIT ? @@ -533,13 +538,27 @@ class PusherBackgroundUpdatesStore(SQLBaseStore): if len(rows) == 0: return 0 + # The reason we're clearing the access_token column here is a bit subtle. + # When a user logs out, we: + # (1) delete the access token + # (2) delete the device + # + # Ideally, we would delete the pusher only via its link to the device + # during (2), but since this background update might not have fully run yet, + # we're still deleting the pushers via the access token during (1). self.db_pool.simple_update_many_txn( txn=txn, table="pushers", key_names=("id",), - key_values=[(row["id"],) for row in rows], - value_names=("device_id",), - value_values=[(row["device_id"],) for row in rows], + key_values=[(row["pusher_id"],) for row in rows], + value_names=("device_id", "access_token"), + # If there was already a device_id on the pusher, we only want to clear + # the access_token column, so we keep the existing device_id. Otherwise, + # we set the device_id we got from joining the access_tokens table.
+ value_values=[ + (row["pusher_device_id"] or row["token_device_id"], None) + for row in rows + ], ) self.db_pool.updates._background_update_progress_txn( @@ -568,7 +587,6 @@ class PusherStore(PusherWorkerStore, PusherBackgroundUpdatesStore): async def add_pusher( self, user_id: str, - access_token: Optional[int], kind: str, app_id: str, app_display_name: str, @@ -581,13 +599,13 @@ class PusherStore(PusherWorkerStore, PusherBackgroundUpdatesStore): profile_tag: str = "", enabled: bool = True, device_id: Optional[str] = None, + access_token_id: Optional[int] = None, ) -> None: async with self._pushers_id_gen.get_next() as stream_id: await self.db_pool.simple_upsert( table="pushers", keyvalues={"app_id": app_id, "pushkey": pushkey, "user_name": user_id}, values={ - "access_token": access_token, "kind": kind, "app_display_name": app_display_name, "device_display_name": device_display_name, @@ -599,6 +617,10 @@ class PusherStore(PusherWorkerStore, PusherBackgroundUpdatesStore): "id": stream_id, "enabled": enabled, "device_id": device_id, + # XXX(quenting): We're only really persisting the access token ID + # when updating an existing pusher. This is in case the + # 'set_device_id_for_pushers' background update hasn't finished yet. + "access_token": access_token_id, }, desc="add_pusher", ) diff --git a/synapse/storage/schema/main/delta/74/02_set_device_id_for_pushers_bg_update.sql b/synapse/storage/schema/main/delta/74/02_set_device_id_for_pushers_bg_update.sql new file mode 100644 index 0000000000..1367fb6267 --- /dev/null +++ b/synapse/storage/schema/main/delta/74/02_set_device_id_for_pushers_bg_update.sql @@ -0,0 +1,19 @@ +/* Copyright 2023 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Triggers the background update to set the device_id for pushers +-- that don't have one, and clear the access_token column. +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (7402, 'set_device_id_for_pushers', '{}'); diff --git a/tests/push/test_email.py b/tests/push/test_email.py index 4ea5472eb4..4b5c96aeae 100644 --- a/tests/push/test_email.py +++ b/tests/push/test_email.py @@ -105,7 +105,7 @@ class EmailPusherTests(HomeserverTestCase): self.hs.get_datastores().main.get_user_by_access_token(self.access_token) ) assert user_tuple is not None - self.token_id = user_tuple.token_id + self.device_id = user_tuple.device_id # We need to add email to account before we can create a pusher. 
self.get_success( @@ -117,7 +117,7 @@ class EmailPusherTests(HomeserverTestCase): pusher = self.get_success( self.hs.get_pusherpool().add_or_update_pusher( user_id=self.user_id, - access_token=self.token_id, + device_id=self.device_id, kind="email", app_id="m.email", app_display_name="Email Notifications", @@ -141,7 +141,7 @@ class EmailPusherTests(HomeserverTestCase): self.get_success_or_raise( self.hs.get_pusherpool().add_or_update_pusher( user_id=self.user_id, - access_token=self.token_id, + device_id=self.device_id, kind="email", app_id="m.email", app_display_name="Email Notifications", diff --git a/tests/push/test_http.py b/tests/push/test_http.py index c280ddcdf6..99cec0836b 100644 --- a/tests/push/test_http.py +++ b/tests/push/test_http.py @@ -67,13 +67,13 @@ class HTTPPusherTests(HomeserverTestCase): self.hs.get_datastores().main.get_user_by_access_token(access_token) ) assert user_tuple is not None - token_id = user_tuple.token_id + device_id = user_tuple.device_id def test_data(data: Any) -> None: self.get_failure( self.hs.get_pusherpool().add_or_update_pusher( user_id=user_id, - access_token=token_id, + device_id=device_id, kind="http", app_id="m.http", app_display_name="HTTP Push Notifications", @@ -114,12 +114,12 @@ class HTTPPusherTests(HomeserverTestCase): self.hs.get_datastores().main.get_user_by_access_token(access_token) ) assert user_tuple is not None - token_id = user_tuple.token_id + device_id = user_tuple.device_id self.get_success( self.hs.get_pusherpool().add_or_update_pusher( user_id=user_id, - access_token=token_id, + device_id=device_id, kind="http", app_id="m.http", app_display_name="HTTP Push Notifications", @@ -235,12 +235,12 @@ class HTTPPusherTests(HomeserverTestCase): self.hs.get_datastores().main.get_user_by_access_token(access_token) ) assert user_tuple is not None - token_id = user_tuple.token_id + device_id = user_tuple.device_id self.get_success( self.hs.get_pusherpool().add_or_update_pusher( user_id=user_id, - access_token=token_id, + device_id=device_id, kind="http", app_id="m.http", app_display_name="HTTP Push Notifications", @@ -356,12 +356,12 @@ class HTTPPusherTests(HomeserverTestCase): self.hs.get_datastores().main.get_user_by_access_token(access_token) ) assert user_tuple is not None - token_id = user_tuple.token_id + device_id = user_tuple.device_id self.get_success( self.hs.get_pusherpool().add_or_update_pusher( user_id=user_id, - access_token=token_id, + device_id=device_id, kind="http", app_id="m.http", app_display_name="HTTP Push Notifications", @@ -443,12 +443,12 @@ class HTTPPusherTests(HomeserverTestCase): self.hs.get_datastores().main.get_user_by_access_token(access_token) ) assert user_tuple is not None - token_id = user_tuple.token_id + device_id = user_tuple.device_id self.get_success( self.hs.get_pusherpool().add_or_update_pusher( user_id=user_id, - access_token=token_id, + device_id=device_id, kind="http", app_id="m.http", app_display_name="HTTP Push Notifications", @@ -521,12 +521,12 @@ class HTTPPusherTests(HomeserverTestCase): self.hs.get_datastores().main.get_user_by_access_token(access_token) ) assert user_tuple is not None - token_id = user_tuple.token_id + device_id = user_tuple.device_id self.get_success( self.hs.get_pusherpool().add_or_update_pusher( user_id=user_id, - access_token=token_id, + device_id=device_id, kind="http", app_id="m.http", app_display_name="HTTP Push Notifications", @@ -628,12 +628,12 @@ class HTTPPusherTests(HomeserverTestCase): self.hs.get_datastores().main.get_user_by_access_token(access_token) ) 
assert user_tuple is not None - token_id = user_tuple.token_id + device_id = user_tuple.device_id self.get_success( self.hs.get_pusherpool().add_or_update_pusher( user_id=user_id, - access_token=token_id, + device_id=device_id, kind="http", app_id="m.http", app_display_name="HTTP Push Notifications", @@ -764,12 +764,12 @@ class HTTPPusherTests(HomeserverTestCase): self.hs.get_datastores().main.get_user_by_access_token(access_token) ) assert user_tuple is not None - token_id = user_tuple.token_id + device_id = user_tuple.device_id self.get_success( self.hs.get_pusherpool().add_or_update_pusher( user_id=user_id, - access_token=token_id, + device_id=device_id, kind="http", app_id="m.http", app_display_name="HTTP Push Notifications", @@ -778,7 +778,6 @@ class HTTPPusherTests(HomeserverTestCase): lang=None, data={"url": "http://example.com/_matrix/push/v1/notify"}, enabled=enabled, - device_id=user_tuple.device_id, ) ) @@ -895,19 +894,17 @@ class HTTPPusherTests(HomeserverTestCase): def test_update_different_device_access_token_device_id(self) -> None: """Tests that if we create a pusher from one device, the update it from another - device, the access token and device ID associated with the pusher stays the - same. + device, the device ID associated with the pusher stays the same. """ # Create a user with a pusher. user_id, access_token = self._make_user_with_pusher("user") - # Get the token ID for the current access token, since that's what we store in - # the pushers table. Also get the device ID from it. + # Get the device ID for the current access token, since that's what we store in + # the pushers table. user_tuple = self.get_success( self.hs.get_datastores().main.get_user_by_access_token(access_token) ) assert user_tuple is not None - token_id = user_tuple.token_id device_id = user_tuple.device_id # Generate a new access token, and update the pusher with it. @@ -920,10 +917,9 @@ class HTTPPusherTests(HomeserverTestCase): ) pushers: List[PusherConfig] = list(ret) - # Check that we still have one pusher, and that the access token and device ID - # associated with it didn't change. + # Check that we still have one pusher, and that the device ID associated with + # it didn't change. 
self.assertEqual(len(pushers), 1) - self.assertEqual(pushers[0].access_token, token_id) self.assertEqual(pushers[0].device_id, device_id) @override_config({"experimental_features": {"msc3881_enabled": True}}) diff --git a/tests/replication/test_pusher_shard.py b/tests/replication/test_pusher_shard.py index 0798b021c3..dcb3e6669b 100644 --- a/tests/replication/test_pusher_shard.py +++ b/tests/replication/test_pusher_shard.py @@ -51,12 +51,12 @@ class PusherShardTestCase(BaseMultiWorkerStreamTestCase): self.hs.get_datastores().main.get_user_by_access_token(access_token) ) assert user_dict is not None - token_id = user_dict.token_id + device_id = user_dict.device_id self.get_success( self.hs.get_pusherpool().add_or_update_pusher( user_id=user_id, - access_token=token_id, + device_id=device_id, kind="http", app_id="m.http", app_display_name="HTTP Push Notifications", diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index 4b8f889a71..b4241ceaf0 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -3047,12 +3047,12 @@ class PushersRestTestCase(unittest.HomeserverTestCase): self.store.get_user_by_access_token(other_user_token) ) assert user_tuple is not None - token_id = user_tuple.token_id + device_id = user_tuple.device_id self.get_success( self.hs.get_pusherpool().add_or_update_pusher( user_id=self.other_user, - access_token=token_id, + device_id=device_id, kind="http", app_id="m.http", app_display_name="HTTP Push Notifications", -- cgit 1.5.1 From bd4d958aaf7c2123abb3665e7a7b199cf8ce27ee Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 28 Mar 2023 09:46:47 +0100 Subject: Bump ruff from 0.0.252 to 0.0.259 (#15328) * Bump ruff from 0.0.252 to 0.0.259 Bumps [ruff](https://github.com/charliermarsh/ruff) from 0.0.252 to 0.0.259. - [Release notes](https://github.com/charliermarsh/ruff/releases) - [Changelog](https://github.com/charliermarsh/ruff/blob/main/BREAKING_CHANGES.md) - [Commits](https://github.com/charliermarsh/ruff/compare/v0.0.252...v0.0.259) --- updated-dependencies: - dependency-name: ruff dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] * Fix new warnings * Mypy * Newsfile --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Erik Johnston --- changelog.d/15328.misc | 1 + poetry.lock | 38 +++++++++++++------------- pyproject.toml | 2 +- synapse/events/__init__.py | 4 +-- synapse/events/utils.py | 2 +- synapse/storage/database.py | 4 +-- synapse/storage/databases/main/events.py | 5 ++-- synapse/storage/databases/main/pusher.py | 2 +- synapse/storage/databases/main/stats.py | 14 ++++++++-- synapse/storage/databases/main/stream.py | 5 +++- tests/replication/slave/storage/test_events.py | 2 +- tests/server.py | 10 +++++-- 12 files changed, 54 insertions(+), 35 deletions(-) create mode 100644 changelog.d/15328.misc diff --git a/changelog.d/15328.misc b/changelog.d/15328.misc new file mode 100644 index 0000000000..e3e5953332 --- /dev/null +++ b/changelog.d/15328.misc @@ -0,0 +1 @@ +Bump ruff from 0.0.252 to 0.0.259.
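Most of the "Fix new warnings" changes in this commit share one shape: the newer ruff flags dict comprehensions that merely repack an existing iterable of key/value pairs (presumably via the flake8-comprehensions C416 rule it implements). A before/after illustration, with invented values:

    key_names = ("app_id", "pushkey")
    keyv = ("m.http", "abc123")

    # Flagged: the comprehension adds nothing over the dict() constructor.
    _keys = {x: y for x, y in zip(key_names, keyv)}

    # Equivalent form adopted below in synapse/storage/database.py and friends.
    _keys = dict(zip(key_names, keyv))
    assert _keys == {"app_id": "m.http", "pushkey": "abc123"}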
diff --git a/poetry.lock b/poetry.lock index 294ce49a8d..978a6e1598 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2323,29 +2323,29 @@ jupyter = ["ipywidgets (>=7.5.1,<9)"] [[package]] name = "ruff" -version = "0.0.252" +version = "0.0.259" description = "An extremely fast Python linter, written in Rust." category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.0.252-py3-none-macosx_10_7_x86_64.whl", hash = "sha256:349367a227c4db7abbc3a9993efea8a608b5bea4bb4a1e5fc6f0d56819524f92"}, - {file = "ruff-0.0.252-py3-none-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:ce77f9106d96b4faf7865860fb5155b9deaf6f699d9c279118c5ad947739ecaf"}, - {file = "ruff-0.0.252-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edadb0b050293b4e60dab979ba6a4e734d9c899cbe316a0ee5b65e3cdd39c750"}, - {file = "ruff-0.0.252-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4efdae98937d1e4d23ab0b7fc7e8e6b6836cc7d2d42238ceeacbc793ef780542"}, - {file = "ruff-0.0.252-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c8546d879f7d3f669379a03e7b103d90e11901976ab508aeda59c03dfd8a359e"}, - {file = "ruff-0.0.252-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:83fdc7169b6c1fb5fe8d1cdf345697f558c1b433ef97df9ca11defa2a8f3ee9e"}, - {file = "ruff-0.0.252-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84ed9be1a17e2a556a571a5b959398633dd10910abd8dcf8b098061e746e892d"}, - {file = "ruff-0.0.252-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f5e77bd9ba4438cf2ee32154e2673afe22f538ef29f5d65ca47e3dc46c42cf8"}, - {file = "ruff-0.0.252-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a5179b94b45c0f8512eaff3ab304c14714a46df2e9ca72a9d96084adc376b71"}, - {file = "ruff-0.0.252-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:92efd8a71157595df5bc46aaaa0613d8a2fbc5cddc53ae7b749c16025c324732"}, - {file = "ruff-0.0.252-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:fd350fc10832cfd28e681d829a8aa83ea3e653326e0ea9d98637dfb8d46177d2"}, - {file = "ruff-0.0.252-py3-none-musllinux_1_2_i686.whl", hash = "sha256:f119240c9631216e846166e06023b1d878e25fbac93bf20da50069e91cfbfaee"}, - {file = "ruff-0.0.252-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5c5a49f89f5ede93d16eddfeeadd7e5739ec703e8f63ac95eac30236b9e49da3"}, - {file = "ruff-0.0.252-py3-none-win32.whl", hash = "sha256:89a897dc743f2fe063483ea666097e72e848f4bbe40493fe0533e61799959f6e"}, - {file = "ruff-0.0.252-py3-none-win_amd64.whl", hash = "sha256:cdc89ad6ff88519b1fb1816ac82a9ad910762c90ff5fd64dda7691b72d36aff7"}, - {file = "ruff-0.0.252-py3-none-win_arm64.whl", hash = "sha256:4b594a17cf53077165429486650658a0e1b2ac6ab88954f5afd50d2b1b5657a9"}, - {file = "ruff-0.0.252.tar.gz", hash = "sha256:6992611ab7bdbe7204e4831c95ddd3febfeece2e6f5e44bbed044454c7db0f63"}, + {file = "ruff-0.0.259-py3-none-macosx_10_7_x86_64.whl", hash = "sha256:f3938dc45e2a3f818e9cbd53007265c22246fbfded8837b2c563bf0ebde1a226"}, + {file = "ruff-0.0.259-py3-none-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:22e1e35bf5f12072cd644d22afd9203641ccf258bc14ff91aa1c43dc14f6047d"}, + {file = "ruff-0.0.259-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2fb20e89e85d147c85caa807707a1488bccc1f3854dc3d53533e89b52a0c5ff"}, + {file = "ruff-0.0.259-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:49e903bcda19f6bb0725a962c058eb5d61f40d84ef52ed53b61939b69402ab4e"}, + {file = "ruff-0.0.259-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:71f0ef1985e9a6696fa97da8459917fa34bdaa2c16bd33bd5edead585b7d44f7"}, + {file = "ruff-0.0.259-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:7cfef26619cba184d59aa7fa17b48af5891d51fc0b755a9bc533478a10d4d066"}, + {file = "ruff-0.0.259-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:79b02fa17ec1fd8d306ae302cb47fb614b71e1f539997858243769bcbe78c6d9"}, + {file = "ruff-0.0.259-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:428507fb321b386dda70d66cd1a8aa0abf51d7c197983d83bb9e4fa5ee60300b"}, + {file = "ruff-0.0.259-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c5fbaea9167f1852757f02133e5daacdb8c75b3431343205395da5b10499927a"}, + {file = "ruff-0.0.259-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:40ae87f2638484b7e8a7567b04a7af719f1c484c5bf132038b702bb32e1f6577"}, + {file = "ruff-0.0.259-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:29e2b77b7d5da6a7dd5cf9b738b511355c5734ece56f78e500d4b5bffd58c1a0"}, + {file = "ruff-0.0.259-py3-none-musllinux_1_2_i686.whl", hash = "sha256:5b3c1beacf6037e7f0781d4699d9a2dd4ba2462f475be5b1f45cf84c4ba3c69d"}, + {file = "ruff-0.0.259-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:daaea322e7e85f4c13d82be9536309e1c4b8b9851bb0cbc7eeb15d490fd46bf9"}, + {file = "ruff-0.0.259-py3-none-win32.whl", hash = "sha256:38704f151323aa5858370a2f792e122cc25e5d1aabe7d42ceeab83da18f0b456"}, + {file = "ruff-0.0.259-py3-none-win_amd64.whl", hash = "sha256:aa9449b898287e621942cc71b9327eceb8f0c357e4065fecefb707ef2d978df8"}, + {file = "ruff-0.0.259-py3-none-win_arm64.whl", hash = "sha256:e4f39e18702de69faaaee3969934b92d7467285627f99a5b6ecd55a7d9f5d086"}, + {file = "ruff-0.0.259.tar.gz", hash = "sha256:8b56496063ab3bfdf72339a5fbebb8bd46e5c5fee25ef11a9f03b208fa0562ec"}, ] [[package]] @@ -3426,4 +3426,4 @@ user-search = ["pyicu"] [metadata] lock-version = "2.0" python-versions = "^3.7.1" -content-hash = "0a1dd4be3dff3c8cc71bd57a4eb48e1d92f155db7230e61fbb54f8af03619509" +content-hash = "102eed4faa13eab195555ea070f235acd1e3f0ff9cf028afcac6c51b3e409071" diff --git a/pyproject.toml b/pyproject.toml index b04edb611d..9a6306ee70 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -311,7 +311,7 @@ all = [ # We pin black so that our tests don't start failing on new releases. 
isort = ">=5.10.1" black = ">=22.3.0" -ruff = "0.0.252" +ruff = "0.0.259" # Typechecking mypy = "*" diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index 91118a8d84..d475fe7ae5 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -462,7 +462,7 @@ class FrozenEvent(EventBase): # Signatures is a dict of dicts, and this is faster than doing a # copy.deepcopy signatures = { - name: {sig_id: sig for sig_id, sig in sigs.items()} + name: dict(sigs.items()) for name, sigs in event_dict.pop("signatures", {}).items() } @@ -510,7 +510,7 @@ class FrozenEventV2(EventBase): # Signatures is a dict of dicts, and this is faster than doing a # copy.deepcopy signatures = { - name: {sig_id: sig for sig_id, sig in sigs.items()} + name: dict(sigs.items()) for name, sigs in event_dict.pop("signatures", {}).items() } diff --git a/synapse/events/utils.py b/synapse/events/utils.py index e41c7a4b83..c14c7791db 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -355,7 +355,7 @@ def serialize_event( time_now_ms = int(time_now_ms) # Should this strip out None's? - d = {k: v for k, v in e.get_dict().items()} + d = dict(e.get_dict().items()) d["event_id"] = e.event_id diff --git a/synapse/storage/database.py b/synapse/storage/database.py index fec4ae5b97..226ccc1671 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -1504,8 +1504,8 @@ class DatabasePool: self.engine.lock_table(txn, "user_ips") for keyv, valv in zip(key_values, value_values): - _keys = {x: y for x, y in zip(key_names, keyv)} - _vals = {x: y for x, y in zip(value_names, valv)} + _keys = dict(zip(key_names, keyv)) + _vals = dict(zip(value_names, valv)) self.simple_upsert_txn_emulated(txn, table, _keys, _vals, lock=False) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 193959b250..ccd9f9d141 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -27,6 +27,7 @@ from typing import ( Optional, Set, Tuple, + cast, ) import attr @@ -1348,9 +1349,7 @@ class PersistEventsStore: [event.event_id for event, _ in events_and_contexts], ) - have_persisted: Dict[str, bool] = { - event_id: outlier for event_id, outlier in txn - } + have_persisted = dict(cast(Iterable[Tuple[str, bool]], txn)) logger.debug( "_update_outliers_txn: events=%s have_persisted=%s", diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py index ab76b754e0..aeb6034f46 100644 --- a/synapse/storage/databases/main/pusher.py +++ b/synapse/storage/databases/main/pusher.py @@ -518,7 +518,7 @@ class PusherBackgroundUpdatesStore(SQLBaseStore): def set_device_id_for_pushers_txn(txn: LoggingTransaction) -> int: txn.execute( """ - SELECT + SELECT p.id AS pusher_id, p.device_id AS pusher_device_id, at.device_id AS token_device_id diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py index d3393d8e49..97c4dc2603 100644 --- a/synapse/storage/databases/main/stats.py +++ b/synapse/storage/databases/main/stats.py @@ -16,7 +16,17 @@ import logging from enum import Enum from itertools import chain -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Iterable, + List, + Optional, + Tuple, + Union, + cast, +) from typing_extensions import Counter @@ -523,7 +533,7 @@ class StatsStore(StateDeltasStore): """, (room_id,), ) - membership_counts = {membership: cnt for 
membership, cnt in txn} + membership_counts = dict(cast(Iterable[Tuple[str, int]], txn)) txn.execute( """ diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 2b8779bbb8..92cbe262a6 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -41,6 +41,7 @@ from typing import ( Any, Collection, Dict, + Iterable, List, Optional, Set, @@ -1343,7 +1344,9 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): GROUP BY type """ txn.execute(sql) - min_positions = {typ: pos for typ, pos in txn} # Map from type -> min position + min_positions = dict( + cast(Iterable[Tuple[str, int]], txn) + ) # Map from type -> min position # Ensure we do actually have some values here assert set(min_positions) == {"federation", "events"} diff --git a/tests/replication/slave/storage/test_events.py b/tests/replication/slave/storage/test_events.py index 57c781a0c3..b2125b1fea 100644 --- a/tests/replication/slave/storage/test_events.py +++ b/tests/replication/slave/storage/test_events.py @@ -412,7 +412,7 @@ class EventsWorkerStoreTestCase(BaseSlavedStoreTestCase): self.get_success( self.master_store.add_push_actions_to_staging( event.event_id, - {user_id: actions for user_id, actions in push_actions}, + dict(push_actions), False, "main", ) diff --git a/tests/server.py b/tests/server.py index 5de9722766..bb059630fa 100644 --- a/tests/server.py +++ b/tests/server.py @@ -983,7 +983,9 @@ def setup_test_homeserver( dropped = True except psycopg2.OperationalError as e: warnings.warn( - "Couldn't drop old db: " + str(e), category=UserWarning + "Couldn't drop old db: " + str(e), + category=UserWarning, + stacklevel=2, ) time.sleep(0.5) @@ -991,7 +993,11 @@ def setup_test_homeserver( db_conn.close() if not dropped: - warnings.warn("Failed to drop old DB.", category=UserWarning) + warnings.warn( + "Failed to drop old DB.", + category=UserWarning, + stacklevel=2, + ) if not LEAVE_DB: # Register the cleanup hook -- cgit 1.5.1 From 5282ba1e2bbff2635dc09aec45fd42a56c1a4545 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 28 Mar 2023 14:26:27 -0400 Subject: Implement MSC3983 to proxy /keys/claim queries to appservices. (#15314) Experimental support for MSC3983 is behind a configuration flag. If enabled, for users who are exclusively owned by an application service, the appservice will be queried for one-time keys *if* there are none uploaded to Synapse. --- changelog.d/15314.feature | 1 + synapse/appservice/api.py | 56 +++++++++++++++++ synapse/config/experimental.py | 5 ++ synapse/federation/federation_server.py | 20 +++--- synapse/handlers/appservice.py | 74 +++++++++++++++++++++- synapse/handlers/e2e_keys.py | 57 ++++++++++++++--- synapse/storage/databases/main/end_to_end_keys.py | 36 ++++++++--- tests/appservice/test_api.py | 59 ++++++++++++++++++ tests/handlers/test_e2e_keys.py | 76 ++++++++++++++++++++++- 9 files changed, 355 insertions(+), 29 deletions(-) create mode 100644 changelog.d/15314.feature diff --git a/changelog.d/15314.feature b/changelog.d/15314.feature new file mode 100644 index 0000000000..68b289b0cc --- /dev/null +++ b/changelog.d/15314.feature @@ -0,0 +1 @@ +Experimental support for passing One Time Key requests to application services ([MSC3983](https://github.com/matrix-org/matrix-spec-proposals/pull/3983)).
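For concreteness, this is the request body that the payload-building loop in `claim_client_keys` (added below) produces for a given query — a worked example with invented user and device values:

    from typing import Dict, List, Tuple

    query: List[Tuple[str, str, str]] = [
        ("@alice:example.org", "DEV1", "signed_curve25519"),
        ("@alice:example.org", "DEV1", "signed_curve25519"),  # two keys wanted
        ("@alice:example.org", "DEV2", "signed_curve25519"),
    ]

    body: Dict[str, Dict[str, List[str]]] = {}
    for user_id, device, algorithm in query:
        body.setdefault(user_id, {}).setdefault(device, []).append(algorithm)

    assert body == {
        "@alice:example.org": {
            "DEV1": ["signed_curve25519", "signed_curve25519"],
            "DEV2": ["signed_curve25519"],
        }
    }

This body is POSTed to the appservice's `/_matrix/app/unstable/org.matrix.msc3983/keys/claim` endpoint, authenticated with the homeserver token, and any tuple the appservice does not fulfil is handed back to the caller as still missing.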
diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py index 4812fb4496..51ee0e79df 100644 --- a/synapse/appservice/api.py +++ b/synapse/appservice/api.py @@ -388,6 +388,62 @@ class ApplicationServiceApi(SimpleHttpClient): failed_transactions_counter.labels(service.id).inc() return False + async def claim_client_keys( + self, service: "ApplicationService", query: List[Tuple[str, str, str]] + ) -> Tuple[Dict[str, Dict[str, Dict[str, JsonDict]]], List[Tuple[str, str, str]]]: + """Claim one time keys from an application service. + + Args: + query: An iterable of tuples of (user ID, device ID, algorithm). + + Returns: + A tuple of: + A map of user ID -> a map device ID -> a map of key ID -> JSON dict. + + A copy of the input which has not been fulfilled because the + appservice doesn't support this endpoint or has not returned + data for that tuple. + """ + if service.url is None: + return {}, query + + # This is required by the configuration. + assert service.hs_token is not None + + # Create the expected payload shape. + body: Dict[str, Dict[str, List[str]]] = {} + for user_id, device, algorithm in query: + body.setdefault(user_id, {}).setdefault(device, []).append(algorithm) + + uri = f"{service.url}/_matrix/app/unstable/org.matrix.msc3983/keys/claim" + try: + response = await self.post_json_get_json( + uri, + body, + headers={"Authorization": [f"Bearer {service.hs_token}"]}, + ) + except CodeMessageException as e: + # The appservice doesn't support this endpoint. + if e.code == 404 or e.code == 405: + return {}, query + logger.warning("claim_keys to %s received %s", uri, e.code) + return {}, query + except Exception as ex: + logger.warning("claim_keys to %s threw exception %s", uri, ex) + return {}, query + + # Check if the appservice fulfilled all of the queried user/device/algorithms + # or if some are still missing. + # + # TODO This places a lot of faith in the response shape being correct. + missing = [ + (user_id, device, algorithm) + for user_id, device, algorithm in query + if algorithm not in response.get(user_id, {}).get(device, []) + ] + + return response, missing + def _serialize( self, service: "ApplicationService", events: Iterable[EventBase] ) -> List[JsonDict]: diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 99dcd27c74..53e6fc2b54 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -74,6 +74,11 @@ class ExperimentalConfig(Config): "msc3202_transaction_extensions", False ) + # MSC3983: Proxying OTK claim requests to exclusive ASes. + self.msc3983_appservice_otk_claims: bool = experimental.get( + "msc3983_appservice_otk_claims", False + ) + # MSC3706 (server-side support for partial state in /send_join responses) # Synapse will always serve partial state responses to requests using the stable # query parameter `omit_members`. 
If this flag is set, Synapse will also serve diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 6d99845de5..64e99292ec 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -86,7 +86,7 @@ from synapse.storage.databases.main.lock import Lock from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary from synapse.storage.roommember import MemberSummary from synapse.types import JsonDict, StateMap, get_domain_from_id -from synapse.util import json_decoder, unwrapFirstError +from synapse.util import unwrapFirstError from synapse.util.async_helpers import Linearizer, concurrently_execute, gather_results from synapse.util.caches.response_cache import ResponseCache from synapse.util.stringutils import parse_server_name @@ -135,6 +135,7 @@ class FederationServer(FederationBase): self.state = hs.get_state_handler() self._event_auth_handler = hs.get_event_auth_handler() self._room_member_handler = hs.get_room_member_handler() + self._e2e_keys_handler = hs.get_e2e_keys_handler() self._state_storage_controller = hs.get_storage_controllers().state @@ -1012,15 +1013,14 @@ class FederationServer(FederationBase): query.append((user_id, device_id, algorithm)) log_kv({"message": "Claiming one time keys.", "user, device pairs": query}) - results = await self.store.claim_e2e_one_time_keys(query) - - json_result: Dict[str, Dict[str, dict]] = {} - for user_id, device_keys in results.items(): - for device_id, keys in device_keys.items(): - for key_id, json_str in keys.items(): - json_result.setdefault(user_id, {})[device_id] = { - key_id: json_decoder.decode(json_str) - } + results = await self._e2e_keys_handler.claim_local_one_time_keys(query) + + json_result: Dict[str, Dict[str, Dict[str, JsonDict]]] = {} + for result in results: + for user_id, device_keys in result.items(): + for device_id, keys in device_keys.items(): + for key_id, key in keys.items(): + json_result.setdefault(user_id, {})[device_id] = {key_id: key} logger.info( "Claimed one-time-keys: %s", diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index ec3ab968e9..953df4d9cd 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -12,7 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import TYPE_CHECKING, Collection, Dict, Iterable, List, Optional, Union +from typing import ( + TYPE_CHECKING, + Collection, + Dict, + Iterable, + List, + Optional, + Tuple, + Union, +) from prometheus_client import Counter @@ -829,3 +838,66 @@ class ApplicationServicesHandler: if unknown_user: return await self.query_user_exists(user_id) return True + + async def claim_e2e_one_time_keys( + self, query: Iterable[Tuple[str, str, str]] + ) -> Tuple[ + Iterable[Dict[str, Dict[str, Dict[str, JsonDict]]]], List[Tuple[str, str, str]] + ]: + """Claim one time keys from application services. + + Args: + query: An iterable of tuples of (user ID, device ID, algorithm). + + Returns: + A tuple of: + An iterable of maps of user ID -> a map device ID -> a map of key ID -> JSON bytes. + + A copy of the input which has not been fulfilled (either because + they are not appservice users or the appservice does not support + providing OTKs). + """ + services = self.store.get_app_services() + + # Partition the users by appservice. 
+ query_by_appservice: Dict[str, List[Tuple[str, str, str]]] = {} + missing = [] + for user_id, device, algorithm in query: + if not self.store.get_if_app_services_interested_in_user(user_id): + missing.append((user_id, device, algorithm)) + continue + + # Find the associated appservice. + for service in services: + if service.is_exclusive_user(user_id): + query_by_appservice.setdefault(service.id, []).append( + (user_id, device, algorithm) + ) + continue + + # Query each service in parallel. + results = await make_deferred_yieldable( + defer.DeferredList( + [ + run_in_background( + self.appservice_api.claim_client_keys, + # We know this must be an app service. + self.store.get_app_service_by_id(service_id), # type: ignore[arg-type] + service_query, + ) + for service_id, service_query in query_by_appservice.items() + ], + consumeErrors=True, + ) + ) + + # Patch together the results -- they are all independent (since they + # require exclusive control over the users). They get returned as a list + # and the caller combines them. + claimed_keys: List[Dict[str, Dict[str, Dict[str, JsonDict]]]] = [] + for success, result in results: + if success: + claimed_keys.append(result[0]) + missing.extend(result[1]) + + return claimed_keys, missing diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 4e9c8d8db0..9e7c2c45b5 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -13,7 +13,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import logging from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Mapping, Optional, Tuple @@ -53,6 +52,7 @@ class E2eKeysHandler: self.store = hs.get_datastores().main self.federation = hs.get_federation_client() self.device_handler = hs.get_device_handler() + self._appservice_handler = hs.get_application_service_handler() self.is_mine = hs.is_mine self.clock = hs.get_clock() @@ -88,6 +88,10 @@ class E2eKeysHandler: max_count=10, ) + self._query_appservices_for_otks = ( + hs.config.experimental.msc3983_appservice_otk_claims + ) + @trace @cancellable async def query_devices( @@ -542,6 +546,42 @@ class E2eKeysHandler: return ret + async def claim_local_one_time_keys( + self, local_query: List[Tuple[str, str, str]] + ) -> Iterable[Dict[str, Dict[str, Dict[str, JsonDict]]]]: + """Claim one time keys for local users. + + 1. Attempt to claim OTKs from the database. + 2. Ask application services if they provide OTKs. + 3. Attempt to fetch fallback keys from the database. + + Args: + local_query: An iterable of tuples of (user ID, device ID, algorithm). + + Returns: + An iterable of maps of user ID -> a map device ID -> a map of key ID -> JSON bytes. + """ + + otk_results, not_found = await self.store.claim_e2e_one_time_keys(local_query) + + # If the application services have not provided any keys via the C-S + # API, query it directly for one-time keys. + if self._query_appservices_for_otks: + ( + appservice_results, + not_found, + ) = await self._appservice_handler.claim_e2e_one_time_keys(not_found) + else: + appservice_results = [] + + # For each user that does not have a one-time keys available, see if + # there is a fallback key. + fallback_results = await self.store.claim_e2e_fallback_keys(not_found) + + # Return the results in order, each item from the input query should + # only appear once in the combined list. 
+ return (otk_results, *appservice_results, fallback_results) + @trace async def claim_one_time_keys( self, query: Dict[str, Dict[str, Dict[str, str]]], timeout: Optional[int] @@ -561,17 +601,18 @@ class E2eKeysHandler: set_tag("local_key_query", str(local_query)) set_tag("remote_key_query", str(remote_queries)) - results = await self.store.claim_e2e_one_time_keys(local_query) + results = await self.claim_local_one_time_keys(local_query) # A map of user ID -> device ID -> key ID -> key. json_result: Dict[str, Dict[str, Dict[str, JsonDict]]] = {} + for result in results: + for user_id, device_keys in result.items(): + for device_id, keys in device_keys.items(): + for key_id, key in keys.items(): + json_result.setdefault(user_id, {})[device_id] = {key_id: key} + + # Remote failures. failures: Dict[str, JsonDict] = {} - for user_id, device_keys in results.items(): - for device_id, keys in device_keys.items(): - for key_id, json_str in keys.items(): - json_result.setdefault(user_id, {})[device_id] = { - key_id: json_decoder.decode(json_str) - } @trace async def claim_client_keys(destination: str) -> None: diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index a3b6c8ae8e..dc7768c50c 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -51,7 +51,7 @@ from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore from synapse.storage.engines import PostgresEngine from synapse.storage.util.id_generators import StreamIdGenerator from synapse.types import JsonDict -from synapse.util import json_encoder +from synapse.util import json_decoder, json_encoder from synapse.util.caches.descriptors import cached, cachedList from synapse.util.cancellation import cancellable from synapse.util.iterutils import batch_iter @@ -1028,14 +1028,17 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker async def claim_e2e_one_time_keys( self, query_list: Iterable[Tuple[str, str, str]] - ) -> Dict[str, Dict[str, Dict[str, str]]]: + ) -> Tuple[Dict[str, Dict[str, Dict[str, JsonDict]]], List[Tuple[str, str, str]]]: """Take a list of one time keys out of the database. Args: query_list: An iterable of tuples of (user ID, device ID, algorithm). Returns: - A map of user ID -> a map device ID -> a map of key ID -> JSON bytes. + A tuple of: + A map of user ID -> a map device ID -> a map of key ID -> JSON. + + A copy of the input which has not been fulfilled.
""" @trace @@ -1115,7 +1118,8 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker key_id, key_json = otk_row return f"{algorithm}:{key_id}", key_json - results: Dict[str, Dict[str, Dict[str, str]]] = {} + results: Dict[str, Dict[str, Dict[str, JsonDict]]] = {} + missing: List[Tuple[str, str, str]] = [] for user_id, device_id, algorithm in query_list: if self.database_engine.supports_returning: # If we support RETURNING clause we can use a single query that @@ -1138,11 +1142,25 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker device_results = results.setdefault(user_id, {}).setdefault( device_id, {} ) - device_results[claim_row[0]] = claim_row[1] - continue + device_results[claim_row[0]] = json_decoder.decode(claim_row[1]) + else: + missing.append((user_id, device_id, algorithm)) + + return results, missing + + async def claim_e2e_fallback_keys( + self, query_list: Iterable[Tuple[str, str, str]] + ) -> Dict[str, Dict[str, Dict[str, JsonDict]]]: + """Take a list of fallback keys out of the database. - # No one-time key available, so see if there's a fallback - # key + Args: + query_list: An iterable of tuples of (user ID, device ID, algorithm). + + Returns: + A map of user ID -> a map device ID -> a map of key ID -> JSON. + """ + results: Dict[str, Dict[str, Dict[str, JsonDict]]] = {} + for user_id, device_id, algorithm in query_list: row = await self.db_pool.simple_select_one( table="e2e_fallback_keys_json", keyvalues={ @@ -1179,7 +1197,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker ) device_results = results.setdefault(user_id, {}).setdefault(device_id, {}) - device_results[f"{algorithm}:{key_id}"] = key_json + device_results[f"{algorithm}:{key_id}"] = json_decoder.decode(key_json) return results diff --git a/tests/appservice/test_api.py b/tests/appservice/test_api.py index 9d183b733e..0dd02b7d58 100644 --- a/tests/appservice/test_api.py +++ b/tests/appservice/test_api.py @@ -105,3 +105,62 @@ class ApplicationServiceApiTestCase(unittest.HomeserverTestCase): ) self.assertEqual(self.request_url, URL_LOCATION) self.assertEqual(result, SUCCESS_RESULT_LOCATION) + + def test_claim_keys(self) -> None: + """ + Tests that the /keys/claim response is properly parsed for missing + keys. + """ + + RESPONSE: JsonDict = { + "@alice:example.org": { + "DEVICE_1": { + "signed_curve25519:AAAAHg": { + # We don't really care about the content of the keys, + # they get passed back transparently. + }, + "signed_curve25519:BBBBHg": {}, + }, + "DEVICE_2": {"signed_curve25519:CCCCHg": {}}, + }, + } + + async def post_json_get_json( + uri: str, + post_json: Any, + headers: Mapping[Union[str, bytes], Sequence[Union[str, bytes]]], + ) -> JsonDict: + # Ensure the access token is passed as both a header and query arg. + if not headers.get("Authorization"): + raise RuntimeError("Access token not provided") + + self.assertEqual(headers.get("Authorization"), [f"Bearer {TOKEN}"]) + return RESPONSE + + # We assign to a method, which mypy doesn't like. + self.api.post_json_get_json = Mock(side_effect=post_json_get_json) # type: ignore[assignment] + + MISSING_KEYS = [ + # Known user, known device, missing algorithm. + ("@alice:example.org", "DEVICE_1", "signed_curve25519:DDDDHg"), + # Known user, missing device. + ("@alice:example.org", "DEVICE_3", "signed_curve25519:EEEEHg"), + # Unknown user. 
+ ("@bob:example.org", "DEVICE_4", "signed_curve25519:FFFFHg"), + ] + + claimed_keys, missing = self.get_success( + self.api.claim_client_keys( + self.service, + [ + # Found devices + ("@alice:example.org", "DEVICE_1", "signed_curve25519:AAAAHg"), + ("@alice:example.org", "DEVICE_1", "signed_curve25519:BBBBHg"), + ("@alice:example.org", "DEVICE_2", "signed_curve25519:CCCCHg"), + ] + + MISSING_KEYS, + ) + ) + + self.assertEqual(claimed_keys, RESPONSE) + self.assertEqual(missing, MISSING_KEYS) diff --git a/tests/handlers/test_e2e_keys.py b/tests/handlers/test_e2e_keys.py index 6b4cba65d0..4ff04fc66b 100644 --- a/tests/handlers/test_e2e_keys.py +++ b/tests/handlers/test_e2e_keys.py @@ -23,18 +23,24 @@ from twisted.test.proto_helpers import MemoryReactor from synapse.api.constants import RoomEncryptionAlgorithms from synapse.api.errors import Codes, SynapseError +from synapse.appservice import ApplicationService from synapse.handlers.device import DeviceHandler from synapse.server import HomeServer +from synapse.storage.databases.main.appservice import _make_exclusive_regex from synapse.types import JsonDict from synapse.util import Clock from tests import unittest from tests.test_utils import make_awaitable +from tests.unittest import override_config class E2eKeysHandlerTestCase(unittest.HomeserverTestCase): def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: - return self.setup_test_homeserver(federation_client=mock.Mock()) + self.appservice_api = mock.Mock() + return self.setup_test_homeserver( + federation_client=mock.Mock(), application_service_api=self.appservice_api + ) def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.handler = hs.get_e2e_keys_handler() @@ -941,3 +947,71 @@ class E2eKeysHandlerTestCase(unittest.HomeserverTestCase): # The two requests to the local homeserver should be identical. self.assertEqual(response_1, response_2) + + @override_config({"experimental_features": {"msc3983_appservice_otk_claims": True}}) + def test_query_appservice(self) -> None: + local_user = "@boris:" + self.hs.hostname + device_id_1 = "xyz" + fallback_key = {"alg1:k1": "fallback_key1"} + device_id_2 = "abc" + otk = {"alg1:k2": "key2"} + + # Inject an appservice interested in this user. + appservice = ApplicationService( + token="i_am_an_app_service", + id="1234", + namespaces={"users": [{"regex": r"@boris:*", "exclusive": True}]}, + # Note: this user does not have to match the regex above + sender="@as_main:test", + ) + self.hs.get_datastores().main.services_cache = [appservice] + self.hs.get_datastores().main.exclusive_user_regex = _make_exclusive_regex( + [appservice] + ) + + # Setup a response, but only for device 2. + self.appservice_api.claim_client_keys.return_value = make_awaitable( + ({local_user: {device_id_2: otk}}, [(local_user, device_id_1, "alg1")]) + ) + + # we shouldn't have any unused fallback keys yet + res = self.get_success( + self.store.get_e2e_unused_fallback_key_types(local_user, device_id_1) + ) + self.assertEqual(res, []) + + self.get_success( + self.handler.upload_keys_for_user( + local_user, + device_id_1, + {"fallback_keys": fallback_key}, + ) + ) + + # we should now have an unused alg1 key + fallback_res = self.get_success( + self.store.get_e2e_unused_fallback_key_types(local_user, device_id_1) + ) + self.assertEqual(fallback_res, ["alg1"]) + + # claiming an OTK when no OTKs are available should ask the appservice, then + # query the fallback keys. 
+ claim_res = self.get_success( + self.handler.claim_one_time_keys( + { + "one_time_keys": { + local_user: {device_id_1: "alg1", device_id_2: "alg1"} + } + }, + timeout=None, + ) + ) + self.assertEqual( + claim_res, + { + "failures": {}, + "one_time_keys": { + local_user: {device_id_1: fallback_key, device_id_2: otk} + }, + }, + ) -- cgit 1.5.1 From 78cdb72cd6b0e007c314d9fed9f629dfc5b937a6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 29 Mar 2023 12:07:14 +0100 Subject: Delete stale non-e2e devices for users, take 3 (#15183) This should help reduce the number of devices that e.g. simple bots which repeatedly log in rack up. We only delete non-e2e devices as they should be safe to delete, whereas if we delete e2e devices for a user we may accidentally break their ability to receive e2e keys for a message. --- changelog.d/15183.misc | 1 + synapse/handlers/device.py | 2 +- synapse/handlers/register.py | 50 ++++++++++++++++++- synapse/storage/databases/main/devices.py | 80 ++++++++++++++++++++++++++++++- tests/handlers/test_admin.py | 2 +- tests/handlers/test_device.py | 2 +- tests/storage/test_client_ips.py | 4 +- 7 files changed, 134 insertions(+), 7 deletions(-) create mode 100644 changelog.d/15183.misc (limited to 'tests') diff --git a/changelog.d/15183.misc b/changelog.d/15183.misc new file mode 100644 index 0000000000..f9bfc581ad --- /dev/null +++ b/changelog.d/15183.misc @@ -0,0 +1 @@ +Prune users' old devices on login if they have too many. diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 9ded6389ac..0fc165a8d6 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -485,7 +485,7 @@ class DeviceHandler(DeviceWorkerHandler): device_ids = [d for d in device_ids if d != except_device_id] await self.delete_devices(user_id, device_ids) - async def delete_devices(self, user_id: str, device_ids: List[str]) -> None: + async def delete_devices(self, user_id: str, device_ids: StrCollection) -> None: """Delete several devices Args: diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index c8bf2439af..bb1df1e60f 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -16,7 +16,7 @@ """Contains functions for registering clients.""" import logging -from typing import TYPE_CHECKING, Iterable, List, Optional, Tuple +from typing import TYPE_CHECKING, Iterable, List, Optional, Set, Tuple from prometheus_client import Counter from typing_extensions import TypedDict @@ -40,6 +40,7 @@ from synapse.appservice import ApplicationService from synapse.config.server import is_threepid_reserved from synapse.handlers.device import DeviceHandler from synapse.http.servlet import assert_params_in_dict +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.replication.http.login import RegisterDeviceReplicationServlet from synapse.replication.http.register import ( ReplicationPostRegisterActionsServlet, @@ -48,6 +49,7 @@ from synapse.replication.http.register import ( from synapse.spam_checker_api import RegistrationBehaviour from synapse.types import RoomAlias, UserID, create_requester from synapse.types.state import StateFilter +from synapse.util.iterutils import batch_iter if TYPE_CHECKING: from synapse.server import HomeServer @@ -110,6 +112,10 @@ class RegistrationHandler: self._server_notices_mxid = hs.config.servernotices.server_notices_mxid self._server_name = hs.hostname + # The set of users that we're currently pruning devices for.
Ensures + # that we don't have two such jobs for the same user running at once. + self._currently_pruning_devices_for_users: Set[str] = set() + self.spam_checker = hs.get_spam_checker() if hs.config.worker.worker_app: @@ -121,7 +127,10 @@ class RegistrationHandler: ReplicationPostRegisterActionsServlet.make_client(hs) ) else: - self.device_handler = hs.get_device_handler() + device_handler = hs.get_device_handler() + assert isinstance(device_handler, DeviceHandler) + self.device_handler = device_handler + self._register_device_client = self.register_device_inner self.pusher_pool = hs.get_pusherpool() @@ -851,6 +860,9 @@ class RegistrationHandler: # This can only run on the main process. assert isinstance(self.device_handler, DeviceHandler) + # Prune the user's device list if they already have a lot of devices. + await self._maybe_prune_too_many_devices(user_id) + registered_device_id = await self.device_handler.check_device_registered( user_id, device_id, @@ -919,6 +931,40 @@ class RegistrationHandler: "refresh_token": refresh_token, } + async def _maybe_prune_too_many_devices(self, user_id: str) -> None: + """Delete any excess old devices this user may have.""" + + if user_id in self._currently_pruning_devices_for_users: + return + + # We also cap the number of users whose devices we prune at the same + # time, to avoid performance problems. + if len(self._currently_pruning_devices_for_users) > 5: + return + + device_ids = await self.store.check_too_many_devices_for_user(user_id) + if not device_ids: + return + + # Now spawn a background loop that deletes said devices. + async def _prune_too_many_devices_loop() -> None: + if user_id in self._currently_pruning_devices_for_users: + return + + self._currently_pruning_devices_for_users.add(user_id) + + try: + for batch in batch_iter(device_ids, 10): + await self.device_handler.delete_devices(user_id, batch) + + await self.clock.sleep(60) + finally: + self._currently_pruning_devices_for_users.discard(user_id) + + run_as_background_process( + "_prune_too_many_devices_loop", _prune_too_many_devices_loop + ) + async def post_registration_actions( self, user_id: str, auth_result: dict, access_token: Optional[str] ) -> None: diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 5503621ad6..7647cda2c6 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1599,6 +1599,73 @@ class DeviceBackgroundUpdateStore(SQLBaseStore): return rows + async def check_too_many_devices_for_user(self, user_id: str) -> List[str]: + """Check if the user has a lot of devices, and if so return the set of + devices we can prune. + + This does *not* return hidden devices or devices with E2E keys. + """ + + num_devices = await self.db_pool.simple_select_one_onecol( + table="devices", + keyvalues={"user_id": user_id, "hidden": False}, + retcol="COALESCE(COUNT(*), 0)", + desc="count_devices", + ) + + # We let users have up to ten devices without pruning. + if num_devices <= 10: + return [] + + # We always prune devices not seen in the last 14 days... + max_last_seen = self._clock.time_msec() - 14 * 24 * 60 * 60 * 1000 + + # ... but we also cap the maximum number of devices the user can have to + # 50. + if num_devices > 50: + # Choose a last seen that ensures we keep at most 50 devices. + sql = """ + SELECT last_seen FROM devices + LEFT JOIN e2e_device_keys_json USING (user_id, device_id) + WHERE + user_id = ? 
+ AND NOT hidden + AND last_seen IS NOT NULL + AND key_json IS NULL + ORDER BY last_seen DESC + LIMIT 1 + OFFSET 50 + """ + + rows = await self.db_pool.execute( + "check_too_many_devices_for_user_last_seen", None, sql, (user_id,) + ) + if rows: + max_last_seen = max(rows[0][0], max_last_seen) + + # Fetch the devices to delete. + sql = """ + SELECT DISTINCT device_id FROM devices + LEFT JOIN e2e_device_keys_json USING (user_id, device_id) + WHERE + user_id = ? + AND NOT hidden + AND last_seen < ? + AND key_json IS NULL + ORDER BY last_seen + """ + + def check_too_many_devices_for_user_txn( + txn: LoggingTransaction, + ) -> List[str]: + txn.execute(sql, (user_id, max_last_seen)) + return [device_id for device_id, in txn] + + return await self.db_pool.runInteraction( + "check_too_many_devices_for_user", + check_too_many_devices_for_user_txn, + ) + class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): # Because we have write access, this will be a StreamIdGenerator @@ -1657,6 +1724,7 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): values={}, insertion_values={ "display_name": initial_device_display_name, + "last_seen": self._clock.time_msec(), "hidden": False, }, desc="store_device", @@ -1702,7 +1770,15 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): ) raise StoreError(500, "Problem storing device.") - async def delete_devices(self, user_id: str, device_ids: List[str]) -> None: + @cached(max_entries=0) + async def delete_device(self, user_id: str, device_id: str) -> None: + raise NotImplementedError() + + # Note: sometimes deleting rows out of `device_inbox` can take a long time, + # so we use a cache so that we deduplicate in flight requests to delete + # devices. + @cachedList(cached_method_name="delete_device", list_name="device_ids") + async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> dict: """Deletes several devices. 
Args: @@ -1739,6 +1815,8 @@ class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): for device_id in device_ids: self.device_id_exists_cache.invalidate((user_id, device_id)) + return {} + async def update_device( self, user_id: str, device_id: str, new_display_name: Optional[str] = None ) -> None: diff --git a/tests/handlers/test_admin.py b/tests/handlers/test_admin.py index 5569ccef8a..f0ba3775c8 100644 --- a/tests/handlers/test_admin.py +++ b/tests/handlers/test_admin.py @@ -272,7 +272,7 @@ class ExfiltrateData(unittest.HomeserverTestCase): self.assertIn("device_id", args[0][0]) self.assertIsNone(args[0][0]["display_name"]) self.assertIsNone(args[0][0]["last_seen_user_agent"]) - self.assertIsNone(args[0][0]["last_seen_ts"]) + self.assertEqual(args[0][0]["last_seen_ts"], 600) self.assertIsNone(args[0][0]["last_seen_ip"]) def test_connections(self) -> None: diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py index ce7525e29c..a456bffd63 100644 --- a/tests/handlers/test_device.py +++ b/tests/handlers/test_device.py @@ -115,7 +115,7 @@ class DeviceTestCase(unittest.HomeserverTestCase): "device_id": "xyz", "display_name": "display 0", "last_seen_ip": None, - "last_seen_ts": None, + "last_seen_ts": 1000000, }, device_map["xyz"], ) diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py index cd0079871c..f989986538 100644 --- a/tests/storage/test_client_ips.py +++ b/tests/storage/test_client_ips.py @@ -170,6 +170,8 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): ) ) + last_seen = self.clock.time_msec() + if after_persisting: # Trigger the storage loop self.reactor.advance(10) @@ -190,7 +192,7 @@ class ClientIpStoreTestCase(unittest.HomeserverTestCase): "device_id": device_id, "ip": None, "user_agent": None, - "last_seen": None, + "last_seen": last_seen, }, ], ) -- cgit 1.5.1
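As a closing note on the pruning policy encoded by `check_too_many_devices_for_user` above: up to ten devices are always kept; beyond that, devices unseen for 14 days become prunable; and the total is capped at 50. A standalone sketch of that cutoff calculation, under the assumption that only non-hidden devices without E2E keys are counted (as the SQL above enforces) — an illustration, not the actual implementation:

from typing import List, Optional

def prune_cutoff_ms(
    num_devices: int, last_seen_ms_newest_first: List[int], now_ms: int
) -> Optional[int]:
    # Users with ten or fewer devices are never pruned.
    if num_devices <= 10:
        return None
    # Otherwise, devices not seen for 14 days are candidates for deletion...
    cutoff = now_ms - 14 * 24 * 60 * 60 * 1000
    # ...and the device count is additionally capped at 50: if needed, the
    # cutoff is raised to the last-seen time of the 51st newest device.
    if num_devices > 50 and len(last_seen_ms_newest_first) > 50:
        cutoff = max(last_seen_ms_newest_first[50], cutoff)
    # Devices whose last_seen falls below the cutoff are deleted.
    return cutoff

Deletion then runs as a background process that feeds the prunable device IDs through `delete_devices` in batches of ten, with a cap on how many users are pruned concurrently.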