From 2dff93099b5aa7e213da76a9c4b3de84385b58e1 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 7 Feb 2023 15:24:44 +0000 Subject: Typecheck tests.rest.media.v1.test_media_storage (#15008) * Fix MediaStorage type hint * Typecheck tests.rest.media.v1.test_media_storage * Changelog * Remove assert and make the comment succinct * Fix syntax for olddeps --- synapse/rest/media/v1/media_storage.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'synapse') diff --git a/synapse/rest/media/v1/media_storage.py b/synapse/rest/media/v1/media_storage.py index a5c3de192f..db25848744 100644 --- a/synapse/rest/media/v1/media_storage.py +++ b/synapse/rest/media/v1/media_storage.py @@ -46,10 +46,9 @@ from ._base import FileInfo, Responder from .filepath import MediaFilePaths if TYPE_CHECKING: + from synapse.rest.media.v1.storage_provider import StorageProvider from synapse.server import HomeServer - from .storage_provider import StorageProviderWrapper - logger = logging.getLogger(__name__) @@ -68,7 +67,7 @@ class MediaStorage: hs: "HomeServer", local_media_directory: str, filepaths: MediaFilePaths, - storage_providers: Sequence["StorageProviderWrapper"], + storage_providers: Sequence["StorageProvider"], ): self.hs = hs self.reactor = hs.get_reactor() @@ -360,7 +359,7 @@ class ReadableFileWrapper: clock: Clock path: str - async def write_chunks_to(self, callback: Callable[[bytes], None]) -> None: + async def write_chunks_to(self, callback: Callable[[bytes], object]) -> None: """Reads the file in chunks and calls the callback with each chunk.""" with open(self.path, "rb") as file: -- cgit 1.5.1 From c951fbedcb81895c199c1f4cfe2251d6c3a7b5f4 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 8 Feb 2023 13:09:41 -0500 Subject: MSC3873: Escape keys when flattening dicts. (#15004) This disambiguates keys which attempt to match fields with a dot in them (e.g. m.relates_to). Disabled by default behind an experimental configuration flag. --- changelog.d/15004.feature | 1 + synapse/config/experimental.py | 5 +++++ synapse/push/bulk_push_rule_evaluator.py | 30 ++++++++++++++++++++++++++---- tests/push/test_push_rule_evaluator.py | 8 ++++++++ 4 files changed, 40 insertions(+), 4 deletions(-) create mode 100644 changelog.d/15004.feature (limited to 'synapse') diff --git a/changelog.d/15004.feature b/changelog.d/15004.feature new file mode 100644 index 0000000000..d11d0aca91 --- /dev/null +++ b/changelog.d/15004.feature @@ -0,0 +1 @@ +Implement [MSC3873](https://github.com/matrix-org/matrix-spec-proposals/pull/3873) to unambiguate push rule keys with dots in them. diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 53c0682dfd..5e3a889081 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -169,6 +169,11 @@ class ExperimentalConfig(Config): # MSC3925: do not replace events with their edits self.msc3925_inhibit_edit = experimental.get("msc3925_inhibit_edit", False) + # MSC3873: Disambiguate event_match keys. 
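+        # When this is enabled, `_flatten_dict` escapes any periods in a
+        # field name as `\.` (and backslashes as `\\`) before joining nested
+        # keys with dots, so a top-level field such as "m.relates_to" can no
+        # longer be confused with a field `relates_to` nested under `m`.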
+ self.msc3783_escape_event_match_key = experimental.get( + "msc3783_escape_event_match_key", False + ) + # MSC3952: Intentional mentions self.msc3952_intentional_mentions = experimental.get( "msc3952_intentional_mentions", False diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index d9c0a98f44..39d2f88f03 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -271,7 +271,10 @@ class BulkPushRuleEvaluator: related_event_id, allow_none=True ) if related_event is not None: - related_events[relation_type] = _flatten_dict(related_event) + related_events[relation_type] = _flatten_dict( + related_event, + msc3783_escape_event_match_key=self.hs.config.experimental.msc3783_escape_event_match_key, + ) reply_event_id = ( event.content.get("m.relates_to", {}) @@ -286,7 +289,10 @@ class BulkPushRuleEvaluator: ) if related_event is not None: - related_events["m.in_reply_to"] = _flatten_dict(related_event) + related_events["m.in_reply_to"] = _flatten_dict( + related_event, + msc3783_escape_event_match_key=self.hs.config.experimental.msc3783_escape_event_match_key, + ) # indicate that this is from a fallback relation. if relation_type == "m.thread" and event.content.get( @@ -405,7 +411,10 @@ class BulkPushRuleEvaluator: room_mention = mentions.get("room") is True evaluator = PushRuleEvaluator( - _flatten_dict(event), + _flatten_dict( + event, + msc3783_escape_event_match_key=self.hs.config.experimental.msc3783_escape_event_match_key, + ), has_mentions, user_mentions, room_mention, @@ -493,6 +502,8 @@ def _flatten_dict( d: Union[EventBase, Mapping[str, Any]], prefix: Optional[List[str]] = None, result: Optional[Dict[str, str]] = None, + *, + msc3783_escape_event_match_key: bool = False, ) -> Dict[str, str]: """ Given a JSON dictionary (or event) which might contain sub dictionaries, @@ -521,11 +532,22 @@ def _flatten_dict( if result is None: result = {} for key, value in d.items(): + if msc3783_escape_event_match_key: + # Escape periods in the key with a backslash (and backslashes with an + # extra backslash). This is since a period is used as a separator between + # nested fields. + key = key.replace("\\", "\\\\").replace(".", "\\.") + if isinstance(value, str): result[".".join(prefix + [key])] = value.lower() elif isinstance(value, Mapping): # do not set `room_version` due to recursion considerations below - _flatten_dict(value, prefix=(prefix + [key]), result=result) + _flatten_dict( + value, + prefix=(prefix + [key]), + result=result, + msc3783_escape_event_match_key=msc3783_escape_event_match_key, + ) # `room_version` should only ever be set when looking at the top level of an event if ( diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py index da33423871..516b65cc3c 100644 --- a/tests/push/test_push_rule_evaluator.py +++ b/tests/push/test_push_rule_evaluator.py @@ -48,6 +48,14 @@ class FlattenDictTestCase(unittest.TestCase): input = {"foo": {"bar": "abc"}} self.assertEqual({"foo.bar": "abc"}, _flatten_dict(input)) + # If a field has a dot in it, escape it. 
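+        # Without escaping, "m.foo.b\ar" is ambiguous; with escaping enabled,
+        # the dot in "m.foo" becomes "m\.foo", the literal backslash in "b\ar"
+        # is doubled, and only the separator dot between the two components
+        # remains unescaped.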
+ input = {"m.foo": {"b\\ar": "abc"}} + self.assertEqual({"m.foo.b\\ar": "abc"}, _flatten_dict(input)) + self.assertEqual( + {"m\\.foo.b\\\\ar": "abc"}, + _flatten_dict(input, msc3783_escape_event_match_key=True), + ) + def test_non_string(self) -> None: """Non-string items are dropped.""" input: Dict[str, Any] = { -- cgit 1.5.1 From 55e4d27b36fd69a3cf3eceecbd42706579ef2dc7 Mon Sep 17 00:00:00 2001 From: Shay Date: Wed, 8 Feb 2023 11:25:11 -0800 Subject: Limit concurrent event creation for a room to avoid state resolution when sending bursts of events to a local room (#14977) --- changelog.d/14977.misc | 1 + synapse/handlers/message.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 changelog.d/14977.misc (limited to 'synapse') diff --git a/changelog.d/14977.misc b/changelog.d/14977.misc new file mode 100644 index 0000000000..4d551c52b7 --- /dev/null +++ b/changelog.d/14977.misc @@ -0,0 +1 @@ +Limit concurrent event creation for a room to avoid state resolution when sending bursts of events to a local room. \ No newline at end of file diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index e688e00575..5f6da2943f 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -499,9 +499,9 @@ class EventCreationHandler: self.request_ratelimiter = hs.get_request_ratelimiter() - # We arbitrarily limit concurrent event creation for a room to 5. - # This is to stop us from diverging history *too* much. - self.limiter = Linearizer(max_count=5, name="room_event_creation_limit") + # We limit concurrent event creation for a room to 1. This prevents state resolution + # from occurring when sending bursts of events to a local room + self.limiter = Linearizer(max_count=1, name="room_event_creation_limit") self._bulk_push_rule_evaluator = hs.get_bulk_push_rule_evaluator() -- cgit 1.5.1 From 733531ee3e695da92f10e01b24f62ee35e09e4cd Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 9 Feb 2023 09:49:04 -0500 Subject: Add final type hint to synapse.server. (#15035) --- changelog.d/15035.misc | 1 + mypy.ini | 3 --- synapse/handlers/room.py | 2 +- synapse/server.py | 12 +++++------- synapse/storage/_base.py | 2 ++ synapse/storage/database.py | 1 + synapse/storage/databases/main/events.py | 2 +- 7 files changed, 11 insertions(+), 12 deletions(-) create mode 100644 changelog.d/15035.misc (limited to 'synapse') diff --git a/changelog.d/15035.misc b/changelog.d/15035.misc new file mode 100644 index 0000000000..93ceaeafc9 --- /dev/null +++ b/changelog.d/15035.misc @@ -0,0 +1 @@ +Improve type hints. diff --git a/mypy.ini b/mypy.ini index 3f144e61fb..57f27ba4f7 100644 --- a/mypy.ini +++ b/mypy.ini @@ -53,9 +53,6 @@ warn_unused_ignores = False [mypy-synapse.util.caches.treecache] disallow_untyped_defs = False -[mypy-synapse.server] -disallow_untyped_defs = False - [mypy-synapse.storage.database] disallow_untyped_defs = False diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 7ba7c4ff07..0e759b8a5d 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1076,7 +1076,7 @@ class RoomCreationHandler: state_map: MutableStateMap[str] = {} # current_state_group of last event created. 
Used for computing event context of # events to be batched - current_state_group = None + current_state_group: Optional[int] = None def create_event_dict(etype: str, content: JsonDict, **kwargs: Any) -> JsonDict: e = {"type": etype, "content": content} diff --git a/synapse/server.py b/synapse/server.py index 9d6d268f49..efc6b5f895 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -21,7 +21,7 @@ import abc import functools import logging -from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, TypeVar, cast +from typing import TYPE_CHECKING, Callable, Dict, List, Optional, TypeVar, cast from twisted.internet.interfaces import IOpenSSLContextFactory from twisted.internet.tcp import Port @@ -144,10 +144,10 @@ if TYPE_CHECKING: from synapse.handlers.saml import SamlHandler -T = TypeVar("T", bound=Callable[..., Any]) +T = TypeVar("T") -def cache_in_self(builder: T) -> T: +def cache_in_self(builder: Callable[["HomeServer"], T]) -> Callable[["HomeServer"], T]: """Wraps a function called e.g. `get_foo`, checking if `self.foo` exists and returning if so. If not, calls the given function and sets `self.foo` to it. @@ -166,7 +166,7 @@ def cache_in_self(builder: T) -> T: building = [False] @functools.wraps(builder) - def _get(self): + def _get(self: "HomeServer") -> T: try: return getattr(self, depname) except AttributeError: @@ -185,9 +185,7 @@ def cache_in_self(builder: T) -> T: return dep - # We cast here as we need to tell mypy that `_get` has the same signature as - # `builder`. - return cast(T, _get) + return _get class HomeServer(metaclass=abc.ABCMeta): diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 41d9111019..481fec72fe 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -37,6 +37,8 @@ class SQLBaseStore(metaclass=ABCMeta): per data store (and not one per physical database). """ + db_pool: DatabasePool + def __init__( self, database: DatabasePool, diff --git a/synapse/storage/database.py b/synapse/storage/database.py index e20c5c5302..feaa6cdd07 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -499,6 +499,7 @@ class DatabasePool: """ _TXN_ID = 0 + engine: BaseDatabaseEngine def __init__( self, diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 1536937b67..cb66376fb4 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -306,7 +306,7 @@ class PersistEventsStore: # The set of event_ids to return. This includes all soft-failed events # and their prev events. - existing_prevs = set() + existing_prevs: Set[str] = set() def _get_prevs_before_rejected_txn( txn: LoggingTransaction, batch: Collection[str] -- cgit 1.5.1 From 8a6e0434889ea94893119775b6f56904cbc575c2 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 9 Feb 2023 10:56:02 -0500 Subject: Avoid mutating cached room aliases. (#15038) This might cause incorrect data in other callers which are not expecting the canonical alias to be added into the response. --- changelog.d/15038.bugfix | 1 + synapse/handlers/directory.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 changelog.d/15038.bugfix (limited to 'synapse') diff --git a/changelog.d/15038.bugfix b/changelog.d/15038.bugfix new file mode 100644 index 0000000000..4695a09756 --- /dev/null +++ b/changelog.d/15038.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug where the room aliases returned could be corrupted. 
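The fix below stops the handler from appending to a list that may be shared
with a cache. A minimal sketch of the hazard, assuming a cached alias lookup
that hands back the cached list object itself (illustrative names, not
Synapse's actual API):

    # The cache and the caller share one list object.
    cached_aliases = ["#room:example.org"]
    room_aliases = cached_aliases
    # In-place mutation corrupts the cached value for every later caller:
    room_aliases.append("#canonical:example.org")
    # Concatenation builds a fresh list and leaves the cache untouched:
    room_aliases = room_aliases + ["#canonical:example.org"]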
diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index 2ea52257cb..d31b0fbb17 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -485,7 +485,8 @@ class DirectoryHandler: ) ) if canonical_alias: - room_aliases.append(canonical_alias) + # Ensure we do not mutate room_aliases. + room_aliases = room_aliases + [canonical_alias] if not self.config.roomdirectory.is_publishing_room_allowed( user_id, room_id, room_aliases -- cgit 1.5.1 From d22c1c862c8259465a8e95c41eb1f00d0367a640 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 9 Feb 2023 13:04:24 -0500 Subject: Respond correctly to unknown methods on known endpoints (#14605) Respond with a 405 error if a request is received on a known endpoint, but to an unknown method, per MSC3743. --- changelog.d/14605.bugfix | 1 + docs/admin_api/media_admin_api.md | 10 +++++++- docs/upgrade.md | 10 ++++++++ synapse/http/server.py | 40 ++++++++++++-------------------- synapse/rest/admin/media.py | 18 +++++++++++---- synapse/rest/client/room_keys.py | 48 ++++++++++++++++++++++++++------------- synapse/rest/client/tags.py | 4 +++- tests/rest/admin/test_media.py | 9 +++++--- 8 files changed, 89 insertions(+), 51 deletions(-) create mode 100644 changelog.d/14605.bugfix (limited to 'synapse') diff --git a/changelog.d/14605.bugfix b/changelog.d/14605.bugfix new file mode 100644 index 0000000000..cb95a87d92 --- /dev/null +++ b/changelog.d/14605.bugfix @@ -0,0 +1 @@ +Return spec-compliant JSON errors when unknown endpoints are requested. diff --git a/docs/admin_api/media_admin_api.md b/docs/admin_api/media_admin_api.md index 7f8c8e22c1..30833f3109 100644 --- a/docs/admin_api/media_admin_api.md +++ b/docs/admin_api/media_admin_api.md @@ -235,6 +235,14 @@ The following fields are returned in the JSON response body: Request: +``` +POST /_synapse/admin/v1/media/delete?before_ts= + +{} +``` + +*Deprecated in Synapse v1.78.0:* This API is available at the deprecated endpoint: + ``` POST /_synapse/admin/v1/media//delete?before_ts= @@ -243,7 +251,7 @@ POST /_synapse/admin/v1/media//delete?before_ts= URL Parameters -* `server_name`: string - The name of your local server (e.g `matrix.org`). +* `server_name`: string - The name of your local server (e.g `matrix.org`). *Deprecated in Synapse v1.78.0.* * `before_ts`: string representing a positive integer - Unix timestamp in milliseconds. Files that were last used before this timestamp will be deleted. It is the timestamp of last access, not the timestamp when the file was created. diff --git a/docs/upgrade.md b/docs/upgrade.md index bc143444be..15167b8c58 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -88,6 +88,15 @@ process, for example: dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb ``` +# Upgrading to v1.78.0 + +## Deprecate the `/_synapse/admin/v1/media//delete` admin API + +Synapse 1.78.0 replaces the `/_synapse/admin/v1/media//delete` +admin API with an identical endpoint at `/_synapse/admin/v1/media/delete`. Please +update your tooling to use the new endpoint. The deprecated version will be removed +in a future release. + # Upgrading to v1.76.0 ## Faster joins are enabled by default @@ -137,6 +146,7 @@ and then do `pip install matrix-synapse[user-search]` for a PyPI install. Docker images and Debian packages need nothing specific as they already include or specify ICU as an explicit dependency. 
+ # Upgrading to v1.73.0 ## Legacy Prometheus metric names have now been removed diff --git a/synapse/http/server.py b/synapse/http/server.py index 2563858f3c..9314454af1 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -30,7 +30,6 @@ from typing import ( Iterable, Iterator, List, - NoReturn, Optional, Pattern, Tuple, @@ -340,7 +339,8 @@ class _AsyncResource(resource.Resource, metaclass=abc.ABCMeta): return callback_return - return _unrecognised_request_handler(request) + # A request with an unknown method (for a known endpoint) was received. + raise UnrecognizedRequestError(code=405) @abc.abstractmethod def _send_response( @@ -396,7 +396,6 @@ class DirectServeJsonResource(_AsyncResource): @attr.s(slots=True, frozen=True, auto_attribs=True) class _PathEntry: - pattern: Pattern callback: ServletCallback servlet_classname: str @@ -425,13 +424,14 @@ class JsonResource(DirectServeJsonResource): ): super().__init__(canonical_json, extract_context) self.clock = hs.get_clock() - self.path_regexs: Dict[bytes, List[_PathEntry]] = {} + # Map of path regex -> method -> callback. + self._routes: Dict[Pattern[str], Dict[bytes, _PathEntry]] = {} self.hs = hs def register_paths( self, method: str, - path_patterns: Iterable[Pattern], + path_patterns: Iterable[Pattern[str]], callback: ServletCallback, servlet_classname: str, ) -> None: @@ -455,8 +455,8 @@ class JsonResource(DirectServeJsonResource): for path_pattern in path_patterns: logger.debug("Registering for %s %s", method, path_pattern.pattern) - self.path_regexs.setdefault(method_bytes, []).append( - _PathEntry(path_pattern, callback, servlet_classname) + self._routes.setdefault(path_pattern, {})[method_bytes] = _PathEntry( + callback, servlet_classname ) def _get_handler_for_request( @@ -478,14 +478,17 @@ class JsonResource(DirectServeJsonResource): # Loop through all the registered callbacks to check if the method # and path regex match - for path_entry in self.path_regexs.get(request_method, []): - m = path_entry.pattern.match(request_path) + for path_pattern, methods in self._routes.items(): + m = path_pattern.match(request_path) if m: - # We found a match! + # We found a matching path! + path_entry = methods.get(request_method) + if not path_entry: + raise UnrecognizedRequestError(code=405) return path_entry.callback, path_entry.servlet_classname, m.groupdict() - # Huh. No one wanted to handle that? Fiiiiiine. Send 400. - return _unrecognised_request_handler, "unrecognised_request_handler", {} + # Huh. No one wanted to handle that? Fiiiiiine. + raise UnrecognizedRequestError(code=404) async def _async_render(self, request: SynapseRequest) -> Tuple[int, Any]: callback, servlet_classname, group_dict = self._get_handler_for_request(request) @@ -567,19 +570,6 @@ class StaticResource(File): return super().render_GET(request) -def _unrecognised_request_handler(request: Request) -> NoReturn: - """Request handler for unrecognised requests - - This is a request handler suitable for return from - _get_handler_for_request. It actually just raises an - UnrecognizedRequestError. - - Args: - request: Unused, but passed in to match the signature of ServletCallback. 
- """ - raise UnrecognizedRequestError(code=404) - - class UnrecognizedRequestResource(resource.Resource): """ Similar to twisted.web.resource.NoResource, but returns a JSON 404 with an diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py index 0d072c42a7..c134ccfb3d 100644 --- a/synapse/rest/admin/media.py +++ b/synapse/rest/admin/media.py @@ -15,7 +15,7 @@ import logging from http import HTTPStatus -from typing import TYPE_CHECKING, Tuple +from typing import TYPE_CHECKING, Optional, Tuple from synapse.api.constants import Direction from synapse.api.errors import Codes, NotFoundError, SynapseError @@ -285,7 +285,12 @@ class DeleteMediaByDateSize(RestServlet): timestamp and size. """ - PATTERNS = admin_patterns("/media/(?P[^/]*)/delete$") + PATTERNS = [ + *admin_patterns("/media/delete$"), + # This URL kept around for legacy reasons, it is undesirable since it + # overlaps with the DeleteMediaByID servlet. + *admin_patterns("/media/(?P[^/]*)/delete$"), + ] def __init__(self, hs: "HomeServer"): self.store = hs.get_datastores().main @@ -294,7 +299,7 @@ class DeleteMediaByDateSize(RestServlet): self.media_repository = hs.get_media_repository() async def on_POST( - self, request: SynapseRequest, server_name: str + self, request: SynapseRequest, server_name: Optional[str] = None ) -> Tuple[int, JsonDict]: await assert_requester_is_admin(self.auth, request) @@ -322,7 +327,8 @@ class DeleteMediaByDateSize(RestServlet): errcode=Codes.INVALID_PARAM, ) - if self.server_name != server_name: + # This check is useless, we keep it for the legacy endpoint only. + if server_name is not None and self.server_name != server_name: raise SynapseError(HTTPStatus.BAD_REQUEST, "Can only delete local media") logging.info( @@ -489,6 +495,8 @@ def register_servlets_for_media_repo(hs: "HomeServer", http_server: HttpServer) ProtectMediaByID(hs).register(http_server) UnprotectMediaByID(hs).register(http_server) ListMediaInRoom(hs).register(http_server) - DeleteMediaByID(hs).register(http_server) + # XXX DeleteMediaByDateSize must be registered before DeleteMediaByID as + # their URL routes overlap. DeleteMediaByDateSize(hs).register(http_server) + DeleteMediaByID(hs).register(http_server) UserMediaRestServlet(hs).register(http_server) diff --git a/synapse/rest/client/room_keys.py b/synapse/rest/client/room_keys.py index f7081f638e..4e7ffdb555 100644 --- a/synapse/rest/client/room_keys.py +++ b/synapse/rest/client/room_keys.py @@ -259,6 +259,32 @@ class RoomKeysNewVersionServlet(RestServlet): self.auth = hs.get_auth() self.e2e_room_keys_handler = hs.get_e2e_room_keys_handler() + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + """ + Retrieve the version information about the most current backup version (if any) + + It takes out an exclusive lock on this user's room_key backups, to ensure + clients only upload to the current backup. + + Returns 404 if the given version does not exist. 
+ + GET /room_keys/version HTTP/1.1 + { + "version": "12345", + "algorithm": "m.megolm_backup.v1", + "auth_data": "dGhpcyBzaG91bGQgYWN0dWFsbHkgYmUgZW5jcnlwdGVkIGpzb24K" + } + """ + requester = await self.auth.get_user_by_req(request, allow_guest=False) + user_id = requester.user.to_string() + + try: + info = await self.e2e_room_keys_handler.get_version_info(user_id) + except SynapseError as e: + if e.code == 404: + raise SynapseError(404, "No backup found", Codes.NOT_FOUND) + return 200, info + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: """ Create a new backup version for this user's room_keys with the given @@ -301,7 +327,7 @@ class RoomKeysNewVersionServlet(RestServlet): class RoomKeysVersionServlet(RestServlet): - PATTERNS = client_patterns("/room_keys/version(/(?P[^/]+))?$") + PATTERNS = client_patterns("/room_keys/version/(?P[^/]+)$") def __init__(self, hs: "HomeServer"): super().__init__() @@ -309,12 +335,11 @@ class RoomKeysVersionServlet(RestServlet): self.e2e_room_keys_handler = hs.get_e2e_room_keys_handler() async def on_GET( - self, request: SynapseRequest, version: Optional[str] + self, request: SynapseRequest, version: str ) -> Tuple[int, JsonDict]: """ Retrieve the version information about a given version of the user's - room_keys backup. If the version part is missing, returns info about the - most current backup version (if any) + room_keys backup. It takes out an exclusive lock on this user's room_key backups, to ensure clients only upload to the current backup. @@ -339,20 +364,16 @@ class RoomKeysVersionServlet(RestServlet): return 200, info async def on_DELETE( - self, request: SynapseRequest, version: Optional[str] + self, request: SynapseRequest, version: str ) -> Tuple[int, JsonDict]: """ Delete the information about a given version of the user's - room_keys backup. If the version part is missing, deletes the most - current backup version (if any). Doesn't delete the actual room data. + room_keys backup. Doesn't delete the actual room data. DELETE /room_keys/version/12345 HTTP/1.1 HTTP/1.1 200 OK {} """ - if version is None: - raise SynapseError(400, "No version specified to delete", Codes.NOT_FOUND) - requester = await self.auth.get_user_by_req(request, allow_guest=False) user_id = requester.user.to_string() @@ -360,7 +381,7 @@ class RoomKeysVersionServlet(RestServlet): return 200, {} async def on_PUT( - self, request: SynapseRequest, version: Optional[str] + self, request: SynapseRequest, version: str ) -> Tuple[int, JsonDict]: """ Update the information about a given version of the user's room_keys backup. 
@@ -386,11 +407,6 @@ class RoomKeysVersionServlet(RestServlet): user_id = requester.user.to_string() info = parse_json_object_from_request(request) - if version is None: - raise SynapseError( - 400, "No version specified to update", Codes.MISSING_PARAM - ) - await self.e2e_room_keys_handler.update_version(user_id, version, info) return 200, {} diff --git a/synapse/rest/client/tags.py b/synapse/rest/client/tags.py index ca638755c7..dde08417a4 100644 --- a/synapse/rest/client/tags.py +++ b/synapse/rest/client/tags.py @@ -34,7 +34,9 @@ class TagListServlet(RestServlet): GET /user/{user_id}/rooms/{room_id}/tags HTTP/1.1 """ - PATTERNS = client_patterns("/user/(?P[^/]*)/rooms/(?P[^/]*)/tags") + PATTERNS = client_patterns( + "/user/(?P[^/]*)/rooms/(?P[^/]*)/tags$" + ) def __init__(self, hs: "HomeServer"): super().__init__() diff --git a/tests/rest/admin/test_media.py b/tests/rest/admin/test_media.py index aadb31ca83..db77a45ae3 100644 --- a/tests/rest/admin/test_media.py +++ b/tests/rest/admin/test_media.py @@ -213,7 +213,8 @@ class DeleteMediaByDateSizeTestCase(unittest.HomeserverTestCase): self.admin_user_tok = self.login("admin", "pass") self.filepaths = MediaFilePaths(hs.config.media.media_store_path) - self.url = "/_synapse/admin/v1/media/%s/delete" % self.server_name + self.url = "/_synapse/admin/v1/media/delete" + self.legacy_url = "/_synapse/admin/v1/media/%s/delete" % self.server_name # Move clock up to somewhat realistic time self.reactor.advance(1000000000) @@ -332,11 +333,13 @@ class DeleteMediaByDateSizeTestCase(unittest.HomeserverTestCase): channel.json_body["error"], ) - def test_delete_media_never_accessed(self) -> None: + @parameterized.expand([(True,), (False,)]) + def test_delete_media_never_accessed(self, use_legacy_url: bool) -> None: """ Tests that media deleted if it is older than `before_ts` and never accessed `last_access_ts` is `NULL` and `created_ts` < `before_ts` """ + url = self.legacy_url if use_legacy_url else self.url # upload and do not access server_and_media_id = self._create_media() @@ -351,7 +354,7 @@ class DeleteMediaByDateSizeTestCase(unittest.HomeserverTestCase): now_ms = self.clock.time_msec() channel = self.make_request( "POST", - self.url + "?before_ts=" + str(now_ms), + url + "?before_ts=" + str(now_ms), access_token=self.admin_user_tok, ) self.assertEqual(200, channel.code, msg=channel.json_body) -- cgit 1.5.1 From c1d2ce2901ab1c7cfaeebb4683af05a2ebf19fa6 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 9 Feb 2023 19:57:01 +0000 Subject: Do not always start a db txn on Postgres (#14840) --- changelog.d/14840.misc | 1 + synapse/storage/prepare_database.py | 13 +++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) create mode 100644 changelog.d/14840.misc (limited to 'synapse') diff --git a/changelog.d/14840.misc b/changelog.d/14840.misc new file mode 100644 index 0000000000..ff6084284a --- /dev/null +++ b/changelog.d/14840.misc @@ -0,0 +1 @@ +Prevent "WARNING: there is already a transaction in progress" lines appearing in PostgreSQL's logs on some occasions. 
\ No newline at end of file diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 3acdb39da7..6c335a9315 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -23,7 +23,7 @@ from typing_extensions import Counter as CounterType from synapse.config.homeserver import HomeServerConfig from synapse.storage.database import LoggingDatabaseConnection -from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine +from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine from synapse.storage.schema import SCHEMA_COMPAT_VERSION, SCHEMA_VERSION from synapse.storage.types import Cursor @@ -108,9 +108,14 @@ def prepare_database( # so we start one before running anything. This ensures that any upgrades # are either applied completely, or not at all. # - # (psycopg2 automatically starts a transaction as soon as we run any statements - # at all, so this is redundant but harmless there.) - cur.execute("BEGIN TRANSACTION") + # psycopg2 does not automatically start transactions when in autocommit mode. + # While it is technically harmless to nest transactions in postgres, doing so + # results in a warning in Postgres' logs per query. And we'd rather like to + # avoid doing that. + if isinstance(database_engine, Sqlite3Engine) or ( + isinstance(database_engine, PostgresEngine) and db_conn.autocommit + ): + cur.execute("BEGIN TRANSACTION") logger.info("%r: Checking existing schema version", databases) version_info = _get_or_create_schema_state(cur, database_engine) -- cgit 1.5.1 From 03bccd542bcffe3ea12cd35108740a7d62dd38ab Mon Sep 17 00:00:00 2001 From: Shay Date: Thu, 9 Feb 2023 13:05:02 -0800 Subject: Add a class UnpersistedEventContext to allow for the batching up of storing state groups (#14675) * add class UnpersistedEventContext * modify create new client event to create unpersistedeventcontexts * persist event contexts after creation * fix tests to persist unpersisted event contexts * cleanup * misc lints + cleanup * changelog + fix comments * lints * fix batch insertion? 
* reduce redundant calculation * add unpersisted event classes * rework compute_event_context, split into function that returns unpersisted event context and then persists it * use calculate_context_info to create unpersisted event contexts * update typing * $%#^&* * black * fix comments and consolidate classes, use attr.s for class * requested changes * lint * requested changes * requested changes * refactor to be stupidly explicit * clearer renaming and flow * make partial state non-optional * update docstrings --------- Co-authored-by: Erik Johnston --- changelog.d/14675.misc | 1 + synapse/events/snapshot.py | 174 ++++++++++++++++++++++++++++++++- synapse/events/third_party_rules.py | 6 +- synapse/handlers/federation.py | 59 ++++++++---- synapse/handlers/federation_event.py | 6 +- synapse/handlers/message.py | 42 +++++--- synapse/state/__init__.py | 176 ++++++++++++++-------------------- tests/handlers/test_user_directory.py | 4 +- tests/rest/admin/test_user.py | 4 +- tests/storage/test_redaction.py | 24 +++-- tests/storage/test_state.py | 4 +- tests/test_utils/event_injection.py | 7 +- tests/test_visibility.py | 9 +- tests/utils.py | 5 +- 14 files changed, 359 insertions(+), 162 deletions(-) create mode 100644 changelog.d/14675.misc (limited to 'synapse') diff --git a/changelog.d/14675.misc b/changelog.d/14675.misc new file mode 100644 index 0000000000..bc1ac1c82a --- /dev/null +++ b/changelog.d/14675.misc @@ -0,0 +1 @@ +Add a class UnpersistedEventContext to allow for the batching up of storing state groups. diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index 6eaef8b57a..e0d82ad81c 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from abc import ABC, abstractmethod from typing import TYPE_CHECKING, List, Optional, Tuple import attr @@ -26,8 +27,51 @@ if TYPE_CHECKING: from synapse.types.state import StateFilter +class UnpersistedEventContextBase(ABC): + """ + This is a base class for EventContext and UnpersistedEventContext, objects which + hold information relevant to storing an associated event. Note that an + UnpersistedEventContexts must be converted into an EventContext before it is + suitable to send to the db with its associated event. + + Attributes: + _storage: storage controllers for interfacing with the database + app_service: If the associated event is being sent by a (local) application service, that + app service. + """ + + def __init__(self, storage_controller: "StorageControllers"): + self._storage: "StorageControllers" = storage_controller + self.app_service: Optional[ApplicationService] = None + + @abstractmethod + async def persist( + self, + event: EventBase, + ) -> "EventContext": + """ + A method to convert an UnpersistedEventContext to an EventContext, suitable for + sending to the database with the associated event. + """ + pass + + @abstractmethod + async def get_prev_state_ids( + self, state_filter: Optional["StateFilter"] = None + ) -> StateMap[str]: + """ + Gets the room state at the event (ie not including the event if the event is a + state event). 
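+        That is, for a state event this returns the state as it was before
+        the event replaced any previous entry for its (type, state_key) pair.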
+ + Args: + state_filter: specifies the type of state event to fetch from DB, example: + EventTypes.JoinRules + """ + pass + + @attr.s(slots=True, auto_attribs=True) -class EventContext: +class EventContext(UnpersistedEventContextBase): """ Holds information relevant to persisting an event @@ -77,9 +121,6 @@ class EventContext: delta_ids: If ``prev_group`` is not None, the state delta between ``prev_group`` and ``state_group``. - app_service: If this event is being sent by a (local) application service, that - app service. - partial_state: if True, we may be storing this event with a temporary, incomplete state. """ @@ -122,6 +163,9 @@ class EventContext: """Return an EventContext instance suitable for persisting an outlier event""" return EventContext(storage=storage) + async def persist(self, event: EventBase) -> "EventContext": + return self + async def serialize(self, event: EventBase, store: "DataStore") -> JsonDict: """Converts self to a type that can be serialized as JSON, and then deserialized by `deserialize` @@ -254,6 +298,128 @@ class EventContext: ) +@attr.s(slots=True, auto_attribs=True) +class UnpersistedEventContext(UnpersistedEventContextBase): + """ + The event context holds information about the state groups for an event. It is important + to remember that an event technically has two state groups: the state group before the + event, and the state group after the event. If the event is not a state event, the state + group will not change (ie the state group before the event will be the same as the state + group after the event), but if it is a state event the state group before the event + will differ from the state group after the event. + This is a version of an EventContext before the new state group (if any) has been + computed and stored. It contains information about the state before the event (which + also may be the information after the event, if the event is not a state event). The + UnpersistedEventContext must be converted into an EventContext by calling the method + 'persist' on it before it is suitable to be sent to the DB for processing. + + state_group_after_event: + The state group after the event. This will always be None until it is persisted. + If the event is not a state event, this will be the same as + state_group_before_event. + + state_group_before_event: + The ID of the state group representing the state of the room before this event. + + state_delta_due_to_event: + If the event is a state event, then this is the delta of the state between + `state_group` and `state_group_before_event` + + prev_group_for_state_group_before_event: + If it is known, ``state_group_before_event``'s previous state group. + + delta_ids_to_state_group_before_event: + If ``prev_group_for_state_group_before_event`` is not None, the state delta + between ``prev_group_for_state_group_before_event`` and ``state_group_before_event``. + + partial_state: + Whether the event has partial state. + + state_map_before_event: + A map of the state before the event, i.e. 
the state at `state_group_before_event` + """ + + _storage: "StorageControllers" + state_group_before_event: Optional[int] + state_group_after_event: Optional[int] + state_delta_due_to_event: Optional[dict] + prev_group_for_state_group_before_event: Optional[int] + delta_ids_to_state_group_before_event: Optional[StateMap[str]] + partial_state: bool + state_map_before_event: Optional[StateMap[str]] = None + + async def get_prev_state_ids( + self, state_filter: Optional["StateFilter"] = None + ) -> StateMap[str]: + """ + Gets the room state map, excluding this event. + + Args: + state_filter: specifies the type of state event to fetch from DB + + Returns: + Maps a (type, state_key) to the event ID of the state event matching + this tuple. + """ + if self.state_map_before_event: + return self.state_map_before_event + + assert self.state_group_before_event is not None + return await self._storage.state.get_state_ids_for_group( + self.state_group_before_event, state_filter + ) + + async def persist(self, event: EventBase) -> EventContext: + """ + Creates a full `EventContext` for the event, persisting any referenced state that + has not yet been persisted. + + Args: + event: event that the EventContext is associated with. + + Returns: An EventContext suitable for sending to the database with the event + for persisting + """ + assert self.partial_state is not None + + # If we have a full set of state for before the event but don't have a state + # group for that state, we need to get one + if self.state_group_before_event is None: + assert self.state_map_before_event + state_group_before_event = await self._storage.state.store_state_group( + event.event_id, + event.room_id, + prev_group=self.prev_group_for_state_group_before_event, + delta_ids=self.delta_ids_to_state_group_before_event, + current_state_ids=self.state_map_before_event, + ) + self.state_group_before_event = state_group_before_event + + # if the event isn't a state event the state group doesn't change + if not self.state_delta_due_to_event: + state_group_after_event = self.state_group_before_event + + # otherwise if it is a state event we need to get a state group for it + else: + state_group_after_event = await self._storage.state.store_state_group( + event.event_id, + event.room_id, + prev_group=self.state_group_before_event, + delta_ids=self.state_delta_due_to_event, + current_state_ids=None, + ) + + return EventContext.with_state( + storage=self._storage, + state_group=state_group_after_event, + state_group_before_event=self.state_group_before_event, + state_delta_due_to_event=self.state_delta_due_to_event, + partial_state=self.partial_state, + prev_group=self.state_group_before_event, + delta_ids=self.state_delta_due_to_event, + ) + + def _encode_state_dict( state_dict: Optional[StateMap[str]], ) -> Optional[List[Tuple[str, str, str]]]: diff --git a/synapse/events/third_party_rules.py b/synapse/events/third_party_rules.py index 72ab696898..97c61cc258 100644 --- a/synapse/events/third_party_rules.py +++ b/synapse/events/third_party_rules.py @@ -18,7 +18,7 @@ from twisted.internet.defer import CancelledError from synapse.api.errors import ModuleFailedException, SynapseError from synapse.events import EventBase -from synapse.events.snapshot import EventContext +from synapse.events.snapshot import UnpersistedEventContextBase from synapse.storage.roommember import ProfileInfo from synapse.types import Requester, StateMap from synapse.util.async_helpers import delay_cancellation, maybe_awaitable @@ -231,7 +231,9 @@ class 
ThirdPartyEventRules: self._on_threepid_bind_callbacks.append(on_threepid_bind) async def check_event_allowed( - self, event: EventBase, context: EventContext + self, + event: EventBase, + context: UnpersistedEventContextBase, ) -> Tuple[bool, Optional[dict]]: """Check if a provided event should be allowed in the given context. diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 7f64130e0a..43ed4a3dd1 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -56,7 +56,7 @@ from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion from synapse.crypto.event_signing import compute_event_signature from synapse.event_auth import validate_event_for_room_version from synapse.events import EventBase -from synapse.events.snapshot import EventContext +from synapse.events.snapshot import EventContext, UnpersistedEventContextBase from synapse.events.validator import EventValidator from synapse.federation.federation_client import InvalidResponseError from synapse.http.servlet import assert_params_in_dict @@ -990,7 +990,10 @@ class FederationHandler: ) try: - event, context = await self.event_creation_handler.create_new_client_event( + ( + event, + unpersisted_context, + ) = await self.event_creation_handler.create_new_client_event( builder=builder ) except SynapseError as e: @@ -998,7 +1001,9 @@ class FederationHandler: raise # Ensure the user can even join the room. - await self._federation_event_handler.check_join_restrictions(context, event) + await self._federation_event_handler.check_join_restrictions( + unpersisted_context, event + ) # The remote hasn't signed it yet, obviously. We'll do the full checks # when we get the event back in `on_send_join_request` @@ -1178,7 +1183,7 @@ class FederationHandler: }, ) - event, context = await self.event_creation_handler.create_new_client_event( + event, _ = await self.event_creation_handler.create_new_client_event( builder=builder ) @@ -1228,12 +1233,13 @@ class FederationHandler: }, ) - event, context = await self.event_creation_handler.create_new_client_event( - builder=builder - ) + ( + event, + unpersisted_context, + ) = await self.event_creation_handler.create_new_client_event(builder=builder) event_allowed, _ = await self.third_party_event_rules.check_event_allowed( - event, context + event, unpersisted_context ) if not event_allowed: logger.warning("Creation of knock %s forbidden by third-party rules", event) @@ -1406,15 +1412,20 @@ class FederationHandler: try: ( event, - context, + unpersisted_context, ) = await self.event_creation_handler.create_new_client_event( builder=builder ) - event, context = await self.add_display_name_to_third_party_invite( - room_version_obj, event_dict, event, context + ( + event, + unpersisted_context, + ) = await self.add_display_name_to_third_party_invite( + room_version_obj, event_dict, event, unpersisted_context ) + context = await unpersisted_context.persist(event) + EventValidator().validate_new(event, self.config) # We need to tell the transaction queue to send this out, even @@ -1483,14 +1494,19 @@ class FederationHandler: try: ( event, - context, + unpersisted_context, ) = await self.event_creation_handler.create_new_client_event( builder=builder ) - event, context = await self.add_display_name_to_third_party_invite( - room_version_obj, event_dict, event, context + ( + event, + unpersisted_context, + ) = await self.add_display_name_to_third_party_invite( + room_version_obj, event_dict, event, unpersisted_context ) + context = await 
unpersisted_context.persist(event) + try: validate_event_for_room_version(event) await self._event_auth_handler.check_auth_rules_from_context(event) @@ -1522,8 +1538,8 @@ class FederationHandler: room_version_obj: RoomVersion, event_dict: JsonDict, event: EventBase, - context: EventContext, - ) -> Tuple[EventBase, EventContext]: + context: UnpersistedEventContextBase, + ) -> Tuple[EventBase, UnpersistedEventContextBase]: key = ( EventTypes.ThirdPartyInvite, event.content["third_party_invite"]["signed"]["token"], @@ -1557,11 +1573,14 @@ class FederationHandler: room_version_obj, event_dict ) EventValidator().validate_builder(builder) - event, context = await self.event_creation_handler.create_new_client_event( - builder=builder - ) + + ( + event, + unpersisted_context, + ) = await self.event_creation_handler.create_new_client_event(builder=builder) + EventValidator().validate_new(event, self.config) - return event, context + return event, unpersisted_context async def _check_signature(self, event: EventBase, context: EventContext) -> None: """ diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index e037acbca2..3561f2f1de 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -58,7 +58,7 @@ from synapse.event_auth import ( validate_event_for_room_version, ) from synapse.events import EventBase -from synapse.events.snapshot import EventContext +from synapse.events.snapshot import EventContext, UnpersistedEventContextBase from synapse.federation.federation_client import InvalidResponseError, PulledPduInfo from synapse.logging.context import nested_logging_context from synapse.logging.opentracing import ( @@ -426,7 +426,9 @@ class FederationEventHandler: return event, context async def check_join_restrictions( - self, context: EventContext, event: EventBase + self, + context: UnpersistedEventContextBase, + event: EventBase, ) -> None: """Check that restrictions in restricted join rules are matched diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 5f6da2943f..3e30f52e4d 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -48,7 +48,7 @@ from synapse.api.urls import ConsentURIBuilder from synapse.event_auth import validate_event_for_room_version from synapse.events import EventBase, relation_from_event from synapse.events.builder import EventBuilder -from synapse.events.snapshot import EventContext +from synapse.events.snapshot import EventContext, UnpersistedEventContextBase from synapse.events.utils import maybe_upsert_event_field from synapse.events.validator import EventValidator from synapse.handlers.directory import DirectoryHandler @@ -708,7 +708,7 @@ class EventCreationHandler: builder.internal_metadata.historical = historical - event, context = await self.create_new_client_event( + event, unpersisted_context = await self.create_new_client_event( builder=builder, requester=requester, allow_no_prev_events=allow_no_prev_events, @@ -721,6 +721,8 @@ class EventCreationHandler: current_state_group=current_state_group, ) + context = await unpersisted_context.persist(event) + # In an ideal world we wouldn't need the second part of this condition. However, # this behaviour isn't spec'd yet, meaning we should be able to deactivate this # behaviour. 
Another reason is that this code is also evaluated each time a new @@ -1083,13 +1085,14 @@ class EventCreationHandler: state_map: Optional[StateMap[str]] = None, for_batch: bool = False, current_state_group: Optional[int] = None, - ) -> Tuple[EventBase, EventContext]: + ) -> Tuple[EventBase, UnpersistedEventContextBase]: """Create a new event for a local client. If bool for_batch is true, will create an event using the prev_event_ids, and will create an event context for the event using the parameters state_map and current_state_group, thus these parameters must be provided in this case if for_batch is True. The subsequently created event and context are suitable for being batched up and bulk persisted to the database - with other similarly created events. + with other similarly created events. Note that this returns an UnpersistedEventContext, + which must be converted to an EventContext before it can be sent to the DB. Args: builder: @@ -1131,7 +1134,7 @@ class EventCreationHandler: batch persisting Returns: - Tuple of created event, context + Tuple of created event, UnpersistedEventContext """ # Strip down the state_event_ids to only what we need to auth the event. # For example, we don't need extra m.room.member that don't match event.sender @@ -1192,9 +1195,16 @@ class EventCreationHandler: event = await builder.build( prev_event_ids=prev_event_ids, auth_event_ids=auth_ids, depth=depth ) - context = await self.state.compute_event_context_for_batched( - event, state_map, current_state_group + + context: UnpersistedEventContextBase = ( + await self.state.calculate_context_info( + event, + state_ids_before_event=state_map, + partial_state=False, + state_group_before_event=current_state_group, + ) ) + else: event = await builder.build( prev_event_ids=prev_event_ids, @@ -1244,16 +1254,17 @@ class EventCreationHandler: state_map_for_event[(data.event_type, data.state_key)] = state_id - context = await self.state.compute_event_context( + # TODO(faster_joins): check how MSC2716 works and whether we can have + # partial state here + # https://github.com/matrix-org/synapse/issues/13003 + context = await self.state.calculate_context_info( event, state_ids_before_event=state_map_for_event, - # TODO(faster_joins): check how MSC2716 works and whether we can have - # partial state here - # https://github.com/matrix-org/synapse/issues/13003 partial_state=False, ) + else: - context = await self.state.compute_event_context(event) + context = await self.state.calculate_context_info(event) if requester: context.app_service = requester.app_service @@ -2082,9 +2093,9 @@ class EventCreationHandler: async def _rebuild_event_after_third_party_rules( self, third_party_result: dict, original_event: EventBase - ) -> Tuple[EventBase, EventContext]: + ) -> Tuple[EventBase, UnpersistedEventContextBase]: # the third_party_event_rules want to replace the event. - # we do some basic checks, and then return the replacement event and context. + # we do some basic checks, and then return the replacement event. # Construct a new EventBuilder and validate it, which helps with the # rest of these checks. @@ -2138,5 +2149,6 @@ class EventCreationHandler: # we rebuild the event context, to be on the safe side. If nothing else, # delta_ids might need an update. 
- context = await self.state.compute_event_context(event) + context = await self.state.calculate_context_info(event) + return event, context diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index fdfb46ab82..e877e6f1a1 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -39,7 +39,11 @@ from prometheus_client import Counter, Histogram from synapse.api.constants import EventTypes from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, StateResolutionVersions from synapse.events import EventBase -from synapse.events.snapshot import EventContext +from synapse.events.snapshot import ( + EventContext, + UnpersistedEventContext, + UnpersistedEventContextBase, +) from synapse.logging.context import ContextResourceUsage from synapse.replication.http.state import ReplicationUpdateCurrentStateRestServlet from synapse.state import v1, v2 @@ -262,31 +266,31 @@ class StateHandler: state = await entry.get_state(self._state_storage_controller, StateFilter.all()) return await self.store.get_joined_hosts(room_id, state, entry) - async def compute_event_context( + async def calculate_context_info( self, event: EventBase, state_ids_before_event: Optional[StateMap[str]] = None, partial_state: Optional[bool] = None, - ) -> EventContext: - """Build an EventContext structure for a non-outlier event. - - (for an outlier, call EventContext.for_outlier directly) - - This works out what the current state should be for the event, and - generates a new state group if necessary. - - Args: - event: - state_ids_before_event: The event ids of the state before the event if - it can't be calculated from existing events. This is normally - only specified when receiving an event from federation where we - don't have the prev events, e.g. when backfilling. - partial_state: - `True` if `state_ids_before_event` is partial and omits non-critical - membership events. - `False` if `state_ids_before_event` is the full state. - `None` when `state_ids_before_event` is not provided. In this case, the - flag will be calculated based on `event`'s prev events. + state_group_before_event: Optional[int] = None, + ) -> UnpersistedEventContextBase: + """ + Calulates the contents of an unpersisted event context, other than the current + state group (which is either provided or calculated when the event context is persisted) + + state_ids_before_event: + The event ids of the full state before the event if + it can't be calculated from existing events. This is normally + only specified when receiving an event from federation where we + don't have the prev events, e.g. when backfilling or when the event + is being created for batch persisting. + partial_state: + `True` if `state_ids_before_event` is partial and omits non-critical + membership events. + `False` if `state_ids_before_event` is the full state. + `None` when `state_ids_before_event` is not provided. In this case, the + flag will be calculated based on `event`'s prev events. + state_group_before_event: + the current state group at the time of event, if known Returns: The event context. @@ -294,7 +298,6 @@ class StateHandler: RuntimeError if `state_ids_before_event` is not provided and one or more prev events are missing or outliers. """ - assert not event.internal_metadata.is_outlier() # @@ -306,17 +309,6 @@ class StateHandler: state_group_before_event_prev_group = None deltas_to_state_group_before_event = None - # .. though we need to get a state group for it. 
- state_group_before_event = ( - await self._state_storage_controller.store_state_group( - event.event_id, - event.room_id, - prev_group=None, - delta_ids=None, - current_state_ids=state_ids_before_event, - ) - ) - # the partial_state flag must be provided assert partial_state is not None else: @@ -345,6 +337,7 @@ class StateHandler: logger.debug("calling resolve_state_groups from compute_event_context") # we've already taken into account partial state, so no need to wait for # complete state here. + entry = await self.resolve_state_groups_for_events( event.room_id, event.prev_event_ids(), @@ -383,18 +376,19 @@ class StateHandler: # if not event.is_state(): - return EventContext.with_state( + return UnpersistedEventContext( storage=self._storage_controllers, state_group_before_event=state_group_before_event, - state_group=state_group_before_event, + state_group_after_event=state_group_before_event, state_delta_due_to_event={}, - prev_group=state_group_before_event_prev_group, - delta_ids=deltas_to_state_group_before_event, + prev_group_for_state_group_before_event=state_group_before_event_prev_group, + delta_ids_to_state_group_before_event=deltas_to_state_group_before_event, partial_state=partial_state, + state_map_before_event=state_ids_before_event, ) # - # otherwise, we'll need to create a new state group for after the event + # otherwise, we'll need to set up creating a new state group for after the event # key = (event.type, event.state_key) @@ -412,88 +406,60 @@ class StateHandler: delta_ids = {key: event.event_id} - state_group_after_event = ( - await self._state_storage_controller.store_state_group( - event.event_id, - event.room_id, - prev_group=state_group_before_event, - delta_ids=delta_ids, - current_state_ids=None, - ) - ) - - return EventContext.with_state( + return UnpersistedEventContext( storage=self._storage_controllers, - state_group=state_group_after_event, state_group_before_event=state_group_before_event, + state_group_after_event=None, state_delta_due_to_event=delta_ids, - prev_group=state_group_before_event, - delta_ids=delta_ids, + prev_group_for_state_group_before_event=state_group_before_event_prev_group, + delta_ids_to_state_group_before_event=deltas_to_state_group_before_event, partial_state=partial_state, + state_map_before_event=state_ids_before_event, ) - async def compute_event_context_for_batched( + async def compute_event_context( self, event: EventBase, - state_ids_before_event: StateMap[str], - current_state_group: int, + state_ids_before_event: Optional[StateMap[str]] = None, + partial_state: Optional[bool] = None, ) -> EventContext: - """ - Generate an event context for an event that has not yet been persisted to the - database. Intended for use with events that are created to be persisted in a batch. - Args: - event: the event the context is being computed for - state_ids_before_event: a state map consisting of the state ids of the events - created prior to this event. - current_state_group: the current state group before the event. 
- """ - state_group_before_event_prev_group = None - deltas_to_state_group_before_event = None - - state_group_before_event = current_state_group - - # if the event is not state, we are set - if not event.is_state(): - return EventContext.with_state( - storage=self._storage_controllers, - state_group_before_event=state_group_before_event, - state_group=state_group_before_event, - state_delta_due_to_event={}, - prev_group=state_group_before_event_prev_group, - delta_ids=deltas_to_state_group_before_event, - partial_state=False, - ) + """Build an EventContext structure for a non-outlier event. - # otherwise, we'll need to create a new state group for after the event - key = (event.type, event.state_key) + (for an outlier, call EventContext.for_outlier directly) - if state_ids_before_event is not None: - replaces = state_ids_before_event.get(key) + This works out what the current state should be for the event, and + generates a new state group if necessary. - if replaces and replaces != event.event_id: - event.unsigned["replaces_state"] = replaces + Args: + event: + state_ids_before_event: The event ids of the state before the event if + it can't be calculated from existing events. This is normally + only specified when receiving an event from federation where we + don't have the prev events, e.g. when backfilling. + partial_state: + `True` if `state_ids_before_event` is partial and omits non-critical + membership events. + `False` if `state_ids_before_event` is the full state. + `None` when `state_ids_before_event` is not provided. In this case, the + flag will be calculated based on `event`'s prev events. + entry: + A state cache entry for the resolved state across the prev events. We may + have already calculated this, so if it's available pass it in + Returns: + The event context. - delta_ids = {key: event.event_id} + Raises: + RuntimeError if `state_ids_before_event` is not provided and one or more + prev events are missing or outliers. 
+ """ - state_group_after_event = ( - await self._state_storage_controller.store_state_group( - event.event_id, - event.room_id, - prev_group=state_group_before_event, - delta_ids=delta_ids, - current_state_ids=None, - ) + unpersisted_context = await self.calculate_context_info( + event=event, + state_ids_before_event=state_ids_before_event, + partial_state=partial_state, ) - return EventContext.with_state( - storage=self._storage_controllers, - state_group=state_group_after_event, - state_group_before_event=state_group_before_event, - state_delta_due_to_event=delta_ids, - prev_group=state_group_before_event, - delta_ids=delta_ids, - partial_state=False, - ) + return await unpersisted_context.persist(event) @measure_func() async def resolve_state_groups_for_events( diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index 75fc5a17a4..e9be5fb504 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -949,10 +949,12 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): }, ) - event, context = self.get_success( + event, unpersisted_context = self.get_success( self.event_creation_handler.create_new_client_event(builder) ) + context = self.get_success(unpersisted_context.persist(event)) + self.get_success( self.hs.get_storage_controllers().persistence.persist_event(event, context) ) diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index 5c1ced355f..b50406e129 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -2934,10 +2934,12 @@ class UserMembershipRestTestCase(unittest.HomeserverTestCase): }, ) - event, context = self.get_success( + event, unpersisted_context = self.get_success( event_creation_handler.create_new_client_event(builder) ) + context = self.get_success(unpersisted_context.persist(event)) + self.get_success(storage_controllers.persistence.persist_event(event, context)) # Now get rooms diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index df4740f9d9..0100f7da14 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -74,10 +74,12 @@ class RedactionTestCase(unittest.HomeserverTestCase): }, ) - event, context = self.get_success( + event, unpersisted_context = self.get_success( self.event_creation_handler.create_new_client_event(builder) ) + context = self.get_success(unpersisted_context.persist(event)) + self.get_success(self._persistence.persist_event(event, context)) return event @@ -96,10 +98,12 @@ class RedactionTestCase(unittest.HomeserverTestCase): }, ) - event, context = self.get_success( + event, unpersisted_context = self.get_success( self.event_creation_handler.create_new_client_event(builder) ) + context = self.get_success(unpersisted_context.persist(event)) + self.get_success(self._persistence.persist_event(event, context)) return event @@ -119,10 +123,12 @@ class RedactionTestCase(unittest.HomeserverTestCase): }, ) - event, context = self.get_success( + event, unpersisted_context = self.get_success( self.event_creation_handler.create_new_client_event(builder) ) + context = self.get_success(unpersisted_context.persist(event)) + self.get_success(self._persistence.persist_event(event, context)) return event @@ -259,7 +265,7 @@ class RedactionTestCase(unittest.HomeserverTestCase): def internal_metadata(self) -> _EventInternalMetadata: return self._base_builder.internal_metadata - event_1, context_1 = self.get_success( + event_1, unpersisted_context_1 = self.get_success( 
self.event_creation_handler.create_new_client_event( cast( EventBuilder, @@ -280,9 +286,11 @@ class RedactionTestCase(unittest.HomeserverTestCase): ) ) + context_1 = self.get_success(unpersisted_context_1.persist(event_1)) + self.get_success(self._persistence.persist_event(event_1, context_1)) - event_2, context_2 = self.get_success( + event_2, unpersisted_context_2 = self.get_success( self.event_creation_handler.create_new_client_event( cast( EventBuilder, @@ -302,6 +310,8 @@ class RedactionTestCase(unittest.HomeserverTestCase): ) ) ) + + context_2 = self.get_success(unpersisted_context_2.persist(event_2)) self.get_success(self._persistence.persist_event(event_2, context_2)) # fetch one of the redactions @@ -421,10 +431,12 @@ class RedactionTestCase(unittest.HomeserverTestCase): }, ) - redaction_event, context = self.get_success( + redaction_event, unpersisted_context = self.get_success( self.event_creation_handler.create_new_client_event(builder) ) + context = self.get_success(unpersisted_context.persist(redaction_event)) + self.get_success(self._persistence.persist_event(redaction_event, context)) # Now lets jump to the future where we have censored the redaction event diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py index bad7f0bc60..f730b888f7 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -67,10 +67,12 @@ class StateStoreTestCase(HomeserverTestCase): }, ) - event, context = self.get_success( + event, unpersisted_context = self.get_success( self.event_creation_handler.create_new_client_event(builder) ) + context = self.get_success(unpersisted_context.persist(event)) + assert self.storage.persistence is not None self.get_success(self.storage.persistence.persist_event(event, context)) diff --git a/tests/test_utils/event_injection.py b/tests/test_utils/event_injection.py index 1a50c2acf1..a6330ed840 100644 --- a/tests/test_utils/event_injection.py +++ b/tests/test_utils/event_injection.py @@ -92,8 +92,13 @@ async def create_event( builder = hs.get_event_builder_factory().for_room_version( KNOWN_ROOM_VERSIONS[room_version], kwargs ) - event, context = await hs.get_event_creation_handler().create_new_client_event( + ( + event, + unpersisted_context, + ) = await hs.get_event_creation_handler().create_new_client_event( builder, prev_event_ids=prev_event_ids ) + context = await unpersisted_context.persist(event) + return event, context diff --git a/tests/test_visibility.py b/tests/test_visibility.py index 875e37988f..36d6b37aa4 100644 --- a/tests/test_visibility.py +++ b/tests/test_visibility.py @@ -175,9 +175,10 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase): }, ) - event, context = self.get_success( + event, unpersisted_context = self.get_success( self.event_creation_handler.create_new_client_event(builder) ) + context = self.get_success(unpersisted_context.persist(event)) self.get_success( self._storage_controllers.persistence.persist_event(event, context) ) @@ -202,9 +203,10 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase): }, ) - event, context = self.get_success( + event, unpersisted_context = self.get_success( self.event_creation_handler.create_new_client_event(builder) ) + context = self.get_success(unpersisted_context.persist(event)) self.get_success( self._storage_controllers.persistence.persist_event(event, context) @@ -226,9 +228,10 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase): }, ) - event, context = self.get_success( + event, unpersisted_context = self.get_success( 
self.event_creation_handler.create_new_client_event(builder) ) + context = self.get_success(unpersisted_context.persist(event)) self.get_success( self._storage_controllers.persistence.persist_event(event, context) diff --git a/tests/utils.py b/tests/utils.py index d76bf9716a..15fabbc2d0 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -335,6 +335,9 @@ async def create_room(hs: HomeServer, room_id: str, creator_id: str) -> None: }, ) - event, context = await event_creation_handler.create_new_client_event(builder) + event, unpersisted_context = await event_creation_handler.create_new_client_event( + builder + ) + context = await unpersisted_context.persist(event) await persistence_store.persist_event(event, context) -- cgit 1.5.1 From a5a799722db0c33dc61fb2c6c7282ff7e82eb2e9 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 9 Feb 2023 22:33:39 +0000 Subject: Tag federation request spans with the worker name (#15042) * Systematically include worker name as process info * Changelog * don't bother with inner setdefault --- changelog.d/15042.feature | 1 + synapse/api/auth.py | 7 ------- synapse/logging/opentracing.py | 10 +++++++++- 3 files changed, 10 insertions(+), 8 deletions(-) create mode 100644 changelog.d/15042.feature (limited to 'synapse') diff --git a/changelog.d/15042.feature b/changelog.d/15042.feature new file mode 100644 index 0000000000..7a4de89f00 --- /dev/null +++ b/changelog.d/15042.feature @@ -0,0 +1 @@ +Tag opentracing spans for federation requests with the name of the worker serving the request. diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 3d7f986ac7..66e869bc2d 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -32,7 +32,6 @@ from synapse.appservice import ApplicationService from synapse.http import get_request_user_agent from synapse.http.site import SynapseRequest from synapse.logging.opentracing import ( - SynapseTags, active_span, force_tracing, start_active_span, @@ -162,12 +161,6 @@ class Auth: parent_span.set_tag( "authenticated_entity", requester.authenticated_entity ) - # We tag the Synapse instance name so that it's an easy jumping - # off point into the logs. Can also be used to filter for an - # instance that is under load. - parent_span.set_tag( - SynapseTags.INSTANCE_NAME, self.hs.get_instance_name() - ) parent_span.set_tag("user_id", requester.user.to_string()) if requester.device_id is not None: parent_span.set_tag("device_id", requester.device_id) diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 8ef9a0dda8..6c7cf1b294 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -466,8 +466,16 @@ def init_tracer(hs: "HomeServer") -> None: STRIP_INSTANCE_NUMBER_SUFFIX_REGEX, "", hs.get_instance_name() ) + jaeger_config = hs.config.tracing.jaeger_config + tags = jaeger_config.setdefault("tags", {}) + + # tag the Synapse instance name so that it's an easy jumping + # off point into the logs. Can also be used to filter for an + # instance that is under load. 
+ tags[SynapseTags.INSTANCE_NAME] = hs.get_instance_name() + config = JaegerConfig( - config=hs.config.tracing.jaeger_config, + config=jaeger_config, service_name=f"{hs.config.server.server_name} {instance_name_by_type}", scope_manager=LogContextScopeManager(), metrics_factory=PrometheusMetricsFactory(), -- cgit 1.5.1 From fd296b7343f2e557519f1ec81325ad836bcbdbf9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 10 Feb 2023 10:52:35 +0100 Subject: Fix exception on start up about device lists (#15041) Fixes #15010. --- changelog.d/15041.misc | 1 + synapse/storage/databases/main/devices.py | 1 + 2 files changed, 2 insertions(+) create mode 100644 changelog.d/15041.misc (limited to 'synapse') diff --git a/changelog.d/15041.misc b/changelog.d/15041.misc new file mode 100644 index 0000000000..d602b0043a --- /dev/null +++ b/changelog.d/15041.misc @@ -0,0 +1 @@ +Fix a rare exception in logs on start up. diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index e8b6cc6b80..766c2052fb 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -100,6 +100,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): ("device_lists_outbound_pokes", "stream_id"), ("device_lists_changes_in_room", "stream_id"), ("device_lists_remote_pending", "stream_id"), + ("device_lists_changes_converted_stream_position", "stream_id"), ], is_writer=hs.config.worker.worker_app is None, ) -- cgit 1.5.1 From a481fb9f98ad10e5e129bdc7664c59498a7332f6 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 10 Feb 2023 08:09:47 -0500 Subject: Refactor get_user_devices_from_cache to avoid mutating cached values. (#15040) The previous version of the code could mutate a cached value, but only if the input requested all devices of a user *and* a specific device. To avoid this nonsensical situation we no longer fetch a specific device ID if all of a user's devices are returned. --- changelog.d/15040.misc | 1 + synapse/handlers/e2e_keys.py | 11 +++++++---- synapse/storage/databases/main/devices.py | 31 +++++++++++++++++-------------- 3 files changed, 25 insertions(+), 18 deletions(-) create mode 100644 changelog.d/15040.misc (limited to 'synapse') diff --git a/changelog.d/15040.misc b/changelog.d/15040.misc new file mode 100644 index 0000000000..ca129b64af --- /dev/null +++ b/changelog.d/15040.misc @@ -0,0 +1 @@ +Avoid mutating a cached value in `get_user_devices_from_cache`. diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index d2188ca08f..43cbece21b 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -159,19 +159,22 @@ class E2eKeysHandler: # A map of destination -> user ID -> device IDs. remote_queries_not_in_cache: Dict[str, Dict[str, Iterable[str]]] = {} if remote_queries: - query_list: List[Tuple[str, Optional[str]]] = [] + user_ids = set() + user_and_device_ids: List[Tuple[str, str]] = [] for user_id, device_ids in remote_queries.items(): if device_ids: - query_list.extend( + user_and_device_ids.extend( (user_id, device_id) for device_id in device_ids ) else: - query_list.append((user_id, None)) + user_ids.add(user_id) ( user_ids_not_in_cache, remote_results, - ) = await self.store.get_user_devices_from_cache(query_list) + ) = await self.store.get_user_devices_from_cache( + user_ids, user_and_device_ids + ) # Check that the homeserver still shares a room with all cached users. 
# Note that this check may be slightly racy when a remote user leaves a diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 766c2052fb..85c1778a81 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -746,42 +746,45 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): @trace @cancellable async def get_user_devices_from_cache( - self, query_list: List[Tuple[str, Optional[str]]] + self, user_ids: Set[str], user_and_device_ids: List[Tuple[str, str]] ) -> Tuple[Set[str], Dict[str, Dict[str, JsonDict]]]: """Get the devices (and keys if any) for remote users from the cache. Args: - query_list: List of (user_id, device_ids), if device_ids is - falsey then return all device ids for that user. + user_ids: users which should have all device IDs returned + user_and_device_ids: List of (user_id, device_ids) Returns: A tuple of (user_ids_not_in_cache, results_map), where user_ids_not_in_cache is a set of user_ids and results_map is a mapping of user_id -> device_id -> device_info. """ - user_ids = {user_id for user_id, _ in query_list} - user_map = await self.get_device_list_last_stream_id_for_remotes(list(user_ids)) + unique_user_ids = user_ids | {user_id for user_id, _ in user_and_device_ids} + user_map = await self.get_device_list_last_stream_id_for_remotes( + list(unique_user_ids) + ) # We go and check if any of the users need to have their device lists # resynced. If they do then we remove them from the cached list. users_needing_resync = await self.get_user_ids_requiring_device_list_resync( - user_ids + unique_user_ids ) user_ids_in_cache = { user_id for user_id, stream_id in user_map.items() if stream_id } - users_needing_resync - user_ids_not_in_cache = user_ids - user_ids_in_cache + user_ids_not_in_cache = unique_user_ids - user_ids_in_cache + # First fetch all the users which all devices are to be returned. results: Dict[str, Dict[str, JsonDict]] = {} - for user_id, device_id in query_list: - if user_id not in user_ids_in_cache: - continue - - if device_id: + for user_id in user_ids: + if user_id in user_ids_in_cache: + results[user_id] = await self.get_cached_devices_for_user(user_id) + # Then fetch all device-specific requests, but skip users we've already + # fetched all devices for. + for user_id, device_id in user_and_device_ids: + if user_id in user_ids_in_cache and user_id not in user_ids: device = await self._get_cached_user_device(user_id, device_id) results.setdefault(user_id, {})[device_id] = device - else: - results[user_id] = await self.get_cached_devices_for_user(user_id) set_tag("in_cache", str(results)) set_tag("not_in_cache", str(user_ids_not_in_cache)) -- cgit 1.5.1 From b95407908dfde97e483952722b6fa7a533ff5093 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Fri, 10 Feb 2023 13:11:20 +0000 Subject: Avoid mutating cached values in `_generate_sync_entry_for_account_data` (#15047) --- changelog.d/15047.misc | 1 + synapse/handlers/sync.py | 2 ++ 2 files changed, 3 insertions(+) create mode 100644 changelog.d/15047.misc (limited to 'synapse') diff --git a/changelog.d/15047.misc b/changelog.d/15047.misc new file mode 100644 index 0000000000..561dc874de --- /dev/null +++ b/changelog.d/15047.misc @@ -0,0 +1 @@ +Avoid mutating cached values in `_generate_sync_entry_for_account_data`. 
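The sync.py hunks below follow a simple defensive-copy rule: a value handed
back by a cache is shared with every other caller, so take a shallow copy
before adding keys to it. A minimal sketch of the idiom, using hypothetical
stand-ins (`CACHE`, `push_rules_for_user`) rather than Synapse's real cache
machinery:

    # Hypothetical illustration of the defensive-copy idiom; CACHE and
    # push_rules_for_user are stand-ins, not Synapse's actual API.
    from typing import Any, Dict

    CACHE: Dict[str, Dict[str, Any]] = {"@alice:test": {"m.direct": {}}}

    def push_rules_for_user(user_id: str) -> Dict[str, Any]:
        return {"global": {"override": []}}

    def account_data_for_sync(user_id: str) -> Dict[str, Any]:
        global_account_data = CACHE[user_id]  # shared with every other caller
        # Shallow copy before mutating, so the cached dict stays pristine.
        global_account_data = dict(global_account_data)
        global_account_data["m.push_rules"] = push_rules_for_user(user_id)
        return global_account_data
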
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 3566537894..202b35eee6 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1753,6 +1753,7 @@ class SyncHandler: ) if push_rules_changed: + global_account_data = dict(global_account_data) global_account_data["m.push_rules"] = await self.push_rules_for_user( sync_config.user ) @@ -1763,6 +1764,7 @@ class SyncHandler: account_data_by_room, ) = await self.store.get_account_data_for_user(sync_config.user.to_string()) + global_account_data = dict(global_account_data) global_account_data["m.push_rules"] = await self.push_rules_for_user( sync_config.user ) -- cgit 1.5.1 From cf5233b783273efc84b991e7242fb4761ccc201a Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 10 Feb 2023 09:22:16 -0500 Subject: Avoid fetching unused account data in sync. (#14973) The per-room account data is no longer unconditionally fetched, even if all rooms will be filtered out. Global account data will not be fetched if it will all be filtered out. --- changelog.d/14973.misc | 1 + synapse/api/filtering.py | 30 +++++- synapse/handlers/account_data.py | 10 +- synapse/handlers/initial_sync.py | 5 +- synapse/handlers/room_member.py | 2 +- synapse/handlers/sync.py | 88 +++++++++-------- synapse/rest/admin/users.py | 3 +- synapse/storage/databases/main/account_data.py | 127 ++++++++++++++++++------- 8 files changed, 176 insertions(+), 90 deletions(-) create mode 100644 changelog.d/14973.misc (limited to 'synapse') diff --git a/changelog.d/14973.misc b/changelog.d/14973.misc new file mode 100644 index 0000000000..3657623602 --- /dev/null +++ b/changelog.d/14973.misc @@ -0,0 +1 @@ +Improve performance of `/sync` in a few situations. diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 83c42fc25a..b9f432cc23 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -219,9 +219,13 @@ class FilterCollection: self._room_timeline_filter = Filter(hs, room_filter_json.get("timeline", {})) self._room_state_filter = Filter(hs, room_filter_json.get("state", {})) self._room_ephemeral_filter = Filter(hs, room_filter_json.get("ephemeral", {})) - self._room_account_data = Filter(hs, room_filter_json.get("account_data", {})) + self._room_account_data_filter = Filter( + hs, room_filter_json.get("account_data", {}) + ) self._presence_filter = Filter(hs, filter_json.get("presence", {})) - self._account_data = Filter(hs, filter_json.get("account_data", {})) + self._global_account_data_filter = Filter( + hs, filter_json.get("account_data", {}) + ) self.include_leave = filter_json.get("room", {}).get("include_leave", False) self.event_fields = filter_json.get("event_fields", []) @@ -256,8 +260,10 @@ class FilterCollection: ) -> List[UserPresenceState]: return await self._presence_filter.filter(presence_states) - async def filter_account_data(self, events: Iterable[JsonDict]) -> List[JsonDict]: - return await self._account_data.filter(events) + async def filter_global_account_data( + self, events: Iterable[JsonDict] + ) -> List[JsonDict]: + return await self._global_account_data_filter.filter(events) async def filter_room_state(self, events: Iterable[EventBase]) -> List[EventBase]: return await self._room_state_filter.filter( @@ -279,7 +285,7 @@ class FilterCollection: async def filter_room_account_data( self, events: Iterable[JsonDict] ) -> List[JsonDict]: - return await self._room_account_data.filter( + return await self._room_account_data_filter.filter( await self._room_filter.filter(events) ) @@ -292,6 +298,13 @@ class 
FilterCollection: or self._presence_filter.filters_all_senders() ) + def blocks_all_global_account_data(self) -> bool: + """True if all global acount data will be filtered out.""" + return ( + self._global_account_data_filter.filters_all_types() + or self._global_account_data_filter.filters_all_senders() + ) + def blocks_all_room_ephemeral(self) -> bool: return ( self._room_ephemeral_filter.filters_all_types() @@ -299,6 +312,13 @@ class FilterCollection: or self._room_ephemeral_filter.filters_all_rooms() ) + def blocks_all_room_account_data(self) -> bool: + return ( + self._room_account_data_filter.filters_all_types() + or self._room_account_data_filter.filters_all_senders() + or self._room_account_data_filter.filters_all_rooms() + ) + def blocks_all_room_timeline(self) -> bool: return ( self._room_timeline_filter.filters_all_types() diff --git a/synapse/handlers/account_data.py b/synapse/handlers/account_data.py index 67e789eef7..797de46dbc 100644 --- a/synapse/handlers/account_data.py +++ b/synapse/handlers/account_data.py @@ -343,10 +343,12 @@ class AccountDataEventSource(EventSource[int, JsonDict]): } ) - ( - account_data, - room_account_data, - ) = await self.store.get_updated_account_data_for_user(user_id, last_stream_id) + account_data = await self.store.get_updated_global_account_data_for_user( + user_id, last_stream_id + ) + room_account_data = await self.store.get_updated_room_account_data_for_user( + user_id, last_stream_id + ) for account_data_type, content in account_data.items(): results.append({"type": account_data_type, "content": content}) diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py index 191529bd8e..1a29abde98 100644 --- a/synapse/handlers/initial_sync.py +++ b/synapse/handlers/initial_sync.py @@ -154,9 +154,8 @@ class InitialSyncHandler: tags_by_room = await self.store.get_tags_for_user(user_id) - account_data, account_data_by_room = await self.store.get_account_data_for_user( - user_id - ) + account_data = await self.store.get_global_account_data_for_user(user_id) + account_data_by_room = await self.store.get_room_account_data_for_user(user_id) public_room_ids = await self.store.get_public_room_ids() diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index d236cc09b5..6e7141d2ef 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -484,7 +484,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): user_id: The user's ID. """ # Retrieve user account data for predecessor room - user_account_data, _ = await self.store.get_account_data_for_user(user_id) + user_account_data = await self.store.get_global_account_data_for_user(user_id) # Copy direct message state if applicable direct_rooms = user_account_data.get(AccountDataTypes.DIRECT, {}) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 202b35eee6..399685e5b7 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1444,9 +1444,9 @@ class SyncHandler: logger.debug("Fetching account data") - account_data_by_room = await self._generate_sync_entry_for_account_data( - sync_result_builder - ) + # Global account data is included if it is not filtered out. + if not sync_config.filter_collection.blocks_all_global_account_data(): + await self._generate_sync_entry_for_account_data(sync_result_builder) # Presence data is included if the server has it enabled and not filtered out. 
include_presence_data = bool( @@ -1472,9 +1472,7 @@ class SyncHandler: ( newly_joined_rooms, newly_left_rooms, - ) = await self._generate_sync_entry_for_rooms( - sync_result_builder, account_data_by_room - ) + ) = await self._generate_sync_entry_for_rooms(sync_result_builder) # Work out which users have joined or left rooms we're in. We use this # to build the presence and device_list parts of the sync response in @@ -1717,35 +1715,29 @@ class SyncHandler: async def _generate_sync_entry_for_account_data( self, sync_result_builder: "SyncResultBuilder" - ) -> Dict[str, Dict[str, JsonDict]]: - """Generates the account data portion of the sync response. + ) -> None: + """Generates the global account data portion of the sync response. Account data (called "Client Config" in the spec) can be set either globally or for a specific room. Account data consists of a list of events which accumulate state, much like a room. - This function retrieves global and per-room account data. The former is written - to the given `sync_result_builder`. The latter is returned directly, to be - later written to the `sync_result_builder` on a room-by-room basis. + This function retrieves global account data and writes it to the given + `sync_result_builder`. See `_generate_sync_entry_for_rooms` for handling + of per-room account data. Args: sync_result_builder - - Returns: - A dictionary whose keys (room ids) map to the per room account data for that - room. """ sync_config = sync_result_builder.sync_config user_id = sync_result_builder.sync_config.user.to_string() since_token = sync_result_builder.since_token if since_token and not sync_result_builder.full_state: - # TODO Do not fetch room account data if it will be unused. - ( - global_account_data, - account_data_by_room, - ) = await self.store.get_updated_account_data_for_user( - user_id, since_token.account_data_key + global_account_data = ( + await self.store.get_updated_global_account_data_for_user( + user_id, since_token.account_data_key + ) ) push_rules_changed = await self.store.have_push_rules_changed_for_user( @@ -1758,28 +1750,26 @@ class SyncHandler: sync_config.user ) else: - # TODO Do not fetch room account data if it will be unused. 
- ( - global_account_data, - account_data_by_room, - ) = await self.store.get_account_data_for_user(sync_config.user.to_string()) + all_global_account_data = await self.store.get_global_account_data_for_user( + user_id + ) - global_account_data = dict(global_account_data) + global_account_data = dict(all_global_account_data) global_account_data["m.push_rules"] = await self.push_rules_for_user( sync_config.user ) - account_data_for_user = await sync_config.filter_collection.filter_account_data( - [ - {"type": account_data_type, "content": content} - for account_data_type, content in global_account_data.items() - ] + account_data_for_user = ( + await sync_config.filter_collection.filter_global_account_data( + [ + {"type": account_data_type, "content": content} + for account_data_type, content in global_account_data.items() + ] + ) ) sync_result_builder.account_data = account_data_for_user - return account_data_by_room - async def _generate_sync_entry_for_presence( self, sync_result_builder: "SyncResultBuilder", @@ -1839,9 +1829,7 @@ class SyncHandler: sync_result_builder.presence = presence async def _generate_sync_entry_for_rooms( - self, - sync_result_builder: "SyncResultBuilder", - account_data_by_room: Dict[str, Dict[str, JsonDict]], + self, sync_result_builder: "SyncResultBuilder" ) -> Tuple[AbstractSet[str], AbstractSet[str]]: """Generates the rooms portion of the sync response. Populates the `sync_result_builder` with the result. @@ -1852,7 +1840,6 @@ class SyncHandler: Args: sync_result_builder - account_data_by_room: Dictionary of per room account data Returns: Returns a 2-tuple describing rooms the user has joined or left. @@ -1865,9 +1852,30 @@ class SyncHandler: since_token = sync_result_builder.since_token user_id = sync_result_builder.sync_config.user.to_string() + blocks_all_rooms = ( + sync_result_builder.sync_config.filter_collection.blocks_all_rooms() + ) + + # 0. Start by fetching room account data (if required). + if ( + blocks_all_rooms + or sync_result_builder.sync_config.filter_collection.blocks_all_room_account_data() + ): + account_data_by_room: Mapping[str, Mapping[str, JsonDict]] = {} + elif since_token and not sync_result_builder.full_state: + account_data_by_room = ( + await self.store.get_updated_room_account_data_for_user( + user_id, since_token.account_data_key + ) + ) + else: + account_data_by_room = await self.store.get_room_account_data_for_user( + user_id + ) + # 1. Start by fetching all ephemeral events in rooms we've joined (if required). 
block_all_room_ephemeral = ( - sync_result_builder.sync_config.filter_collection.blocks_all_rooms() + blocks_all_rooms or sync_result_builder.sync_config.filter_collection.blocks_all_room_ephemeral() ) if block_all_room_ephemeral: @@ -2294,7 +2302,7 @@ class SyncHandler: room_builder: "RoomSyncResultBuilder", ephemeral: List[JsonDict], tags: Optional[Dict[str, Dict[str, Any]]], - account_data: Dict[str, JsonDict], + account_data: Mapping[str, JsonDict], always_include: bool = False, ) -> None: """Populates the `joined` and `archived` section of `sync_result_builder` diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index b9dca8ef3a..0c0bf540b9 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -1192,7 +1192,8 @@ class AccountDataRestServlet(RestServlet): if not await self._store.get_user_by_id(user_id): raise NotFoundError("User not found") - global_data, by_room_data = await self._store.get_account_data_for_user(user_id) + global_data = await self._store.get_global_account_data_for_user(user_id) + by_room_data = await self._store.get_room_account_data_for_user(user_id) return HTTPStatus.OK, { "account_data": { "global": global_data, diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py index 8a359d7eb8..2d6f02c14f 100644 --- a/synapse/storage/databases/main/account_data.py +++ b/synapse/storage/databases/main/account_data.py @@ -21,6 +21,7 @@ from typing import ( FrozenSet, Iterable, List, + Mapping, Optional, Tuple, cast, @@ -122,25 +123,25 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) return self._account_data_id_gen.get_current_token() @cached() - async def get_account_data_for_user( + async def get_global_account_data_for_user( self, user_id: str - ) -> Tuple[Dict[str, JsonDict], Dict[str, Dict[str, JsonDict]]]: + ) -> Mapping[str, JsonDict]: """ - Get all the client account_data for a user. + Get all the global client account_data for a user. If experimental MSC3391 support is enabled, any entries with an empty content body are excluded; as this means they have been deleted. Args: user_id: The user to get the account_data for. + Returns: - A 2-tuple of a dict of global account_data and a dict mapping from - room_id string to per room account_data dicts. + The global account_data. """ - def get_account_data_for_user_txn( + def get_global_account_data_for_user( txn: LoggingTransaction, - ) -> Tuple[Dict[str, JsonDict], Dict[str, Dict[str, JsonDict]]]: + ) -> Dict[str, JsonDict]: # The 'content != '{}' condition below prevents us from using # `simple_select_list_txn` here, as it doesn't support conditions # other than 'equals'. @@ -158,10 +159,34 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) txn.execute(sql, (user_id,)) rows = self.db_pool.cursor_to_dict(txn) - global_account_data = { + return { row["account_data_type"]: db_to_json(row["content"]) for row in rows } + return await self.db_pool.runInteraction( + "get_global_account_data_for_user", get_global_account_data_for_user + ) + + @cached() + async def get_room_account_data_for_user( + self, user_id: str + ) -> Mapping[str, Mapping[str, JsonDict]]: + """ + Get all of the per-room client account_data for a user. + + If experimental MSC3391 support is enabled, any entries with an empty + content body are excluded; as this means they have been deleted. + + Args: + user_id: The user to get the account_data for. 
+ + Returns: + A dict mapping from room_id string to per-room account_data dicts. + """ + + def get_room_account_data_for_user_txn( + txn: LoggingTransaction, + ) -> Dict[str, Dict[str, JsonDict]]: # The 'content != '{}' condition below prevents us from using # `simple_select_list_txn` here, as it doesn't support conditions # other than 'equals'. @@ -185,10 +210,10 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) room_data[row["account_data_type"]] = db_to_json(row["content"]) - return global_account_data, by_room + return by_room return await self.db_pool.runInteraction( - "get_account_data_for_user", get_account_data_for_user_txn + "get_room_account_data_for_user_txn", get_room_account_data_for_user_txn ) @cached(num_args=2, max_entries=5000, tree=True) @@ -342,36 +367,61 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) "get_updated_room_account_data", get_updated_room_account_data_txn ) - async def get_updated_account_data_for_user( + async def get_updated_global_account_data_for_user( self, user_id: str, stream_id: int - ) -> Tuple[Dict[str, JsonDict], Dict[str, Dict[str, JsonDict]]]: - """Get all the client account_data for a that's changed for a user + ) -> Dict[str, JsonDict]: + """Get all the global account_data that's changed for a user. Args: user_id: The user to get the account_data for. stream_id: The point in the stream since which to get updates + Returns: - A deferred pair of a dict of global account_data and a dict - mapping from room_id string to per room account_data dicts. + A dict of global account_data. """ - def get_updated_account_data_for_user_txn( + def get_updated_global_account_data_for_user( txn: LoggingTransaction, - ) -> Tuple[Dict[str, JsonDict], Dict[str, Dict[str, JsonDict]]]: - sql = ( - "SELECT account_data_type, content FROM account_data" - " WHERE user_id = ? AND stream_id > ?" - ) - + ) -> Dict[str, JsonDict]: + sql = """ + SELECT account_data_type, content FROM account_data + WHERE user_id = ? AND stream_id > ? + """ txn.execute(sql, (user_id, stream_id)) - global_account_data = {row[0]: db_to_json(row[1]) for row in txn} + return {row[0]: db_to_json(row[1]) for row in txn} - sql = ( - "SELECT room_id, account_data_type, content FROM room_account_data" - " WHERE user_id = ? AND stream_id > ?" - ) + changed = self._account_data_stream_cache.has_entity_changed( + user_id, int(stream_id) + ) + if not changed: + return {} + + return await self.db_pool.runInteraction( + "get_updated_global_account_data_for_user", + get_updated_global_account_data_for_user, + ) + + async def get_updated_room_account_data_for_user( + self, user_id: str, stream_id: int + ) -> Dict[str, Dict[str, JsonDict]]: + """Get all the room account_data that's changed for a user. + Args: + user_id: The user to get the account_data for. + stream_id: The point in the stream since which to get updates + + Returns: + A dict mapping from room_id string to per room account_data dicts. + """ + + def get_updated_room_account_data_for_user_txn( + txn: LoggingTransaction, + ) -> Dict[str, Dict[str, JsonDict]]: + sql = """ + SELECT room_id, account_data_type, content FROM room_account_data + WHERE user_id = ? AND stream_id > ? 
+ """ txn.execute(sql, (user_id, stream_id)) account_data_by_room: Dict[str, Dict[str, JsonDict]] = {} @@ -379,16 +429,17 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) room_account_data = account_data_by_room.setdefault(row[0], {}) room_account_data[row[1]] = db_to_json(row[2]) - return global_account_data, account_data_by_room + return account_data_by_room changed = self._account_data_stream_cache.has_entity_changed( user_id, int(stream_id) ) if not changed: - return {}, {} + return {} return await self.db_pool.runInteraction( - "get_updated_account_data_for_user", get_updated_account_data_for_user_txn + "get_updated_room_account_data_for_user", + get_updated_room_account_data_for_user_txn, ) @cached(max_entries=5000, iterable=True) @@ -444,7 +495,8 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) self.get_global_account_data_by_type_for_user.invalidate( (row.user_id, row.data_type) ) - self.get_account_data_for_user.invalidate((row.user_id,)) + self.get_global_account_data_for_user.invalidate((row.user_id,)) + self.get_room_account_data_for_user.invalidate((row.user_id,)) self.get_account_data_for_room.invalidate((row.user_id, row.room_id)) self.get_account_data_for_room_and_type.invalidate( (row.user_id, row.room_id, row.data_type) @@ -492,7 +544,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) ) self._account_data_stream_cache.entity_has_changed(user_id, next_id) - self.get_account_data_for_user.invalidate((user_id,)) + self.get_room_account_data_for_user.invalidate((user_id,)) self.get_account_data_for_room.invalidate((user_id, room_id)) self.get_account_data_for_room_and_type.prefill( (user_id, room_id, account_data_type), content @@ -558,7 +610,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) return None self._account_data_stream_cache.entity_has_changed(user_id, next_id) - self.get_account_data_for_user.invalidate((user_id,)) + self.get_room_account_data_for_user.invalidate((user_id,)) self.get_account_data_for_room.invalidate((user_id, room_id)) self.get_account_data_for_room_and_type.prefill( (user_id, room_id, account_data_type), {} @@ -593,7 +645,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) ) self._account_data_stream_cache.entity_has_changed(user_id, next_id) - self.get_account_data_for_user.invalidate((user_id,)) + self.get_global_account_data_for_user.invalidate((user_id,)) self.get_global_account_data_by_type_for_user.invalidate( (user_id, account_data_type) ) @@ -761,7 +813,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) return None self._account_data_stream_cache.entity_has_changed(user_id, next_id) - self.get_account_data_for_user.invalidate((user_id,)) + self.get_global_account_data_for_user.invalidate((user_id,)) self.get_global_account_data_by_type_for_user.prefill( (user_id, account_data_type), {} ) @@ -822,7 +874,10 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) txn, self.get_account_data_for_room_and_type, (user_id,) ) self._invalidate_cache_and_stream( - txn, self.get_account_data_for_user, (user_id,) + txn, self.get_global_account_data_for_user, (user_id,) + ) + self._invalidate_cache_and_stream( + txn, self.get_room_account_data_for_user, (user_id,) ) self._invalidate_cache_and_stream( txn, self.get_global_account_data_by_type_for_user, (user_id,) -- cgit 1.5.1 From 
14be78d492fc31e743e9e5855ddb8b4c9520985a Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 10 Feb 2023 12:37:07 -0500 Subject: Support for MSC3758: exact_event_match push condition (#14964) This specifies to search for an exact value match, instead of string globbing. It only works across non-compound JSON values (null, boolean, integer, and strings). --- changelog.d/14964.feature | 1 + rust/benches/evaluator.rs | 65 +++++++++++--- rust/src/push/evaluator.rs | 69 +++++++++++---- rust/src/push/mod.rs | 83 +++++++++++++++++ stubs/synapse/synapse_rust/push.pyi | 7 +- synapse/config/experimental.py | 5 ++ synapse/push/bulk_push_rule_evaluator.py | 18 ++-- synapse/types/__init__.py | 2 + tests/push/test_push_rule_evaluator.py | 147 ++++++++++++++++++++++++++++++- 9 files changed, 356 insertions(+), 41 deletions(-) create mode 100644 changelog.d/14964.feature (limited to 'synapse') diff --git a/changelog.d/14964.feature b/changelog.d/14964.feature new file mode 100644 index 0000000000..13c0bc193b --- /dev/null +++ b/changelog.d/14964.feature @@ -0,0 +1 @@ +Implement the experimental `exact_event_match` push rule condition from [MSC3758](https://github.com/matrix-org/matrix-spec-proposals/pull/3758). diff --git a/rust/benches/evaluator.rs b/rust/benches/evaluator.rs index 35f7a50bce..229553ebf8 100644 --- a/rust/benches/evaluator.rs +++ b/rust/benches/evaluator.rs @@ -16,6 +16,7 @@ use std::collections::BTreeSet; use synapse::push::{ evaluator::PushRuleEvaluator, Condition, EventMatchCondition, FilteredPushRules, PushRules, + SimpleJsonValue, }; use test::Bencher; @@ -24,9 +25,18 @@ extern crate test; #[bench] fn bench_match_exact(b: &mut Bencher) { let flattened_keys = [ - ("type".to_string(), "m.text".to_string()), - ("room_id".to_string(), "!room:server".to_string()), - ("content.body".to_string(), "test message".to_string()), + ( + "type".to_string(), + SimpleJsonValue::Str("m.text".to_string()), + ), + ( + "room_id".to_string(), + SimpleJsonValue::Str("!room:server".to_string()), + ), + ( + "content.body".to_string(), + SimpleJsonValue::Str("test message".to_string()), + ), ] .into_iter() .collect(); @@ -43,6 +53,7 @@ fn bench_match_exact(b: &mut Bencher) { true, vec![], false, + false, ) .unwrap(); @@ -63,9 +74,18 @@ fn bench_match_exact(b: &mut Bencher) { #[bench] fn bench_match_word(b: &mut Bencher) { let flattened_keys = [ - ("type".to_string(), "m.text".to_string()), - ("room_id".to_string(), "!room:server".to_string()), - ("content.body".to_string(), "test message".to_string()), + ( + "type".to_string(), + SimpleJsonValue::Str("m.text".to_string()), + ), + ( + "room_id".to_string(), + SimpleJsonValue::Str("!room:server".to_string()), + ), + ( + "content.body".to_string(), + SimpleJsonValue::Str("test message".to_string()), + ), ] .into_iter() .collect(); @@ -82,6 +102,7 @@ fn bench_match_word(b: &mut Bencher) { true, vec![], false, + false, ) .unwrap(); @@ -102,9 +123,18 @@ fn bench_match_word(b: &mut Bencher) { #[bench] fn bench_match_word_miss(b: &mut Bencher) { let flattened_keys = [ - ("type".to_string(), "m.text".to_string()), - ("room_id".to_string(), "!room:server".to_string()), - ("content.body".to_string(), "test message".to_string()), + ( + "type".to_string(), + SimpleJsonValue::Str("m.text".to_string()), + ), + ( + "room_id".to_string(), + SimpleJsonValue::Str("!room:server".to_string()), + ), + ( + "content.body".to_string(), + SimpleJsonValue::Str("test message".to_string()), + ), ] .into_iter() .collect(); @@ -121,6 +151,7 @@ fn bench_match_word_miss(b: &mut 
Bencher) { true, vec![], false, + false, ) .unwrap(); @@ -141,9 +172,18 @@ fn bench_match_word_miss(b: &mut Bencher) { #[bench] fn bench_eval_message(b: &mut Bencher) { let flattened_keys = [ - ("type".to_string(), "m.text".to_string()), - ("room_id".to_string(), "!room:server".to_string()), - ("content.body".to_string(), "test message".to_string()), + ( + "type".to_string(), + SimpleJsonValue::Str("m.text".to_string()), + ), + ( + "room_id".to_string(), + SimpleJsonValue::Str("!room:server".to_string()), + ), + ( + "content.body".to_string(), + SimpleJsonValue::Str("test message".to_string()), + ), ] .into_iter() .collect(); @@ -160,6 +200,7 @@ fn bench_eval_message(b: &mut Bencher) { true, vec![], false, + false, ) .unwrap(); diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs index ec7a8c4453..dd6b4343ec 100644 --- a/rust/src/push/evaluator.rs +++ b/rust/src/push/evaluator.rs @@ -22,8 +22,8 @@ use regex::Regex; use super::{ utils::{get_glob_matcher, get_localpart_from_id, GlobMatchType}, - Action, Condition, EventMatchCondition, FilteredPushRules, KnownCondition, - RelatedEventMatchCondition, + Action, Condition, EventMatchCondition, ExactEventMatchCondition, FilteredPushRules, + KnownCondition, RelatedEventMatchCondition, SimpleJsonValue, }; lazy_static! { @@ -61,9 +61,9 @@ impl RoomVersionFeatures { /// Allows running a set of push rules against a particular event. #[pyclass] pub struct PushRuleEvaluator { - /// A mapping of "flattened" keys to string values in the event, e.g. + /// A mapping of "flattened" keys to simple JSON values in the event, e.g. /// includes things like "type" and "content.msgtype". - flattened_keys: BTreeMap, + flattened_keys: BTreeMap, /// The "content.body", if any. body: String, @@ -87,7 +87,7 @@ pub struct PushRuleEvaluator { /// The related events, indexed by relation type. Flattened in the same manner as /// `flattened_keys`. - related_events_flattened: BTreeMap>, + related_events_flattened: BTreeMap>, /// If msc3664, push rules for related events, is enabled. related_event_match_enabled: bool, @@ -98,6 +98,9 @@ pub struct PushRuleEvaluator { /// If MSC3931 (room version feature flags) is enabled. Usually controlled by the same /// flag as MSC1767 (extensible events core). msc3931_enabled: bool, + + /// If MSC3758 (exact_event_match push rule condition) is enabled. + msc3758_exact_event_match: bool, } #[pymethods] @@ -106,22 +109,23 @@ impl PushRuleEvaluator { #[allow(clippy::too_many_arguments)] #[new] pub fn py_new( - flattened_keys: BTreeMap, + flattened_keys: BTreeMap, has_mentions: bool, user_mentions: BTreeSet, room_mention: bool, room_member_count: u64, sender_power_level: Option, notification_power_levels: BTreeMap, - related_events_flattened: BTreeMap>, + related_events_flattened: BTreeMap>, related_event_match_enabled: bool, room_version_feature_flags: Vec, msc3931_enabled: bool, + msc3758_exact_event_match: bool, ) -> Result { - let body = flattened_keys - .get("content.body") - .cloned() - .unwrap_or_default(); + let body = match flattened_keys.get("content.body") { + Some(SimpleJsonValue::Str(s)) => s.clone(), + _ => String::new(), + }; Ok(PushRuleEvaluator { flattened_keys, @@ -136,6 +140,7 @@ impl PushRuleEvaluator { related_event_match_enabled, room_version_feature_flags, msc3931_enabled, + msc3758_exact_event_match, }) } @@ -252,6 +257,9 @@ impl PushRuleEvaluator { KnownCondition::EventMatch(event_match) => { self.match_event_match(event_match, user_id)? 
} + KnownCondition::ExactEventMatch(exact_event_match) => { + self.match_exact_event_match(exact_event_match)? + } KnownCondition::RelatedEventMatch(event_match) => { self.match_related_event_match(event_match, user_id)? } @@ -337,7 +345,9 @@ impl PushRuleEvaluator { return Ok(false); }; - let haystack = if let Some(haystack) = self.flattened_keys.get(&*event_match.key) { + let haystack = if let Some(SimpleJsonValue::Str(haystack)) = + self.flattened_keys.get(&*event_match.key) + { haystack } else { return Ok(false); @@ -355,6 +365,27 @@ impl PushRuleEvaluator { compiled_pattern.is_match(haystack) } + /// Evaluates a `exact_event_match` condition. (MSC3758) + fn match_exact_event_match( + &self, + exact_event_match: &ExactEventMatchCondition, + ) -> Result { + // First check if the feature is enabled. + if !self.msc3758_exact_event_match { + return Ok(false); + } + + let value = &exact_event_match.value; + + let haystack = if let Some(haystack) = self.flattened_keys.get(&*exact_event_match.key) { + haystack + } else { + return Ok(false); + }; + + Ok(haystack == &**value) + } + /// Evaluates a `related_event_match` condition. (MSC3664) fn match_related_event_match( &self, @@ -410,7 +441,7 @@ impl PushRuleEvaluator { return Ok(false); }; - let haystack = if let Some(haystack) = event.get(&**key) { + let haystack = if let Some(SimpleJsonValue::Str(haystack)) = event.get(&**key) { haystack } else { return Ok(false); @@ -455,7 +486,10 @@ impl PushRuleEvaluator { #[test] fn push_rule_evaluator() { let mut flattened_keys = BTreeMap::new(); - flattened_keys.insert("content.body".to_string(), "foo bar bob hello".to_string()); + flattened_keys.insert( + "content.body".to_string(), + SimpleJsonValue::Str("foo bar bob hello".to_string()), + ); let evaluator = PushRuleEvaluator::py_new( flattened_keys, false, @@ -468,6 +502,7 @@ fn push_rule_evaluator() { true, vec![], true, + true, ) .unwrap(); @@ -482,7 +517,10 @@ fn test_requires_room_version_supports_condition() { use crate::push::{PushRule, PushRules}; let mut flattened_keys = BTreeMap::new(); - flattened_keys.insert("content.body".to_string(), "foo bar bob hello".to_string()); + flattened_keys.insert( + "content.body".to_string(), + SimpleJsonValue::Str("foo bar bob hello".to_string()), + ); let flags = vec![RoomVersionFeatures::ExtensibleEvents.as_str().to_string()]; let evaluator = PushRuleEvaluator::py_new( flattened_keys, @@ -496,6 +534,7 @@ fn test_requires_room_version_supports_condition() { false, flags, true, + true, ) .unwrap(); diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs index 3c4f876cab..79e519fe11 100644 --- a/rust/src/push/mod.rs +++ b/rust/src/push/mod.rs @@ -56,7 +56,9 @@ use std::collections::{BTreeMap, HashMap, HashSet}; use anyhow::{Context, Error}; use log::warn; +use pyo3::exceptions::PyTypeError; use pyo3::prelude::*; +use pyo3::types::{PyBool, PyLong, PyString}; use pythonize::{depythonize, pythonize}; use serde::de::Error as _; use serde::{Deserialize, Serialize}; @@ -248,6 +250,36 @@ impl<'de> Deserialize<'de> for Action { } } +/// A simple JSON values (string, int, boolean, or null). +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] +#[serde(untagged)] +pub enum SimpleJsonValue { + Str(String), + Int(i64), + Bool(bool), + Null, +} + +impl<'source> FromPyObject<'source> for SimpleJsonValue { + fn extract(ob: &'source PyAny) -> PyResult { + if let Ok(s) = ::try_from(ob) { + Ok(SimpleJsonValue::Str(s.to_string())) + // A bool *is* an int, ensure we try bool first. 
+ } else if let Ok(b) = ::try_from(ob) { + Ok(SimpleJsonValue::Bool(b.extract()?)) + } else if let Ok(i) = ::try_from(ob) { + Ok(SimpleJsonValue::Int(i.extract()?)) + } else if ob.is_none() { + Ok(SimpleJsonValue::Null) + } else { + Err(PyTypeError::new_err(format!( + "Can't convert from {} to SimpleJsonValue", + ob.get_type().name()? + ))) + } + } +} + /// A condition used in push rules to match against an event. /// /// We need this split as `serde` doesn't give us the ability to have a @@ -267,6 +299,8 @@ pub enum Condition { #[serde(tag = "kind")] pub enum KnownCondition { EventMatch(EventMatchCondition), + #[serde(rename = "com.beeper.msc3758.exact_event_match")] + ExactEventMatch(ExactEventMatchCondition), #[serde(rename = "im.nheko.msc3664.related_event_match")] RelatedEventMatch(RelatedEventMatchCondition), #[serde(rename = "org.matrix.msc3952.is_user_mention")] @@ -309,6 +343,13 @@ pub struct EventMatchCondition { pub pattern_type: Option>, } +/// The body of a [`Condition::ExactEventMatch`] +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct ExactEventMatchCondition { + pub key: Cow<'static, str>, + pub value: Cow<'static, SimpleJsonValue>, +} + /// The body of a [`Condition::RelatedEventMatch`] #[derive(Serialize, Deserialize, Debug, Clone)] pub struct RelatedEventMatchCondition { @@ -542,6 +583,48 @@ fn test_deserialize_unstable_msc3931_condition() { )); } +#[test] +fn test_deserialize_unstable_msc3758_condition() { + // A string condition should work. + let json = + r#"{"kind":"com.beeper.msc3758.exact_event_match","key":"content.value","value":"foo"}"#; + + let condition: Condition = serde_json::from_str(json).unwrap(); + assert!(matches!( + condition, + Condition::Known(KnownCondition::ExactEventMatch(_)) + )); + + // A boolean condition should work. + let json = + r#"{"kind":"com.beeper.msc3758.exact_event_match","key":"content.value","value":true}"#; + + let condition: Condition = serde_json::from_str(json).unwrap(); + assert!(matches!( + condition, + Condition::Known(KnownCondition::ExactEventMatch(_)) + )); + + // An integer condition should work. + let json = r#"{"kind":"com.beeper.msc3758.exact_event_match","key":"content.value","value":1}"#; + + let condition: Condition = serde_json::from_str(json).unwrap(); + assert!(matches!( + condition, + Condition::Known(KnownCondition::ExactEventMatch(_)) + )); + + // A null condition should work + let json = + r#"{"kind":"com.beeper.msc3758.exact_event_match","key":"content.value","value":null}"#; + + let condition: Condition = serde_json::from_str(json).unwrap(); + assert!(matches!( + condition, + Condition::Known(KnownCondition::ExactEventMatch(_)) + )); +} + #[test] fn test_deserialize_unstable_msc3952_user_condition() { let json = r#"{"kind":"org.matrix.msc3952.is_user_mention"}"#; diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi index 754acab2f9..328f681a29 100644 --- a/stubs/synapse/synapse_rust/push.pyi +++ b/stubs/synapse/synapse_rust/push.pyi @@ -14,7 +14,7 @@ from typing import Any, Collection, Dict, Mapping, Optional, Sequence, Set, Tuple, Union -from synapse.types import JsonDict +from synapse.types import JsonDict, SimpleJsonValue class PushRule: @property @@ -56,17 +56,18 @@ def get_base_rule_ids() -> Collection[str]: ... 
class PushRuleEvaluator: def __init__( self, - flattened_keys: Mapping[str, str], + flattened_keys: Mapping[str, SimpleJsonValue], has_mentions: bool, user_mentions: Set[str], room_mention: bool, room_member_count: int, sender_power_level: Optional[int], notification_power_levels: Mapping[str, int], - related_events_flattened: Mapping[str, Mapping[str, str]], + related_events_flattened: Mapping[str, Mapping[str, SimpleJsonValue]], related_event_match_enabled: bool, room_version_feature_flags: Tuple[str, ...], msc3931_enabled: bool, + msc3758_exact_event_match: bool, ): ... def run( self, diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 5e3a889081..6ac2f0c10d 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -169,6 +169,11 @@ class ExperimentalConfig(Config): # MSC3925: do not replace events with their edits self.msc3925_inhibit_edit = experimental.get("msc3925_inhibit_edit", False) + # MSC3758: exact_event_match push rule condition + self.msc3758_exact_event_match = experimental.get( + "msc3758_exact_event_match", False + ) + # MSC3873: Disambiguate event_match keys. self.msc3783_escape_event_match_key = experimental.get( "msc3783_escape_event_match_key", False diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 39d2f88f03..8568aca528 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -43,6 +43,7 @@ from synapse.events.snapshot import EventContext from synapse.state import POWER_KEY from synapse.storage.databases.main.roommember import EventIdMembership from synapse.synapse_rust.push import FilteredPushRules, PushRuleEvaluator +from synapse.types import SimpleJsonValue from synapse.types.state import StateFilter from synapse.util.caches import register_cache from synapse.util.metrics import measure_func @@ -256,13 +257,15 @@ class BulkPushRuleEvaluator: return pl_event.content if pl_event else {}, sender_level - async def _related_events(self, event: EventBase) -> Dict[str, Dict[str, str]]: + async def _related_events( + self, event: EventBase + ) -> Dict[str, Dict[str, SimpleJsonValue]]: """Fetches the related events for 'event'. Sets the im.vector.is_falling_back key if the event is from a fallback relation Returns: Mapping of relation type to flattened events. """ - related_events: Dict[str, Dict[str, str]] = {} + related_events: Dict[str, Dict[str, SimpleJsonValue]] = {} if self._related_event_match_enabled: related_event_id = event.content.get("m.relates_to", {}).get("event_id") relation_type = event.content.get("m.relates_to", {}).get("rel_type") @@ -425,6 +428,7 @@ class BulkPushRuleEvaluator: self._related_event_match_enabled, event.room_version.msc3931_push_features, self.hs.config.experimental.msc1767_enabled, # MSC3931 flag + self.hs.config.experimental.msc3758_exact_event_match, ) users = rules_by_user.keys() @@ -501,15 +505,15 @@ StateGroup = Union[object, int] def _flatten_dict( d: Union[EventBase, Mapping[str, Any]], prefix: Optional[List[str]] = None, - result: Optional[Dict[str, str]] = None, + result: Optional[Dict[str, SimpleJsonValue]] = None, *, msc3783_escape_event_match_key: bool = False, -) -> Dict[str, str]: +) -> Dict[str, SimpleJsonValue]: """ Given a JSON dictionary (or event) which might contain sub dictionaries, flatten it into a single layer dictionary by combining the keys & sub-keys. - Any (non-dictionary), non-string value is dropped. + String, integer, boolean, and null values are kept. 
All others are dropped. Transforms: @@ -538,8 +542,8 @@ def _flatten_dict( # nested fields. key = key.replace("\\", "\\\\").replace(".", "\\.") - if isinstance(value, str): - result[".".join(prefix + [key])] = value.lower() + if isinstance(value, (bool, str)) or type(value) is int or value is None: + result[".".join(prefix + [key])] = value elif isinstance(value, Mapping): # do not set `room_version` due to recursion considerations below _flatten_dict( diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py index f82d1cfc29..52e366c8ae 100644 --- a/synapse/types/__init__.py +++ b/synapse/types/__init__.py @@ -69,6 +69,8 @@ StateMap = Mapping[StateKey, T] MutableStateMap = MutableMapping[StateKey, T] # JSON types. These could be made stronger, but will do for now. +# A "simple" (canonical) JSON value. +SimpleJsonValue = Optional[Union[str, int, bool]] # A JSON-serialisable dict. JsonDict = Dict[str, Any] # A JSON-serialisable mapping; roughly speaking an immutable JSONDict. diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py index 516b65cc3c..6603447341 100644 --- a/tests/push/test_push_rule_evaluator.py +++ b/tests/push/test_push_rule_evaluator.py @@ -57,7 +57,7 @@ class FlattenDictTestCase(unittest.TestCase): ) def test_non_string(self) -> None: - """Non-string items are dropped.""" + """Booleans, ints, and nulls should be kept while other items are dropped.""" input: Dict[str, Any] = { "woo": "woo", "foo": True, @@ -66,7 +66,9 @@ class FlattenDictTestCase(unittest.TestCase): "fuzz": [], "boo": {}, } - self.assertEqual({"woo": "woo"}, _flatten_dict(input)) + self.assertEqual( + {"woo": "woo", "foo": True, "bar": 1, "baz": None}, _flatten_dict(input) + ) def test_event(self) -> None: """Events can also be flattened.""" @@ -86,9 +88,9 @@ class FlattenDictTestCase(unittest.TestCase): ) expected = { "content.msgtype": "m.text", - "content.body": "hello world!", + "content.body": "Hello world!", "content.format": "org.matrix.custom.html", - "content.formatted_body": "
<em>hello world!</em>", + "content.formatted_body": "<em>Hello world!</em>
", "room_id": "!test:test", "sender": "@alice:test", "type": "m.room.message", @@ -166,6 +168,7 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): related_event_match_enabled=True, room_version_feature_flags=event.room_version.msc3931_push_features, msc3931_enabled=True, + msc3758_exact_event_match=True, ) def test_display_name(self) -> None: @@ -410,6 +413,142 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): "pattern should not match before a newline", ) + def test_exact_event_match_string(self) -> None: + """Check that exact_event_match conditions work as expected for strings.""" + + # Test against a string value. + condition = { + "kind": "com.beeper.msc3758.exact_event_match", + "key": "content.value", + "value": "foobaz", + } + self._assert_matches( + condition, + {"value": "foobaz"}, + "exact value should match", + ) + self._assert_not_matches( + condition, + {"value": "FoobaZ"}, + "values should match and be case-sensitive", + ) + self._assert_not_matches( + condition, + {"value": "test foobaz test"}, + "values must exactly match", + ) + value: Any + for value in (True, False, 1, 1.1, None, [], {}): + self._assert_not_matches( + condition, + {"value": value}, + "incorrect types should not match", + ) + + # it should work on frozendicts too + self._assert_matches( + condition, + frozendict.frozendict({"value": "foobaz"}), + "values should match on frozendicts", + ) + + def test_exact_event_match_boolean(self) -> None: + """Check that exact_event_match conditions work as expected for booleans.""" + + # Test against a True boolean value. + condition = { + "kind": "com.beeper.msc3758.exact_event_match", + "key": "content.value", + "value": True, + } + self._assert_matches( + condition, + {"value": True}, + "exact value should match", + ) + self._assert_not_matches( + condition, + {"value": False}, + "incorrect values should not match", + ) + for value in ("foobaz", 1, 1.1, None, [], {}): + self._assert_not_matches( + condition, + {"value": value}, + "incorrect types should not match", + ) + + # Test against a False boolean value. + condition = { + "kind": "com.beeper.msc3758.exact_event_match", + "key": "content.value", + "value": False, + } + self._assert_matches( + condition, + {"value": False}, + "exact value should match", + ) + self._assert_not_matches( + condition, + {"value": True}, + "incorrect values should not match", + ) + # Choose false-y values to ensure there's no type coercion. 
+ for value in ("", 0, 1.1, None, [], {}): + self._assert_not_matches( + condition, + {"value": value}, + "incorrect types should not match", + ) + + def test_exact_event_match_null(self) -> None: + """Check that exact_event_match conditions work as expected for null.""" + + condition = { + "kind": "com.beeper.msc3758.exact_event_match", + "key": "content.value", + "value": None, + } + self._assert_matches( + condition, + {"value": None}, + "exact value should match", + ) + for value in ("foobaz", True, False, 1, 1.1, [], {}): + self._assert_not_matches( + condition, + {"value": value}, + "incorrect types should not match", + ) + + def test_exact_event_match_integer(self) -> None: + """Check that exact_event_match conditions work as expected for integers.""" + + condition = { + "kind": "com.beeper.msc3758.exact_event_match", + "key": "content.value", + "value": 1, + } + self._assert_matches( + condition, + {"value": 1}, + "exact value should match", + ) + value: Any + for value in (1.1, -1, 0): + self._assert_not_matches( + condition, + {"value": value}, + "incorrect values should not match", + ) + for value in ("1", True, False, None, [], {}): + self._assert_not_matches( + condition, + {"value": value}, + "incorrect types should not match", + ) + def test_no_body(self) -> None: """Not having a body shouldn't break the evaluator.""" evaluator = self._get_evaluator({}) -- cgit 1.5.1 From d0c713cc85f094c323b2ba3f02d8ac411a7f0705 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Fri, 10 Feb 2023 23:29:00 +0000 Subject: Return read-only collections from `@cached` methods (#13755) It's important that collections returned from `@cached` methods are not modified, otherwise future retrievals from the cache will return the modified collection. This applies to the return values from `@cached` methods and the values inside the dictionaries returned by `@cachedList` methods. It's not necessary for the dictionaries returned by `@cachedList` methods themselves to be read-only. 
Signed-off-by: Sean Quah Co-authored-by: David Robertson --- changelog.d/13755.misc | 1 + synapse/app/phone_stats_home.py | 4 ++-- synapse/config/room_directory.py | 6 +++--- synapse/events/builder.py | 6 +++--- synapse/federation/federation_server.py | 3 ++- synapse/handlers/directory.py | 6 +++--- synapse/handlers/receipts.py | 4 ++-- synapse/handlers/room.py | 2 +- synapse/handlers/sync.py | 4 ++-- synapse/push/bulk_push_rule_evaluator.py | 4 ++-- synapse/state/__init__.py | 2 +- synapse/storage/controllers/state.py | 6 +++--- synapse/storage/databases/main/account_data.py | 2 +- synapse/storage/databases/main/appservice.py | 2 +- synapse/storage/databases/main/devices.py | 17 +++++++++------ synapse/storage/databases/main/directory.py | 4 ++-- synapse/storage/databases/main/end_to_end_keys.py | 25 +++++++++++++--------- synapse/storage/databases/main/event_federation.py | 11 ++++++---- .../storage/databases/main/monthly_active_users.py | 4 ++-- synapse/storage/databases/main/receipts.py | 10 +++++---- synapse/storage/databases/main/registration.py | 4 ++-- synapse/storage/databases/main/relations.py | 7 ++++-- synapse/storage/databases/main/roommember.py | 19 ++++++++-------- synapse/storage/databases/main/signatures.py | 6 +++--- synapse/storage/databases/main/tags.py | 8 ++++--- synapse/storage/databases/main/user_directory.py | 4 ++-- tests/rest/admin/test_server_notice.py | 4 ++-- 27 files changed, 98 insertions(+), 77 deletions(-) create mode 100644 changelog.d/13755.misc (limited to 'synapse') diff --git a/changelog.d/13755.misc b/changelog.d/13755.misc new file mode 100644 index 0000000000..662ee00e99 --- /dev/null +++ b/changelog.d/13755.misc @@ -0,0 +1 @@ +Re-type hint some collections as read-only. diff --git a/synapse/app/phone_stats_home.py b/synapse/app/phone_stats_home.py index 53db1e85b3..897dd3edac 100644 --- a/synapse/app/phone_stats_home.py +++ b/synapse/app/phone_stats_home.py @@ -15,7 +15,7 @@ import logging import math import resource import sys -from typing import TYPE_CHECKING, List, Sized, Tuple +from typing import TYPE_CHECKING, List, Mapping, Sized, Tuple from prometheus_client import Gauge @@ -194,7 +194,7 @@ def start_phone_stats_home(hs: "HomeServer") -> None: @wrap_as_background_process("generate_monthly_active_users") async def generate_monthly_active_users() -> None: current_mau_count = 0 - current_mau_count_by_service = {} + current_mau_count_by_service: Mapping[str, int] = {} reserved_users: Sized = () store = hs.get_datastores().main if hs.config.server.limit_usage_by_mau or hs.config.server.mau_stats_only: diff --git a/synapse/config/room_directory.py b/synapse/config/room_directory.py index 3ed236217f..8666c22f01 100644 --- a/synapse/config/room_directory.py +++ b/synapse/config/room_directory.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Any, List +from typing import Any, Collection from matrix_common.regex import glob_to_regex @@ -70,7 +70,7 @@ class RoomDirectoryConfig(Config): return False def is_publishing_room_allowed( - self, user_id: str, room_id: str, aliases: List[str] + self, user_id: str, room_id: str, aliases: Collection[str] ) -> bool: """Checks if the given user is allowed to publish the room @@ -122,7 +122,7 @@ class _RoomDirectoryRule: except Exception as e: raise ConfigError("Failed to parse glob into regex") from e - def matches(self, user_id: str, room_id: str, aliases: List[str]) -> bool: + def matches(self, user_id: str, room_id: str, aliases: Collection[str]) -> bool: """Tests if this rule matches the given user_id, room_id and aliases. Args: diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 94dd1298e1..c82745275f 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, Collection, Dict, List, Optional, Tuple, Union import attr from signedjson.types import SigningKey @@ -103,7 +103,7 @@ class EventBuilder: async def build( self, - prev_event_ids: List[str], + prev_event_ids: Collection[str], auth_event_ids: Optional[List[str]], depth: Optional[int] = None, ) -> EventBase: @@ -136,7 +136,7 @@ class EventBuilder: format_version = self.room_version.event_format # The types of auth/prev events changes between event versions. - prev_events: Union[List[str], List[Tuple[str, Dict[str, str]]]] + prev_events: Union[Collection[str], List[Tuple[str, Dict[str, str]]]] auth_events: Union[List[str], List[Tuple[str, Dict[str, str]]]] if format_version == EventFormatVersions.ROOM_V1_V2: auth_events = await self._store.add_event_hashes(auth_event_ids) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 8d36172484..6addc0bb65 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -23,6 +23,7 @@ from typing import ( Collection, Dict, List, + Mapping, Optional, Tuple, Union, @@ -1512,7 +1513,7 @@ class FederationHandlerRegistry: def _get_event_ids_for_partial_state_join( join_event: EventBase, prev_state_ids: StateMap[str], - summary: Dict[str, MemberSummary], + summary: Mapping[str, MemberSummary], ) -> Collection[str]: """Calculate state to be returned in a partial_state send_join diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index d31b0fbb17..a5798e9483 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -14,7 +14,7 @@ import logging import string -from typing import TYPE_CHECKING, Iterable, List, Optional +from typing import TYPE_CHECKING, Iterable, List, Optional, Sequence from typing_extensions import Literal @@ -486,7 +486,7 @@ class DirectoryHandler: ) if canonical_alias: # Ensure we do not mutate room_aliases. 
- room_aliases = room_aliases + [canonical_alias] + room_aliases = list(room_aliases) + [canonical_alias] if not self.config.roomdirectory.is_publishing_room_allowed( user_id, room_id, room_aliases @@ -529,7 +529,7 @@ class DirectoryHandler: async def get_aliases_for_room( self, requester: Requester, room_id: str - ) -> List[str]: + ) -> Sequence[str]: """ Get a list of the aliases that currently point to this room on this server """ diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 04c61ae3dd..2bacdebfb5 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import TYPE_CHECKING, Iterable, List, Optional, Tuple +from typing import TYPE_CHECKING, Iterable, List, Optional, Sequence, Tuple from synapse.api.constants import EduTypes, ReceiptTypes from synapse.appservice import ApplicationService @@ -189,7 +189,7 @@ class ReceiptEventSource(EventSource[int, JsonDict]): @staticmethod def filter_out_private_receipts( - rooms: List[JsonDict], user_id: str + rooms: Sequence[JsonDict], user_id: str ) -> List[JsonDict]: """ Filters a list of serialized receipts (as returned by /sync and /initialSync) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 0e759b8a5d..060bbcb181 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1928,6 +1928,6 @@ class RoomShutdownHandler: return { "kicked_users": kicked_users, "failed_to_kick_users": failed_to_kick_users, - "local_aliases": aliases_for_room, + "local_aliases": list(aliases_for_room), "new_room_id": new_room_id, } diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 399685e5b7..4bae46158a 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1519,7 +1519,7 @@ class SyncHandler: one_time_keys_count = await self.store.count_e2e_one_time_keys( user_id, device_id ) - unused_fallback_key_types = ( + unused_fallback_key_types = list( await self.store.get_e2e_unused_fallback_key_types(user_id, device_id) ) @@ -2301,7 +2301,7 @@ class SyncHandler: sync_result_builder: "SyncResultBuilder", room_builder: "RoomSyncResultBuilder", ephemeral: List[JsonDict], - tags: Optional[Dict[str, Dict[str, Any]]], + tags: Optional[Mapping[str, Mapping[str, Any]]], account_data: Mapping[str, JsonDict], always_include: bool = False, ) -> None: diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 8568aca528..f6a5bffb0f 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -22,6 +22,7 @@ from typing import ( List, Mapping, Optional, + Sequence, Set, Tuple, Union, @@ -149,7 +150,7 @@ class BulkPushRuleEvaluator: # little, we can skip fetching a huge number of push rules in large rooms. # This helps make joins and leaves faster. if event.type == EventTypes.Member: - local_users = [] + local_users: Sequence[str] = [] # We never notify a user about their own actions. This is enforced in # `_action_for_event_by_user` in the loop over `rules_by_user`, but we # do the same check here to avoid unnecessary DB queries. 
@@ -184,7 +185,6 @@ class BulkPushRuleEvaluator: if event.type == EventTypes.Member and event.membership == Membership.INVITE: invited = event.state_key if invited and self.hs.is_mine_id(invited) and invited not in local_users: - local_users = list(local_users) local_users.append(invited) if not local_users: diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index e877e6f1a1..4dc25df67e 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -226,7 +226,7 @@ class StateHandler: return await ret.get_state(self._state_storage_controller, state_filter) async def get_current_user_ids_in_room( - self, room_id: str, latest_event_ids: List[str] + self, room_id: str, latest_event_ids: Collection[str] ) -> Set[str]: """ Get the users IDs who are currently in a room. diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py index 52efd4a171..9d7a8a792f 100644 --- a/synapse/storage/controllers/state.py +++ b/synapse/storage/controllers/state.py @@ -14,6 +14,7 @@ import logging from typing import ( TYPE_CHECKING, + AbstractSet, Any, Awaitable, Callable, @@ -23,7 +24,6 @@ from typing import ( List, Mapping, Optional, - Set, Tuple, ) @@ -527,7 +527,7 @@ class StateStorageController: ) return state_map.get(key) - async def get_current_hosts_in_room(self, room_id: str) -> Set[str]: + async def get_current_hosts_in_room(self, room_id: str) -> AbstractSet[str]: """Get current hosts in room based on current state. Blocks until we have full state for the given room. This only happens for rooms @@ -584,7 +584,7 @@ class StateStorageController: async def get_users_in_room_with_profiles( self, room_id: str - ) -> Dict[str, ProfileInfo]: + ) -> Mapping[str, ProfileInfo]: """ Get the current users in the room with their profiles. If the room is currently partial-stated, this will block until the room has diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py index 2d6f02c14f..95567826f2 100644 --- a/synapse/storage/databases/main/account_data.py +++ b/synapse/storage/databases/main/account_data.py @@ -240,7 +240,7 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) @cached(num_args=2, tree=True) async def get_account_data_for_room( self, user_id: str, room_id: str - ) -> Dict[str, JsonDict]: + ) -> Mapping[str, JsonDict]: """Get all the client account_data for a user for a room. Args: diff --git a/synapse/storage/databases/main/appservice.py b/synapse/storage/databases/main/appservice.py index 5fb152c4ff..484db175d0 100644 --- a/synapse/storage/databases/main/appservice.py +++ b/synapse/storage/databases/main/appservice.py @@ -166,7 +166,7 @@ class ApplicationServiceWorkerStore(RoomMemberWorkerStore): room_id: str, app_service: "ApplicationService", cache_context: _CacheContext, - ) -> List[str]: + ) -> Sequence[str]: """ Get all users in a room that the appservice controls. 
diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 85c1778a81..1ca66d57d4 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -21,6 +21,7 @@ from typing import ( Dict, Iterable, List, + Mapping, Optional, Set, Tuple, @@ -202,7 +203,9 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): def get_device_stream_token(self) -> int: return self._device_list_id_gen.get_current_token() - async def count_devices_by_users(self, user_ids: Optional[List[str]] = None) -> int: + async def count_devices_by_users( + self, user_ids: Optional[Collection[str]] = None + ) -> int: """Retrieve number of all devices of given users. Only returns number of devices that are not marked as hidden. @@ -213,7 +216,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): """ def count_devices_by_users_txn( - txn: LoggingTransaction, user_ids: List[str] + txn: LoggingTransaction, user_ids: Collection[str] ) -> int: sql = """ SELECT count(*) @@ -747,7 +750,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): @cancellable async def get_user_devices_from_cache( self, user_ids: Set[str], user_and_device_ids: List[Tuple[str, str]] - ) -> Tuple[Set[str], Dict[str, Dict[str, JsonDict]]]: + ) -> Tuple[Set[str], Dict[str, Mapping[str, JsonDict]]]: """Get the devices (and keys if any) for remote users from the cache. Args: @@ -775,16 +778,18 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): user_ids_not_in_cache = unique_user_ids - user_ids_in_cache # First fetch all the users which all devices are to be returned. - results: Dict[str, Dict[str, JsonDict]] = {} + results: Dict[str, Mapping[str, JsonDict]] = {} for user_id in user_ids: if user_id in user_ids_in_cache: results[user_id] = await self.get_cached_devices_for_user(user_id) # Then fetch all device-specific requests, but skip users we've already # fetched all devices for. + device_specific_results: Dict[str, Dict[str, JsonDict]] = {} for user_id, device_id in user_and_device_ids: if user_id in user_ids_in_cache and user_id not in user_ids: device = await self._get_cached_user_device(user_id, device_id) - results.setdefault(user_id, {})[device_id] = device + device_specific_results.setdefault(user_id, {})[device_id] = device + results.update(device_specific_results) set_tag("in_cache", str(results)) set_tag("not_in_cache", str(user_ids_not_in_cache)) @@ -802,7 +807,7 @@ class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): return db_to_json(content) @cached() - async def get_cached_devices_for_user(self, user_id: str) -> Dict[str, JsonDict]: + async def get_cached_devices_for_user(self, user_id: str) -> Mapping[str, JsonDict]: devices = await self.db_pool.simple_select_list( table="device_lists_remote_cache", keyvalues={"user_id": user_id}, diff --git a/synapse/storage/databases/main/directory.py b/synapse/storage/databases/main/directory.py index 5903fdaf00..44aa181174 100644 --- a/synapse/storage/databases/main/directory.py +++ b/synapse/storage/databases/main/directory.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Iterable, List, Optional, Tuple +from typing import Iterable, List, Optional, Sequence, Tuple import attr @@ -74,7 +74,7 @@ class DirectoryWorkerStore(CacheInvalidationWorkerStore): ) @cached(max_entries=5000) - async def get_aliases_for_room(self, room_id: str) -> List[str]: + async def get_aliases_for_room(self, room_id: str) -> Sequence[str]: return await self.db_pool.simple_select_onecol( "room_aliases", {"room_id": room_id}, diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index c4ac6c33ba..752dc16e17 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -20,7 +20,9 @@ from typing import ( Dict, Iterable, List, + Mapping, Optional, + Sequence, Tuple, Union, cast, @@ -691,7 +693,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker @cached(max_entries=10000) async def get_e2e_unused_fallback_key_types( self, user_id: str, device_id: str - ) -> List[str]: + ) -> Sequence[str]: """Returns the fallback key types that have an unused key. Args: @@ -731,7 +733,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker return user_keys.get(key_type) @cached(num_args=1) - def _get_bare_e2e_cross_signing_keys(self, user_id: str) -> Dict[str, JsonDict]: + def _get_bare_e2e_cross_signing_keys(self, user_id: str) -> Mapping[str, JsonDict]: """Dummy function. Only used to make a cache for _get_bare_e2e_cross_signing_keys_bulk. """ @@ -744,7 +746,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker ) async def _get_bare_e2e_cross_signing_keys_bulk( self, user_ids: Iterable[str] - ) -> Dict[str, Optional[Dict[str, JsonDict]]]: + ) -> Dict[str, Optional[Mapping[str, JsonDict]]]: """Returns the cross-signing keys for a set of users. The output of this function should be passed to _get_e2e_cross_signing_signatures_txn if the signatures for the calling user need to be fetched. @@ -765,7 +767,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker ) # The `Optional` comes from the `@cachedList` decorator. - return cast(Dict[str, Optional[Dict[str, JsonDict]]], result) + return cast(Dict[str, Optional[Mapping[str, JsonDict]]], result) def _get_bare_e2e_cross_signing_keys_bulk_txn( self, @@ -924,7 +926,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker @cancellable async def get_e2e_cross_signing_keys_bulk( self, user_ids: List[str], from_user_id: Optional[str] = None - ) -> Dict[str, Optional[Dict[str, JsonDict]]]: + ) -> Dict[str, Optional[Mapping[str, JsonDict]]]: """Returns the cross-signing keys for a set of users. 
Args: @@ -940,11 +942,14 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker result = await self._get_bare_e2e_cross_signing_keys_bulk(user_ids) if from_user_id: - result = await self.db_pool.runInteraction( - "get_e2e_cross_signing_signatures", - self._get_e2e_cross_signing_signatures_txn, - result, - from_user_id, + result = cast( + Dict[str, Optional[Mapping[str, JsonDict]]], + await self.db_pool.runInteraction( + "get_e2e_cross_signing_signatures", + self._get_e2e_cross_signing_signatures_txn, + result, + from_user_id, + ), ) return result diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index bbee02ab18..ca780cca36 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -22,6 +22,7 @@ from typing import ( Iterable, List, Optional, + Sequence, Set, Tuple, cast, @@ -1004,7 +1005,9 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas room_id, ) - async def get_max_depth_of(self, event_ids: List[str]) -> Tuple[Optional[str], int]: + async def get_max_depth_of( + self, event_ids: Collection[str] + ) -> Tuple[Optional[str], int]: """Returns the event ID and depth for the event that has the max depth from a set of event IDs Args: @@ -1141,7 +1144,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas ) @cached(max_entries=5000, iterable=True) - async def get_latest_event_ids_in_room(self, room_id: str) -> List[str]: + async def get_latest_event_ids_in_room(self, room_id: str) -> Sequence[str]: return await self.db_pool.simple_select_onecol( table="event_forward_extremities", keyvalues={"room_id": room_id}, @@ -1171,7 +1174,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas @cancellable async def get_forward_extremities_for_room_at_stream_ordering( self, room_id: str, stream_ordering: int - ) -> List[str]: + ) -> Sequence[str]: """For a given room_id and stream_ordering, return the forward extremeties of the room at that point in "time". @@ -1204,7 +1207,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas @cached(max_entries=5000, num_args=2) async def _get_forward_extremeties_for_room( self, room_id: str, stream_ordering: int - ) -> List[str]: + ) -> Sequence[str]: """For a given room_id and stream_ordering, return the forward extremeties of the room at that point in "time". diff --git a/synapse/storage/databases/main/monthly_active_users.py b/synapse/storage/databases/main/monthly_active_users.py index db9a24db5e..4b1061e6d7 100644 --- a/synapse/storage/databases/main/monthly_active_users.py +++ b/synapse/storage/databases/main/monthly_active_users.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import logging -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, cast +from typing import TYPE_CHECKING, Dict, List, Mapping, Optional, Tuple, cast from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.storage.database import ( @@ -95,7 +95,7 @@ class MonthlyActiveUsersWorkerStore(RegistrationWorkerStore): return await self.db_pool.runInteraction("count_users", _count_users) @cached(num_args=0) - async def get_monthly_active_count_by_service(self) -> Dict[str, int]: + async def get_monthly_active_count_by_service(self) -> Mapping[str, int]: """Generates current count of monthly active users broken down by service. A service is typically an appservice but also includes native matrix users. Since the `monthly_active_users` table is populated from the `user_ips` table diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py index 29972d5204..dddf49c2d5 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py @@ -21,7 +21,9 @@ from typing import ( Dict, Iterable, List, + Mapping, Optional, + Sequence, Tuple, cast, ) @@ -288,7 +290,7 @@ class ReceiptsWorkerStore(SQLBaseStore): async def get_linearized_receipts_for_room( self, room_id: str, to_key: int, from_key: Optional[int] = None - ) -> List[dict]: + ) -> Sequence[JsonDict]: """Get receipts for a single room for sending to clients. Args: @@ -311,7 +313,7 @@ class ReceiptsWorkerStore(SQLBaseStore): @cached(tree=True) async def _get_linearized_receipts_for_room( self, room_id: str, to_key: int, from_key: Optional[int] = None - ) -> List[JsonDict]: + ) -> Sequence[JsonDict]: """See get_linearized_receipts_for_room""" def f(txn: LoggingTransaction) -> List[Dict[str, Any]]: @@ -354,7 +356,7 @@ class ReceiptsWorkerStore(SQLBaseStore): ) async def _get_linearized_receipts_for_rooms( self, room_ids: Collection[str], to_key: int, from_key: Optional[int] = None - ) -> Dict[str, List[JsonDict]]: + ) -> Dict[str, Sequence[JsonDict]]: if not room_ids: return {} @@ -416,7 +418,7 @@ class ReceiptsWorkerStore(SQLBaseStore): ) async def get_linearized_receipts_for_all_rooms( self, to_key: int, from_key: Optional[int] = None - ) -> Dict[str, JsonDict]: + ) -> Mapping[str, JsonDict]: """Get receipts for all rooms between two stream_ids, up to a limit of the latest 100 read receipts. 
diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index 31f0f2bd3d..9a55e17624 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -16,7 +16,7 @@ import logging import random import re -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast +from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Tuple, Union, cast import attr @@ -192,7 +192,7 @@ class RegistrationWorkerStore(CacheInvalidationWorkerStore): ) @cached() - async def get_user_by_id(self, user_id: str) -> Optional[Dict[str, Any]]: + async def get_user_by_id(self, user_id: str) -> Optional[Mapping[str, Any]]: """Deprecated: use get_userinfo_by_id instead""" def get_user_by_id_txn(txn: LoggingTransaction) -> Optional[Dict[str, Any]]: diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py index 0018d6f7ab..fa3266c081 100644 --- a/synapse/storage/databases/main/relations.py +++ b/synapse/storage/databases/main/relations.py @@ -22,6 +22,7 @@ from typing import ( List, Mapping, Optional, + Sequence, Set, Tuple, Union, @@ -171,7 +172,7 @@ class RelationsWorkerStore(SQLBaseStore): direction: Direction = Direction.BACKWARDS, from_token: Optional[StreamToken] = None, to_token: Optional[StreamToken] = None, - ) -> Tuple[List[_RelatedEvent], Optional[StreamToken]]: + ) -> Tuple[Sequence[_RelatedEvent], Optional[StreamToken]]: """Get a list of relations for an event, ordered by topological ordering. Args: @@ -397,7 +398,9 @@ class RelationsWorkerStore(SQLBaseStore): return result is not None @cached() - async def get_aggregation_groups_for_event(self, event_id: str) -> List[JsonDict]: + async def get_aggregation_groups_for_event( + self, event_id: str + ) -> Sequence[JsonDict]: raise NotImplementedError() @cachedList( diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index ea6a5e2f34..694a5b802c 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -24,6 +24,7 @@ from typing import ( List, Mapping, Optional, + Sequence, Set, Tuple, Union, @@ -153,7 +154,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): return self._known_servers_count @cached(max_entries=100000, iterable=True) - async def get_users_in_room(self, room_id: str) -> List[str]: + async def get_users_in_room(self, room_id: str) -> Sequence[str]: """Returns a list of users in the room. Will return inaccurate results for rooms with partial state, since the state for @@ -190,9 +191,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): ) @cached() - def get_user_in_room_with_profile( - self, room_id: str, user_id: str - ) -> Dict[str, ProfileInfo]: + def get_user_in_room_with_profile(self, room_id: str, user_id: str) -> ProfileInfo: raise NotImplementedError() @cachedList( @@ -246,7 +245,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): @cached(max_entries=100000, iterable=True) async def get_users_in_room_with_profiles( self, room_id: str - ) -> Dict[str, ProfileInfo]: + ) -> Mapping[str, ProfileInfo]: """Get a mapping from user ID to profile information for all users in a given room. 
The profile information comes directly from this room's `m.room.member` @@ -285,7 +284,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): ) @cached(max_entries=100000) - async def get_room_summary(self, room_id: str) -> Dict[str, MemberSummary]: + async def get_room_summary(self, room_id: str) -> Mapping[str, MemberSummary]: """Get the details of a room roughly suitable for use by the room summary extension to /sync. Useful when lazy loading room members. Args: @@ -357,7 +356,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): @cached() async def get_invited_rooms_for_local_user( self, user_id: str - ) -> List[RoomsForUser]: + ) -> Sequence[RoomsForUser]: """Get all the rooms the *local* user is invited to. Args: @@ -475,7 +474,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): return results @cached(iterable=True) - async def get_local_users_in_room(self, room_id: str) -> List[str]: + async def get_local_users_in_room(self, room_id: str) -> Sequence[str]: """ Retrieves a list of the current roommembers who are local to the server. """ @@ -791,7 +790,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): """Returns the set of users who share a room with `user_id`""" room_ids = await self.get_rooms_for_user(user_id) - user_who_share_room = set() + user_who_share_room: Set[str] = set() for room_id in room_ids: user_ids = await self.get_users_in_room(room_id) user_who_share_room.update(user_ids) @@ -953,7 +952,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): return True @cached(iterable=True, max_entries=10000) - async def get_current_hosts_in_room(self, room_id: str) -> Set[str]: + async def get_current_hosts_in_room(self, room_id: str) -> AbstractSet[str]: """Get current hosts in room based on current state.""" # First we check if we already have `get_users_in_room` in the cache, as diff --git a/synapse/storage/databases/main/signatures.py b/synapse/storage/databases/main/signatures.py index 05da15074a..5dcb1fc0b5 100644 --- a/synapse/storage/databases/main/signatures.py +++ b/synapse/storage/databases/main/signatures.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Collection, Dict, List, Tuple +from typing import Collection, Dict, List, Mapping, Tuple from unpaddedbase64 import encode_base64 @@ -26,7 +26,7 @@ from synapse.util.caches.descriptors import cached, cachedList class SignatureWorkerStore(EventsWorkerStore): @cached() - def get_event_reference_hash(self, event_id: str) -> Dict[str, Dict[str, bytes]]: + def get_event_reference_hash(self, event_id: str) -> Mapping[str, bytes]: # This is a dummy function to allow get_event_reference_hashes # to use its cache raise NotImplementedError() @@ -36,7 +36,7 @@ class SignatureWorkerStore(EventsWorkerStore): ) async def get_event_reference_hashes( self, event_ids: Collection[str] - ) -> Dict[str, Dict[str, bytes]]: + ) -> Mapping[str, Mapping[str, bytes]]: """Get all hashes for given events. Args: diff --git a/synapse/storage/databases/main/tags.py b/synapse/storage/databases/main/tags.py index d5500cdd47..c149a9eacb 100644 --- a/synapse/storage/databases/main/tags.py +++ b/synapse/storage/databases/main/tags.py @@ -15,7 +15,7 @@ # limitations under the License. 
import logging -from typing import Any, Dict, Iterable, List, Tuple, cast +from typing import Any, Dict, Iterable, List, Mapping, Tuple, cast from synapse.api.constants import AccountDataTypes from synapse.replication.tcp.streams import AccountDataStream @@ -32,7 +32,9 @@ logger = logging.getLogger(__name__) class TagsWorkerStore(AccountDataWorkerStore): @cached() - async def get_tags_for_user(self, user_id: str) -> Dict[str, Dict[str, JsonDict]]: + async def get_tags_for_user( + self, user_id: str + ) -> Mapping[str, Mapping[str, JsonDict]]: """Get all the tags for a user. @@ -107,7 +109,7 @@ class TagsWorkerStore(AccountDataWorkerStore): async def get_updated_tags( self, user_id: str, stream_id: int - ) -> Dict[str, Dict[str, JsonDict]]: + ) -> Mapping[str, Mapping[str, JsonDict]]: """Get all the tags for the rooms where the tags have changed since the given version diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index 14ef5b040d..f6a6fd4079 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -16,9 +16,9 @@ import logging import re from typing import ( TYPE_CHECKING, - Dict, Iterable, List, + Mapping, Optional, Sequence, Set, @@ -586,7 +586,7 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore): ) @cached() - async def get_user_in_directory(self, user_id: str) -> Optional[Dict[str, str]]: + async def get_user_in_directory(self, user_id: str) -> Optional[Mapping[str, str]]: return await self.db_pool.simple_select_one( table="user_directory", keyvalues={"user_id": user_id}, diff --git a/tests/rest/admin/test_server_notice.py b/tests/rest/admin/test_server_notice.py index a2f347f666..f71ff46d87 100644 --- a/tests/rest/admin/test_server_notice.py +++ b/tests/rest/admin/test_server_notice.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import List +from typing import List, Sequence from twisted.test.proto_helpers import MemoryReactor @@ -558,7 +558,7 @@ class ServerNoticeTestCase(unittest.HomeserverTestCase): def _check_invite_and_join_status( self, user_id: str, expected_invites: int, expected_memberships: int - ) -> List[RoomsForUser]: + ) -> Sequence[RoomsForUser]: """Check invite and room membership status of a user. Args -- cgit 1.5.1 From 6cddf24e361fe43f086307c833cd814dc03363b6 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Sat, 11 Feb 2023 00:31:05 +0100 Subject: Faster joins: don't stall when a user joins during a fast join (#14606) Fixes #12801. Complement tests are at https://github.com/matrix-org/complement/pull/567. Avoid blocking on full state when handling a subsequent join into a partial state room. Also always perform a remote join into partial state rooms, since we do not know whether the joining user has been banned and want to avoid leaking history to banned users. 
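The new routing rule can be summarised as follows (a simplified, illustrative sketch of the remote-join decision added in the diff below — the helper name here is made up, and the restricted-join-rules and invite handling shown in the diff are omitted):

    from typing import Optional

    def should_perform_remote_join(
        is_host_in_room: bool,
        is_partial_state_room: bool,
        previous_membership: Optional[str],
    ) -> bool:
        # Not resident in the room at all: the join must go via a remote
        # server that is.
        if not is_host_in_room:
            return True
        # Resident but with only partial state: we cannot verify locally
        # that the joining user is not banned, so route the join through
        # a server with full state rather than risk leaking history.
        if is_partial_state_room and previous_membership != "join":  # i.e. Membership.JOIN
            return True
        # Full state, or the user is already joined: a local join is safe.
        return False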
Signed-off-by: Mathieu Velten Co-authored-by: Sean Quah Co-authored-by: David Robertson --- changelog.d/14606.misc | 1 + synapse/api/errors.py | 22 ++++++ synapse/federation/federation_server.py | 2 +- synapse/handlers/event_auth.py | 16 ++--- synapse/handlers/federation.py | 2 +- synapse/handlers/federation_event.py | 59 ++++++++++++++-- synapse/handlers/message.py | 2 +- synapse/handlers/room.py | 2 +- synapse/handlers/room_member.py | 118 ++++++++++++++++++++++--------- synapse/handlers/room_member_worker.py | 5 +- synapse/storage/databases/main/events.py | 21 +----- tests/handlers/test_federation.py | 40 +++++------ 12 files changed, 196 insertions(+), 94 deletions(-) create mode 100644 changelog.d/14606.misc (limited to 'synapse') diff --git a/changelog.d/14606.misc b/changelog.d/14606.misc new file mode 100644 index 0000000000..e2debc96d8 --- /dev/null +++ b/changelog.d/14606.misc @@ -0,0 +1 @@ +Faster joins: don't stall when another user joins during a fast join resync. diff --git a/synapse/api/errors.py b/synapse/api/errors.py index c2c177fd71..9235ce6536 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -751,3 +751,25 @@ class ModuleFailedException(Exception): Raised when a module API callback fails, for example because it raised an exception. """ + + +class PartialStateConflictError(SynapseError): + """An internal error raised when attempting to persist an event with partial state + after the room containing the event has been un-partial stated. + + This error should be handled by recomputing the event context and trying again. + + This error has an HTTP status code so that it can be transported over replication. + It should not be exposed to clients. + """ + + @staticmethod + def message() -> str: + return "Cannot persist partial state event in un-partial stated room" + + def __init__(self) -> None: + super().__init__( + HTTPStatus.CONFLICT, + msg=PartialStateConflictError.message(), + errcode=Codes.UNKNOWN, + ) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 6addc0bb65..6d99845de5 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -48,6 +48,7 @@ from synapse.api.errors import ( FederationError, IncompatibleRoomVersionError, NotFoundError, + PartialStateConflictError, SynapseError, UnsupportedRoomVersionError, ) @@ -81,7 +82,6 @@ from synapse.replication.http.federation import ( ReplicationFederationSendEduRestServlet, ReplicationGetQueryRestServlet, ) -from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.lock import Lock from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary from synapse.storage.roommember import MemberSummary diff --git a/synapse/handlers/event_auth.py b/synapse/handlers/event_auth.py index a23a8ce2a1..46dd63c3f0 100644 --- a/synapse/handlers/event_auth.py +++ b/synapse/handlers/event_auth.py @@ -202,7 +202,7 @@ class EventAuthHandler: state_ids: StateMap[str], room_version: RoomVersion, user_id: str, - prev_member_event: Optional[EventBase], + prev_membership: Optional[str], ) -> None: """ Check whether a user can join a room without an invite due to restricted join rules. @@ -214,15 +214,14 @@ class EventAuthHandler: state_ids: The state of the room as it currently is. room_version: The room version of the room being joined. user_id: The user joining the room. - prev_member_event: The current membership event for this user. 
+ prev_membership: The current membership state for this user. `None` if the + user has never joined the room (equivalent to "leave"). Raises: AuthError if the user cannot join the room. """ # If the member is invited or currently joined, then nothing to do. - if prev_member_event and ( - prev_member_event.membership in (Membership.JOIN, Membership.INVITE) - ): + if prev_membership in (Membership.JOIN, Membership.INVITE): return # This is not a room with a restricted join rule, so we don't need to do the @@ -255,13 +254,14 @@ class EventAuthHandler: ) async def has_restricted_join_rules( - self, state_ids: StateMap[str], room_version: RoomVersion + self, partial_state_ids: StateMap[str], room_version: RoomVersion ) -> bool: """ Return if the room has the proper join rules set for access via rooms. Args: - state_ids: The state of the room as it currently is. + state_ids: The state of the room as it currently is. May be full or partial + state. room_version: The room version of the room to query. Returns: @@ -272,7 +272,7 @@ class EventAuthHandler: return False # If there's no join rule, then it defaults to invite (so this doesn't apply). - join_rules_event_id = state_ids.get((EventTypes.JoinRules, ""), None) + join_rules_event_id = partial_state_ids.get((EventTypes.JoinRules, ""), None) if not join_rules_event_id: return False diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 43ed4a3dd1..08727e4857 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -49,6 +49,7 @@ from synapse.api.errors import ( FederationPullAttemptBackoffError, HttpResponseException, NotFoundError, + PartialStateConflictError, RequestSendFailed, SynapseError, ) @@ -68,7 +69,6 @@ from synapse.replication.http.federation import ( ReplicationCleanRoomRestServlet, ReplicationStoreRoomOnOutlierMembershipRestServlet, ) -from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.types import JsonDict, StrCollection, get_domain_from_id from synapse.types.state import StateFilter diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 3561f2f1de..b7136f8d1c 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -47,6 +47,7 @@ from synapse.api.errors import ( FederationError, FederationPullAttemptBackoffError, HttpResponseException, + PartialStateConflictError, RequestSendFailed, SynapseError, ) @@ -74,7 +75,6 @@ from synapse.replication.http.federation import ( ReplicationFederationSendEventsRestServlet, ) from synapse.state import StateResolutionStore -from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.types import ( PersistedEventPosition, @@ -441,16 +441,17 @@ class FederationEventHandler: # Check if the user is already in the room or invited to the room. user_id = event.state_key prev_member_event_id = prev_state_ids.get((EventTypes.Member, user_id), None) - prev_member_event = None + prev_membership = None if prev_member_event_id: prev_member_event = await self._store.get_event(prev_member_event_id) + prev_membership = prev_member_event.membership # Check if the member should be allowed access via membership in a space. 
await self._event_auth_handler.check_restricted_join_rules( prev_state_ids, event.room_version, user_id, - prev_member_event, + prev_membership, ) @trace @@ -526,11 +527,57 @@ class FederationEventHandler: "Peristing join-via-remote %s (partial_state: %s)", event, partial_state ) with nested_logging_context(suffix=event.event_id): + if partial_state: + # When handling a second partial state join into a partial state room, + # the returned state will exclude the membership from the first join. To + # preserve prior memberships, we try to compute the partial state before + # the event ourselves if we know about any of the prev events. + # + # When we don't know about any of the prev events, it's fine to just use + # the returned state, since the new join will create a new forward + # extremity, and leave the forward extremity containing our prior + # memberships alone. + prev_event_ids = set(event.prev_event_ids()) + seen_event_ids = await self._store.have_events_in_timeline( + prev_event_ids + ) + missing_event_ids = prev_event_ids - seen_event_ids + + state_maps_to_resolve: List[StateMap[str]] = [] + + # Fetch the state after the prev events that we know about. + state_maps_to_resolve.extend( + ( + await self._state_storage_controller.get_state_groups_ids( + room_id, seen_event_ids, await_full_state=False + ) + ).values() + ) + + # When there are prev events we do not have the state for, we state + # resolve with the state returned by the remote homeserver. + if missing_event_ids or len(state_maps_to_resolve) == 0: + state_maps_to_resolve.append( + {(e.type, e.state_key): e.event_id for e in state} + ) + + state_ids_before_event = ( + await self._state_resolution_handler.resolve_events_with_store( + event.room_id, + room_version.identifier, + state_maps_to_resolve, + event_map=None, + state_res_store=StateResolutionStore(self._store), + ) + ) + else: + state_ids_before_event = { + (e.type, e.state_key): e.event_id for e in state + } + context = await self._state_handler.compute_event_context( event, - state_ids_before_event={ - (e.type, e.state_key): e.event_id for e in state - }, + state_ids_before_event=state_ids_before_event, partial_state=partial_state, ) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 3e30f52e4d..8f5b658d9d 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -38,6 +38,7 @@ from synapse.api.errors import ( Codes, ConsentNotGivenError, NotFoundError, + PartialStateConflictError, ShadowBanError, SynapseError, UnstableSpecAuthError, @@ -57,7 +58,6 @@ from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.metrics.background_process_metrics import run_as_background_process from synapse.replication.http.send_event import ReplicationSendEventRestServlet from synapse.replication.http.send_events import ReplicationSendEventsRestServlet -from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.types import ( MutableStateMap, diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 060bbcb181..837dabb3b7 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -43,6 +43,7 @@ from synapse.api.errors import ( Codes, LimitExceededError, NotFoundError, + PartialStateConflictError, StoreError, SynapseError, ) @@ -54,7 +55,6 @@ from synapse.events.utils import copy_and_fixup_power_levels_contents from synapse.handlers.relations import BundledAggregations from 
synapse.module_api import NOT_SPAM from synapse.rest.admin._base import assert_user_is_admin -from synapse.storage.databases.main.events import PartialStateConflictError from synapse.streams import EventSource from synapse.types import ( JsonDict, diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 6e7141d2ef..a965c7ec76 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -26,7 +26,13 @@ from synapse.api.constants import ( GuestAccess, Membership, ) -from synapse.api.errors import AuthError, Codes, ShadowBanError, SynapseError +from synapse.api.errors import ( + AuthError, + Codes, + PartialStateConflictError, + ShadowBanError, + SynapseError, +) from synapse.api.ratelimiting import Ratelimiter from synapse.event_auth import get_named_level, get_power_level_event from synapse.events import EventBase @@ -34,7 +40,6 @@ from synapse.events.snapshot import EventContext from synapse.handlers.profile import MAX_AVATAR_URL_LEN, MAX_DISPLAYNAME_LEN from synapse.logging import opentracing from synapse.module_api import NOT_SPAM -from synapse.storage.databases.main.events import PartialStateConflictError from synapse.types import ( JsonDict, Requester, @@ -56,6 +61,13 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) +class NoKnownServersError(SynapseError): + """No server already resident to the room was provided to the join/knock operation.""" + + def __init__(self, msg: str = "No known servers"): + super().__init__(404, msg) + + class RoomMemberHandler(metaclass=abc.ABCMeta): # TODO(paul): This handler currently contains a messy conflation of # low-level API that works on UserID objects and so on, and REST-level @@ -185,6 +197,10 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): room_id: Room that we are trying to join user: User who is trying to join content: A dict that should be used as the content of the join event. + + Raises: + NoKnownServersError: if remote_room_hosts does not contain a server joined to + the room. """ raise NotImplementedError() @@ -823,14 +839,19 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): latest_event_ids = await self.store.get_prev_events_for_room(room_id) - state_before_join = await self.state_handler.compute_state_after_events( - room_id, latest_event_ids + is_partial_state_room = await self.store.is_partial_state_room(room_id) + partial_state_before_join = await self.state_handler.compute_state_after_events( + room_id, latest_event_ids, await_full_state=False ) + # `is_partial_state_room` also indicates whether `partial_state_before_join` is + # partial. 
# TODO: Refactor into dictionary of explicitly allowed transitions # between old and new state, with specific error messages for some # transitions and generic otherwise - old_state_id = state_before_join.get((EventTypes.Member, target.to_string())) + old_state_id = partial_state_before_join.get( + (EventTypes.Member, target.to_string()) + ) if old_state_id: old_state = await self.store.get_event(old_state_id, allow_none=True) old_membership = old_state.content.get("membership") if old_state else None @@ -881,11 +902,11 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): if action == "kick": raise AuthError(403, "The target user is not in the room") - is_host_in_room = await self._is_host_in_room(state_before_join) + is_host_in_room = await self._is_host_in_room(partial_state_before_join) if effective_membership_state == Membership.JOIN: if requester.is_guest: - guest_can_join = await self._can_guest_join(state_before_join) + guest_can_join = await self._can_guest_join(partial_state_before_join) if not guest_can_join: # This should be an auth check, but guests are a local concept, # so don't really fit into the general auth process. @@ -927,8 +948,9 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): room_id, remote_room_hosts, content, + is_partial_state_room, is_host_in_room, - state_before_join, + partial_state_before_join, ) if remote_join: if ratelimit: @@ -1073,8 +1095,9 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): room_id: str, remote_room_hosts: List[str], content: JsonDict, + is_partial_state_room: bool, is_host_in_room: bool, - state_before_join: StateMap[str], + partial_state_before_join: StateMap[str], ) -> Tuple[bool, List[str]]: """ Check whether the server should do a remote join (as opposed to a local @@ -1093,9 +1116,12 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): remote_room_hosts: A list of remote room hosts. content: The content to use as the event body of the join. This may be modified. - is_host_in_room: True if the host is in the room. - state_before_join: The state before the join event (i.e. the resolution of - the states after its parent events). + is_partial_state_room: `True` if the server currently doesn't hold the full + state of the room. + is_host_in_room: `True` if the host is in the room. + partial_state_before_join: The state before the join event (i.e. the + resolution of the states after its parent events). May be full or + partial state, depending on `is_partial_state_room`. Returns: A tuple of: @@ -1109,6 +1135,23 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): if not is_host_in_room: return True, remote_room_hosts + prev_member_event_id = partial_state_before_join.get( + (EventTypes.Member, user_id), None + ) + previous_membership = None + if prev_member_event_id: + prev_member_event = await self.store.get_event(prev_member_event_id) + previous_membership = prev_member_event.membership + + # If we are not fully joined yet, and the target is not already in the room, + # let's do a remote join so another server with the full state can validate + # that the user has not been banned for example. + # We could just accept the join and wait for state res to resolve that later on + # but we would then leak room history to this person until then, which is pretty + # bad. + if is_partial_state_room and previous_membership != Membership.JOIN: + return True, remote_room_hosts + # If the host is in the room, but not one of the authorised hosts # for restricted join rules, a remote join must be used. 
room_version = await self.store.get_room_version(room_id) @@ -1116,21 +1159,19 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): # If restricted join rules are not being used, a local join can always # be used. if not await self.event_auth_handler.has_restricted_join_rules( - state_before_join, room_version + partial_state_before_join, room_version ): return False, [] # If the user is invited to the room or already joined, the join # event can always be issued locally. - prev_member_event_id = state_before_join.get((EventTypes.Member, user_id), None) - prev_member_event = None - if prev_member_event_id: - prev_member_event = await self.store.get_event(prev_member_event_id) - if prev_member_event.membership in ( - Membership.JOIN, - Membership.INVITE, - ): - return False, [] + if previous_membership in (Membership.JOIN, Membership.INVITE): + return False, [] + + # All the partial state cases are covered above. We have been given the full + # state of the room. + assert not is_partial_state_room + state_before_join = partial_state_before_join # If the local host has a user who can issue invites, then a local # join can be done. @@ -1154,7 +1195,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): # Ensure the member should be allowed access via membership in a room. await self.event_auth_handler.check_restricted_join_rules( - state_before_join, room_version, user_id, prev_member_event + state_before_join, room_version, user_id, previous_membership ) # If this is going to be a local join, additional information must @@ -1304,11 +1345,17 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): if prev_member_event.membership == Membership.JOIN: await self._user_left_room(target_user, room_id) - async def _can_guest_join(self, current_state_ids: StateMap[str]) -> bool: + async def _can_guest_join(self, partial_current_state_ids: StateMap[str]) -> bool: """ Returns whether a guest can join a room based on its current state. + + Args: + partial_current_state_ids: The current state of the room. May be full or + partial state. """ - guest_access_id = current_state_ids.get((EventTypes.GuestAccess, ""), None) + guest_access_id = partial_current_state_ids.get( + (EventTypes.GuestAccess, ""), None + ) if not guest_access_id: return False @@ -1634,19 +1681,25 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): ) return event, stream_id - async def _is_host_in_room(self, current_state_ids: StateMap[str]) -> bool: + async def _is_host_in_room(self, partial_current_state_ids: StateMap[str]) -> bool: + """Returns whether the homeserver is in the room based on its current state. + + Args: + partial_current_state_ids: The current state of the room. May be full or + partial state. + """ # Have we just created the room, and is this about to be the very # first member event? 
- create_event_id = current_state_ids.get(("m.room.create", "")) - if len(current_state_ids) == 1 and create_event_id: + create_event_id = partial_current_state_ids.get(("m.room.create", "")) + if len(partial_current_state_ids) == 1 and create_event_id: # We can only get here if we're in the process of creating the room return True - for etype, state_key in current_state_ids: + for etype, state_key in partial_current_state_ids: if etype != EventTypes.Member or not self.hs.is_mine_id(state_key): continue - event_id = current_state_ids[(etype, state_key)] + event_id = partial_current_state_ids[(etype, state_key)] event = await self.store.get_event(event_id, allow_none=True) if not event: continue @@ -1715,8 +1768,7 @@ class RoomMemberMasterHandler(RoomMemberHandler): ] if len(remote_room_hosts) == 0: - raise SynapseError( - 404, + raise NoKnownServersError( "Can't join remote room because no servers " "that are in the room have been provided.", ) @@ -1947,7 +1999,7 @@ class RoomMemberMasterHandler(RoomMemberHandler): ] if len(remote_room_hosts) == 0: - raise SynapseError(404, "No known servers") + raise NoKnownServersError() return await self.federation_handler.do_knock( remote_room_hosts, room_id, user.to_string(), content=content diff --git a/synapse/handlers/room_member_worker.py b/synapse/handlers/room_member_worker.py index 221552a2a6..ba261702d4 100644 --- a/synapse/handlers/room_member_worker.py +++ b/synapse/handlers/room_member_worker.py @@ -15,8 +15,7 @@ import logging from typing import TYPE_CHECKING, List, Optional, Tuple -from synapse.api.errors import SynapseError -from synapse.handlers.room_member import RoomMemberHandler +from synapse.handlers.room_member import NoKnownServersError, RoomMemberHandler from synapse.replication.http.membership import ( ReplicationRemoteJoinRestServlet as ReplRemoteJoin, ReplicationRemoteKnockRestServlet as ReplRemoteKnock, @@ -52,7 +51,7 @@ class RoomMemberWorkerHandler(RoomMemberHandler): ) -> Tuple[str, int]: """Implements RoomMemberHandler._remote_join""" if len(remote_room_hosts) == 0: - raise SynapseError(404, "No known servers") + raise NoKnownServersError() ret = await self._remote_join_client( requester=requester, diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index cb66376fb4..ffe766fd56 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -16,7 +16,6 @@ import itertools import logging from collections import OrderedDict -from http import HTTPStatus from typing import ( TYPE_CHECKING, Any, @@ -36,7 +35,7 @@ from prometheus_client import Counter import synapse.metrics from synapse.api.constants import EventContentFields, EventTypes, RelationTypes -from synapse.api.errors import Codes, SynapseError +from synapse.api.errors import PartialStateConflictError from synapse.api.room_versions import RoomVersions from synapse.events import EventBase, relation_from_event from synapse.events.snapshot import EventContext @@ -72,24 +71,6 @@ event_counter = Counter( ) -class PartialStateConflictError(SynapseError): - """An internal error raised when attempting to persist an event with partial state - after the room containing the event has been un-partial stated. - - This error should be handled by recomputing the event context and trying again. - - This error has an HTTP status code so that it can be transported over replication. - It should not be exposed to clients. 
- """ - - def __init__(self) -> None: - super().__init__( - HTTPStatus.CONFLICT, - msg="Cannot persist partial state event in un-partial stated room", - errcode=Codes.UNKNOWN, - ) - - @attr.s(slots=True, auto_attribs=True) class DeltaState: """Deltas to use to update the `current_state_events` table. diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py index 57675fa407..5868eb2da7 100644 --- a/tests/handlers/test_federation.py +++ b/tests/handlers/test_federation.py @@ -575,26 +575,6 @@ class PartialJoinTestCase(unittest.FederatingHomeserverTestCase): fed_client = fed_handler.federation_client room_id = "!room:example.com" - membership_event = make_event_from_dict( - { - "room_id": room_id, - "type": "m.room.member", - "sender": "@alice:test", - "state_key": "@alice:test", - "content": {"membership": "join"}, - }, - RoomVersions.V10, - ) - - mock_make_membership_event = Mock( - return_value=make_awaitable( - ( - "example.com", - membership_event, - RoomVersions.V10, - ) - ) - ) EVENT_CREATE = make_event_from_dict( { @@ -640,6 +620,26 @@ class PartialJoinTestCase(unittest.FederatingHomeserverTestCase): }, room_version=RoomVersions.V10, ) + membership_event = make_event_from_dict( + { + "room_id": room_id, + "type": "m.room.member", + "sender": "@alice:test", + "state_key": "@alice:test", + "content": {"membership": "join"}, + "prev_events": [EVENT_INVITATION_MEMBERSHIP.event_id], + }, + RoomVersions.V10, + ) + mock_make_membership_event = Mock( + return_value=make_awaitable( + ( + "example.com", + membership_event, + RoomVersions.V10, + ) + ) + ) mock_send_join = Mock( return_value=make_awaitable( SendJoinResult( -- cgit 1.5.1 From c10e13125057e506381d1be8c2ec1394eee45d62 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Mon, 13 Feb 2023 11:49:20 +0000 Subject: Apply logging from hotfixes branch to develop (#15054) * Apply logging from hotfixes branch to develop Part of #4826. Originally added in #11882. * Changelog --- changelog.d/15054.misc | 1 + synapse/rest/client/account.py | 3 +++ 2 files changed, 4 insertions(+) create mode 100644 changelog.d/15054.misc (limited to 'synapse') diff --git a/changelog.d/15054.misc b/changelog.d/15054.misc new file mode 100644 index 0000000000..d800b107cf --- /dev/null +++ b/changelog.d/15054.misc @@ -0,0 +1 @@ +Merge debug logging from the hotfixes branch. 
diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py index 4373c73662..232f3a976d 100644 --- a/synapse/rest/client/account.py +++ b/synapse/rest/client/account.py @@ -415,6 +415,7 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet): request, MsisdnRequestTokenBody ) msisdn = phone_number_to_msisdn(body.country, body.phone_number) + logger.info("Request #%s to verify ownership of %s", body.send_attempt, msisdn) if not await check_3pid_allowed(self.hs, "msisdn", msisdn): raise SynapseError( @@ -444,6 +445,7 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet): await self.hs.get_clock().sleep(random.randint(1, 10) / 10) return 200, {"sid": random_string(16)} + logger.info("MSISDN %s is already in use by %s", msisdn, existing_user_id) raise SynapseError(400, "MSISDN is already in use", Codes.THREEPID_IN_USE) if not self.hs.config.registration.account_threepid_delegate_msisdn: @@ -468,6 +470,7 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet): threepid_send_requests.labels(type="msisdn", reason="add_threepid").observe( body.send_attempt ) + logger.info("MSISDN %s: got response from identity server: %s", msisdn, ret) return 200, ret -- cgit 1.5.1 From bdccfd24773d7482ae497263634312640dab01d1 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 13 Feb 2023 12:12:48 +0000 Subject: Refactor arguments of `try_unbind_threepid(_with_id_server)` from dict to separate args (#15053) --- changelog.d/15053.misc | 1 + synapse/handlers/auth.py | 5 ++-- synapse/handlers/deactivate_account.py | 7 +---- synapse/handlers/identity.py | 47 +++++++++++++++++----------------- synapse/rest/client/account.py | 7 +---- 5 files changed, 28 insertions(+), 39 deletions(-) create mode 100644 changelog.d/15053.misc (limited to 'synapse') diff --git a/changelog.d/15053.misc b/changelog.d/15053.misc new file mode 100644 index 0000000000..c27528f5c6 --- /dev/null +++ b/changelog.d/15053.misc @@ -0,0 +1 @@ +Refactor arguments of `try_unbind_threepid` and `_try_unbind_threepid_with_id_server` to not use dictionaries. 
\ No newline at end of file diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 30f2d46c3c..57a6854b1e 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -1593,9 +1593,8 @@ class AuthHandler: if medium == "email": address = canonicalise_email(address) - identity_handler = self.hs.get_identity_handler() - result = await identity_handler.try_unbind_threepid( - user_id, {"medium": medium, "address": address, "id_server": id_server} + result = await self.hs.get_identity_handler().try_unbind_threepid( + user_id, medium, address, id_server ) await self.store.user_delete_threepid(user_id, medium, address) diff --git a/synapse/handlers/deactivate_account.py b/synapse/handlers/deactivate_account.py index d74d135c0c..d24f649382 100644 --- a/synapse/handlers/deactivate_account.py +++ b/synapse/handlers/deactivate_account.py @@ -106,12 +106,7 @@ class DeactivateAccountHandler: for threepid in threepids: try: result = await self._identity_handler.try_unbind_threepid( - user_id, - { - "medium": threepid["medium"], - "address": threepid["address"], - "id_server": id_server, - }, + user_id, threepid["medium"], threepid["address"], id_server ) identity_server_supports_unbinding &= result except Exception: diff --git a/synapse/handlers/identity.py b/synapse/handlers/identity.py index 848e46eb9b..bf0f7acf80 100644 --- a/synapse/handlers/identity.py +++ b/synapse/handlers/identity.py @@ -219,28 +219,31 @@ class IdentityHandler: data = json_decoder.decode(e.msg) # XXX WAT? return data - async def try_unbind_threepid(self, mxid: str, threepid: dict) -> bool: - """Attempt to remove a 3PID from an identity server, or if one is not provided, all - identity servers we're aware the binding is present on + async def try_unbind_threepid( + self, mxid: str, medium: str, address: str, id_server: Optional[str] + ) -> bool: + """Attempt to remove a 3PID from one or more identity servers. Args: mxid: Matrix user ID of binding to be removed - threepid: Dict with medium & address of binding to be - removed, and an optional id_server. + medium: The medium of the third-party ID. + address: The address of the third-party ID. + id_server: An identity server to attempt to unbind from. If None, + attempt to remove the association from all identity servers + known to potentially have it. Raises: - SynapseError: If we failed to contact the identity server + SynapseError: If we failed to contact one or more identity servers. Returns: - True on success, otherwise False if the identity - server doesn't support unbinding (or no identity server found to - contact). + True on success, otherwise False if the identity server doesn't + support unbinding (or no identity server to contact was found). 
""" - if threepid.get("id_server"): - id_servers = [threepid["id_server"]] + if id_server: + id_servers = [id_server] else: id_servers = await self.store.get_id_servers_user_bound( - user_id=mxid, medium=threepid["medium"], address=threepid["address"] + mxid, medium, address ) # We don't know where to unbind, so we don't have a choice but to return @@ -249,20 +252,21 @@ class IdentityHandler: changed = True for id_server in id_servers: - changed &= await self.try_unbind_threepid_with_id_server( - mxid, threepid, id_server + changed &= await self._try_unbind_threepid_with_id_server( + mxid, medium, address, id_server ) return changed - async def try_unbind_threepid_with_id_server( - self, mxid: str, threepid: dict, id_server: str + async def _try_unbind_threepid_with_id_server( + self, mxid: str, medium: str, address: str, id_server: str ) -> bool: """Removes a binding from an identity server Args: mxid: Matrix user ID of binding to be removed - threepid: Dict with medium & address of binding to be removed + medium: The medium of the third-party ID + address: The address of the third-party ID id_server: Identity server to unbind from Raises: @@ -286,7 +290,7 @@ class IdentityHandler: content = { "mxid": mxid, - "threepid": {"medium": threepid["medium"], "address": threepid["address"]}, + "threepid": {"medium": medium, "address": address}, } # we abuse the federation http client to sign the request, but we have to send it @@ -319,12 +323,7 @@ class IdentityHandler: except RequestTimedOutError: raise SynapseError(500, "Timed out contacting identity server") - await self.store.remove_user_bound_threepid( - user_id=mxid, - medium=threepid["medium"], - address=threepid["address"], - id_server=id_server, - ) + await self.store.remove_user_bound_threepid(mxid, medium, address, id_server) return changed diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py index 232f3a976d..662f5bf762 100644 --- a/synapse/rest/client/account.py +++ b/synapse/rest/client/account.py @@ -737,12 +737,7 @@ class ThreepidUnbindRestServlet(RestServlet): # Attempt to unbind the threepid from an identity server. If id_server is None, try to # unbind from all identity servers this threepid has been added to in the past result = await self.identity_handler.try_unbind_threepid( - requester.user.to_string(), - { - "address": body.address, - "medium": body.medium, - "id_server": body.id_server, - }, + requester.user.to_string(), body.medium, body.address, body.id_server ) return 200, {"id_server_unbind_result": "success" if result else "no-support"} -- cgit 1.5.1 From 3d7aead5d62e6da97e006199b3f957325e54b053 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Mon, 13 Feb 2023 16:30:58 +0000 Subject: Tweak comment on `_is_local_room_accessible` as part of room visibility in `/hierarchy` to clarify the condition for a room being visible. (#14834) --- changelog.d/14834.misc | 1 + synapse/handlers/room_summary.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 changelog.d/14834.misc (limited to 'synapse') diff --git a/changelog.d/14834.misc b/changelog.d/14834.misc new file mode 100644 index 0000000000..e683212dc4 --- /dev/null +++ b/changelog.d/14834.misc @@ -0,0 +1 @@ +Tweak comment on `_is_local_room_accessible` as part of room visibility in `/hierarchy` to clarify the condition for a room being visible. 
\ No newline at end of file
diff --git a/synapse/handlers/room_summary.py b/synapse/handlers/room_summary.py
index 4472019fbc..807245160d 100644
--- a/synapse/handlers/room_summary.py
+++ b/synapse/handlers/room_summary.py
@@ -521,8 +521,8 @@ class RoomSummaryHandler:
 
         It should return true if:
 
-        * The requester is joined or can join the room (per MSC3173).
-        * The origin server has any user that is joined or can join the room.
+        * The requesting user is joined or can join the room (per MSC3173); or
+        * The origin server has any user that is joined or can join the room; or
         * The history visibility is set to world readable.
 
         Args:
-- 
cgit 1.5.1


From db2b105d69fa331bb3f050df82266314f61577ea Mon Sep 17 00:00:00 2001
From: Harishankar Kumar <31770598+hari01584@users.noreply.github.com>
Date: Tue, 14 Feb 2023 15:07:08 +0530
Subject: Change `Collection[str]` to `StrCollection` in event_auth code (#14929)

Signed-off-by: Harishankar Kumar
---
 changelog.d/14929.misc                             |  1 +
 synapse/event_auth.py                              | 23 +++++++++-------------
 synapse/events/__init__.py                         |  6 +++---
 synapse/storage/databases/main/events.py           |  7 +++----
 .../storage/databases/main/events_bg_updates.py    |  6 +++---
 5 files changed, 19 insertions(+), 24 deletions(-)
 create mode 100644 changelog.d/14929.misc
(limited to 'synapse')

diff --git a/changelog.d/14929.misc b/changelog.d/14929.misc
new file mode 100644
index 0000000000..2cc3614dfd
--- /dev/null
+++ b/changelog.d/14929.misc
@@ -0,0 +1 @@
+Use `StrCollection` to avoid potential bugs with `Collection[str]`.
diff --git a/synapse/event_auth.py b/synapse/event_auth.py
index e0be9f88cc..4d6d1b8ebd 100644
--- a/synapse/event_auth.py
+++ b/synapse/event_auth.py
@@ -16,18 +16,7 @@
 import collections.abc
 import logging
 import typing
-from typing import (
-    Any,
-    Collection,
-    Dict,
-    Iterable,
-    List,
-    Mapping,
-    Optional,
-    Set,
-    Tuple,
-    Union,
-)
+from typing import Any, Dict, Iterable, List, Mapping, Optional, Set, Tuple, Union
 
 from canonicaljson import encode_canonical_json
 from signedjson.key import decode_verify_key_bytes
@@ -56,7 +45,13 @@ from synapse.api.room_versions import (
     RoomVersions,
 )
 from synapse.storage.databases.main.events_worker import EventRedactBehaviour
-from synapse.types import MutableStateMap, StateMap, UserID, get_domain_from_id
+from synapse.types import (
+    MutableStateMap,
+    StateMap,
+    StrCollection,
+    UserID,
+    get_domain_from_id,
+)
 
 if typing.TYPE_CHECKING:
     # conditional imports to avoid import cycle
@@ -69,7 +64,7 @@ logger = logging.getLogger(__name__)
 class _EventSourceStore(Protocol):
     async def get_events(
         self,
-        event_ids: Collection[str],
+        event_ids: StrCollection,
         redact_behaviour: EventRedactBehaviour,
         get_prev_content: bool = False,
         allow_rejected: bool = False,
diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py
index 8aca9a3ab9..91118a8d84 100644
--- a/synapse/events/__init__.py
+++ b/synapse/events/__init__.py
@@ -39,7 +39,7 @@ from unpaddedbase64 import encode_base64
 
 from synapse.api.constants import RelationTypes
 from synapse.api.room_versions import EventFormatVersions, RoomVersion, RoomVersions
-from synapse.types import JsonDict, RoomStreamToken
+from synapse.types import JsonDict, RoomStreamToken, StrCollection
 from synapse.util.caches import intern_dict
 from synapse.util.frozenutils import freeze
 from synapse.util.stringutils import strtobool
@@ -413,7 +413,7 @@ class EventBase(metaclass=abc.ABCMeta):
         """
         return [e for e, _ in self._dict["prev_events"]]
 
-    def auth_event_ids(self) -> Sequence[str]:
+    def auth_event_ids(self) -> 
StrCollection: """Returns the list of auth event IDs. The order matches the order specified in the event, though there is no meaning to it. @@ -558,7 +558,7 @@ class FrozenEventV2(EventBase): """ return self._dict["prev_events"] - def auth_event_ids(self) -> Sequence[str]: + def auth_event_ids(self) -> StrCollection: """Returns the list of auth event IDs. The order matches the order specified in the event, though there is no meaning to it. diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index ffe766fd56..7996cbb557 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -25,7 +25,6 @@ from typing import ( Iterable, List, Optional, - Sequence, Set, Tuple, ) @@ -51,7 +50,7 @@ from synapse.storage.databases.main.search import SearchEntry from synapse.storage.engines import PostgresEngine from synapse.storage.util.id_generators import AbstractStreamIdGenerator from synapse.storage.util.sequence import SequenceGenerator -from synapse.types import JsonDict, StateMap, get_domain_from_id +from synapse.types import JsonDict, StateMap, StrCollection, get_domain_from_id from synapse.util import json_encoder from synapse.util.iterutils import batch_iter, sorted_topologically from synapse.util.stringutils import non_null_str_or_none @@ -552,7 +551,7 @@ class PersistEventsStore: event_chain_id_gen: SequenceGenerator, event_to_room_id: Dict[str, str], event_to_types: Dict[str, Tuple[str, str]], - event_to_auth_chain: Dict[str, Sequence[str]], + event_to_auth_chain: Dict[str, StrCollection], ) -> None: """Calculate the chain cover index for the given events. @@ -846,7 +845,7 @@ class PersistEventsStore: event_chain_id_gen: SequenceGenerator, event_to_room_id: Dict[str, str], event_to_types: Dict[str, Tuple[str, str]], - event_to_auth_chain: Dict[str, Sequence[str]], + event_to_auth_chain: Dict[str, StrCollection], events_to_calc_chain_id_for: Set[str], chain_map: Dict[str, Tuple[int, int]], ) -> Dict[str, Tuple[int, int]]: diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index b9d3c36d60..584536111d 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -13,7 +13,7 @@ # limitations under the License. import logging -from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Set, Tuple, cast +from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, cast import attr @@ -29,7 +29,7 @@ from synapse.storage.database import ( ) from synapse.storage.databases.main.events import PersistEventsStore from synapse.storage.types import Cursor -from synapse.types import JsonDict +from synapse.types import JsonDict, StrCollection if TYPE_CHECKING: from synapse.server import HomeServer @@ -1061,7 +1061,7 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): self.event_chain_id_gen, # type: ignore[attr-defined] event_to_room_id, event_to_types, - cast(Dict[str, Sequence[str]], event_to_auth_chain), + cast(Dict[str, StrCollection], event_to_auth_chain), ) return _CalculateChainCover( -- cgit 1.5.1 From f09db5c9918b6aaeb1f53ab4fac3a7f05f512c5f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 14 Feb 2023 12:10:29 +0100 Subject: Skip calculating unread push actions in `/sync` when `enable_push` is false. 
(#14980) --- changelog.d/14980.misc | 1 + synapse/handlers/sync.py | 8 ++++++++ synapse/storage/databases/main/event_push_actions.py | 7 +++++++ 3 files changed, 16 insertions(+) create mode 100644 changelog.d/14980.misc (limited to 'synapse') diff --git a/changelog.d/14980.misc b/changelog.d/14980.misc new file mode 100644 index 0000000000..145f4a788b --- /dev/null +++ b/changelog.d/14980.misc @@ -0,0 +1 @@ +Skip calculating unread push actions in /sync when enable_push is false. diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 4bae46158a..3a9cddf15a 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -269,6 +269,8 @@ class SyncHandler: self._state_storage_controller = self._storage_controllers.state self._device_handler = hs.get_device_handler() + self.should_calculate_push_rules = hs.config.push.enable_push + # TODO: flush cache entries on subsequent sync request. # Once we get the next /sync request (ie, one with the same access token # that sets 'since' to 'next_batch'), we know that device won't need a @@ -1288,6 +1290,12 @@ class SyncHandler: async def unread_notifs_for_room_id( self, room_id: str, sync_config: SyncConfig ) -> RoomNotifCounts: + if not self.should_calculate_push_rules: + # If push rules have been universally disabled then we know we won't + # have any unread counts in the DB, so we may as well skip asking + # the DB. + return RoomNotifCounts.empty() + with Measure(self.clock, "unread_notifs_for_room_id"): return await self.store.get_unread_event_push_actions_by_room_for_user( diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 3a0c370fde..eeccf5db24 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -203,11 +203,18 @@ class RoomNotifCounts: # Map of thread ID to the notification counts. threads: Dict[str, NotifCounts] + @staticmethod + def empty() -> "RoomNotifCounts": + return _EMPTY_ROOM_NOTIF_COUNTS + def __len__(self) -> int: # To properly account for the amount of space in any caches. return len(self.threads) + 1 +_EMPTY_ROOM_NOTIF_COUNTS = RoomNotifCounts(NotifCounts(), {}) + + def _serialize_action( actions: Collection[Union[Mapping, str]], is_highlight: bool ) -> str: -- cgit 1.5.1 From cb262713b701d1abcbca03334d17e2d0f81eee4a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 14 Feb 2023 12:20:25 +0100 Subject: Fix clashing DB txn name (#15070) * Fix clashing DB txn name * Newsfile --- changelog.d/15070.misc | 1 + synapse/storage/databases/main/end_to_end_keys.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/15070.misc (limited to 'synapse') diff --git a/changelog.d/15070.misc b/changelog.d/15070.misc new file mode 100644 index 0000000000..0f3244de9f --- /dev/null +++ b/changelog.d/15070.misc @@ -0,0 +1 @@ +Fix clashing database transaction name. 
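Synapse tags every database transaction with a description that feeds per-transaction metrics, so two call sites sharing one description get aggregated under a single label; the rename below separates them. A rough sketch of that bookkeeping, with hypothetical names (not Synapse's actual `runInteraction` machinery):

```python
import time
from collections import defaultdict
from typing import Any, Callable, Dict, List

# Hypothetical metrics store: transaction description -> observed durations.
_txn_durations: Dict[str, List[float]] = defaultdict(list)

def run_interaction(desc: str, func: Callable[..., Any], *args: Any) -> Any:
    # `desc` should be unique per call site; reusing a description
    # merges timings for unrelated queries under one label.
    start = time.monotonic()
    try:
        return func(*args)
    finally:
        _txn_durations[desc].append(time.monotonic() - start)
```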
diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index 752dc16e17..2c2d145666 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -262,7 +262,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker for batch in batch_iter(signature_query, 50): cross_sigs_result = await self.db_pool.runInteraction( - "get_e2e_cross_signing_signatures", + "get_e2e_cross_signing_signatures_for_devices", self._get_e2e_cross_signing_signatures_for_devices_txn, batch, ) -- cgit 1.5.1 From 463c19ac3648b242c480e299349d2ef90bf38a0b Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Tue, 14 Feb 2023 12:32:19 +0000 Subject: Faster joins: Omit device list updates from partial state rooms in /sync (#15069) ...when lazy loading of members is not enabled. It's weird to notify a client that another user's device list has changed when the client doesn't think that they share a room. Note that when a room is un-partial stated, device list updates are emitted for every member in that room over /sync. Signed-off-by: Sean Quah --- changelog.d/15069.misc | 1 + synapse/handlers/sync.py | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 changelog.d/15069.misc (limited to 'synapse') diff --git a/changelog.d/15069.misc b/changelog.d/15069.misc new file mode 100644 index 0000000000..e7a619ad2b --- /dev/null +++ b/changelog.d/15069.misc @@ -0,0 +1 @@ +Faster joins: omit device list updates originating from partial state rooms in /sync responses without lazy loading of members enabled. diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 3a9cddf15a..4e4595312c 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1399,6 +1399,11 @@ class SyncHandler: for room_id, is_partial_state in results.items() if is_partial_state ) + membership_change_events = [ + event + for event in membership_change_events + if not results.get(event.room_id, False) + ] # Incremental eager syncs should additionally include rooms that # - we are joined to -- cgit 1.5.1 From e9b1ff9f31f8ff093e7eaf9c54fa8f40a3b66aa8 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Tue, 14 Feb 2023 15:50:59 +0000 Subject: Prevent clients from reporting nonexistent events. (#13779) --- changelog.d/13779.bugfix | 1 + synapse/rest/client/report_event.py | 11 ++++++++++- tests/rest/client/test_report_event.py | 12 ++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13779.bugfix (limited to 'synapse') diff --git a/changelog.d/13779.bugfix b/changelog.d/13779.bugfix new file mode 100644 index 0000000000..a92c722c6e --- /dev/null +++ b/changelog.d/13779.bugfix @@ -0,0 +1 @@ +Prevent clients from reporting nonexistent events. 
\ No newline at end of file diff --git a/synapse/rest/client/report_event.py b/synapse/rest/client/report_event.py index e2b410cf32..9be5860221 100644 --- a/synapse/rest/client/report_event.py +++ b/synapse/rest/client/report_event.py @@ -16,7 +16,7 @@ import logging from http import HTTPStatus from typing import TYPE_CHECKING, Tuple -from synapse.api.errors import Codes, SynapseError +from synapse.api.errors import Codes, NotFoundError, SynapseError from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_json_object_from_request from synapse.http.site import SynapseRequest @@ -39,6 +39,7 @@ class ReportEventRestServlet(RestServlet): self.auth = hs.get_auth() self.clock = hs.get_clock() self.store = hs.get_datastores().main + self._event_handler = self.hs.get_event_handler() async def on_POST( self, request: SynapseRequest, room_id: str, event_id: str @@ -61,6 +62,14 @@ class ReportEventRestServlet(RestServlet): Codes.BAD_JSON, ) + event = await self._event_handler.get_event( + requester.user, room_id, event_id, show_redacted=False + ) + if event is None: + raise NotFoundError( + "Unable to report event: it does not exist or you aren't able to see it." + ) + await self.store.add_event_report( room_id=room_id, event_id=event_id, diff --git a/tests/rest/client/test_report_event.py b/tests/rest/client/test_report_event.py index 7cb1017a4a..1250685d39 100644 --- a/tests/rest/client/test_report_event.py +++ b/tests/rest/client/test_report_event.py @@ -73,6 +73,18 @@ class ReportEventTestCase(unittest.HomeserverTestCase): data = {"reason": None, "score": None} self._assert_status(400, data) + def test_cannot_report_nonexistent_event(self) -> None: + """ + Tests that we don't accept event reports for events which do not exist. + """ + channel = self.make_request( + "POST", + f"rooms/{self.room_id}/report/$nonsenseeventid:test", + {"reason": "i am very sad"}, + access_token=self.other_user_tok, + ) + self.assertEqual(404, channel.code, msg=channel.result["body"]) + def _assert_status(self, response_status: int, data: JsonDict) -> None: channel = self.make_request( "POST", self.report_path, data, access_token=self.other_user_tok -- cgit 1.5.1 From 119e0795a58548fb38fab299e7c362fcbb388d68 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 14 Feb 2023 14:02:19 -0500 Subject: Implement MSC3966: Add a push rule condition to search for a value in an array. (#15045) The `exact_event_property_contains` condition can be used to search for a value inside of an array. --- changelog.d/15045.feature | 1 + rust/benches/evaluator.rs | 32 +++++++++------- rust/src/push/evaluator.rs | 65 +++++++++++++++++++++++++------- rust/src/push/mod.rs | 33 +++++++++++++++- stubs/synapse/synapse_rust/push.pyi | 7 ++-- synapse/config/experimental.py | 5 +++ synapse/push/bulk_push_rule_evaluator.py | 21 +++++++---- synapse/types/__init__.py | 1 + tests/push/test_push_rule_evaluator.py | 53 ++++++++++++++++++++++++-- 9 files changed, 176 insertions(+), 42 deletions(-) create mode 100644 changelog.d/15045.feature (limited to 'synapse') diff --git a/changelog.d/15045.feature b/changelog.d/15045.feature new file mode 100644 index 0000000000..87766befda --- /dev/null +++ b/changelog.d/15045.feature @@ -0,0 +1 @@ +Experimental support for [MSC3966](https://github.com/matrix-org/matrix-spec-proposals/pull/3966): the `exact_event_property_contains` push rule condition. 
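In rough Python terms, the new condition checks whether an exact JSON value appears in an array at some flattened key of the event. A sketch of the intended semantics (hypothetical helper; the actual implementation lives in the Rust evaluator below):

```python
from typing import Any, Mapping

def exact_event_property_contains(
    flattened_event: Mapping[str, Any], key: str, value: Any
) -> bool:
    """Return True if the array at `key` contains `value` exactly."""
    haystack = flattened_event.get(key)
    # Only arrays are searched; a bare string at the key does not match,
    # and comparison is exact and case-sensitive.
    if not isinstance(haystack, (list, tuple)):
        return False
    return value in haystack

# exact_event_property_contains({"content.value": ["foobaz"]}, "content.value", "foobaz") -> True
# exact_event_property_contains({"content.value": "foobaz"}, "content.value", "foobaz") -> False
```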
diff --git a/rust/benches/evaluator.rs b/rust/benches/evaluator.rs index 229553ebf8..8213dfd9ea 100644 --- a/rust/benches/evaluator.rs +++ b/rust/benches/evaluator.rs @@ -15,8 +15,8 @@ #![feature(test)] use std::collections::BTreeSet; use synapse::push::{ - evaluator::PushRuleEvaluator, Condition, EventMatchCondition, FilteredPushRules, PushRules, - SimpleJsonValue, + evaluator::PushRuleEvaluator, Condition, EventMatchCondition, FilteredPushRules, JsonValue, + PushRules, SimpleJsonValue, }; use test::Bencher; @@ -27,15 +27,15 @@ fn bench_match_exact(b: &mut Bencher) { let flattened_keys = [ ( "type".to_string(), - SimpleJsonValue::Str("m.text".to_string()), + JsonValue::Value(SimpleJsonValue::Str("m.text".to_string())), ), ( "room_id".to_string(), - SimpleJsonValue::Str("!room:server".to_string()), + JsonValue::Value(SimpleJsonValue::Str("!room:server".to_string())), ), ( "content.body".to_string(), - SimpleJsonValue::Str("test message".to_string()), + JsonValue::Value(SimpleJsonValue::Str("test message".to_string())), ), ] .into_iter() @@ -54,6 +54,7 @@ fn bench_match_exact(b: &mut Bencher) { vec![], false, false, + false, ) .unwrap(); @@ -76,15 +77,15 @@ fn bench_match_word(b: &mut Bencher) { let flattened_keys = [ ( "type".to_string(), - SimpleJsonValue::Str("m.text".to_string()), + JsonValue::Value(SimpleJsonValue::Str("m.text".to_string())), ), ( "room_id".to_string(), - SimpleJsonValue::Str("!room:server".to_string()), + JsonValue::Value(SimpleJsonValue::Str("!room:server".to_string())), ), ( "content.body".to_string(), - SimpleJsonValue::Str("test message".to_string()), + JsonValue::Value(SimpleJsonValue::Str("test message".to_string())), ), ] .into_iter() @@ -103,6 +104,7 @@ fn bench_match_word(b: &mut Bencher) { vec![], false, false, + false, ) .unwrap(); @@ -125,15 +127,15 @@ fn bench_match_word_miss(b: &mut Bencher) { let flattened_keys = [ ( "type".to_string(), - SimpleJsonValue::Str("m.text".to_string()), + JsonValue::Value(SimpleJsonValue::Str("m.text".to_string())), ), ( "room_id".to_string(), - SimpleJsonValue::Str("!room:server".to_string()), + JsonValue::Value(SimpleJsonValue::Str("!room:server".to_string())), ), ( "content.body".to_string(), - SimpleJsonValue::Str("test message".to_string()), + JsonValue::Value(SimpleJsonValue::Str("test message".to_string())), ), ] .into_iter() @@ -152,6 +154,7 @@ fn bench_match_word_miss(b: &mut Bencher) { vec![], false, false, + false, ) .unwrap(); @@ -174,15 +177,15 @@ fn bench_eval_message(b: &mut Bencher) { let flattened_keys = [ ( "type".to_string(), - SimpleJsonValue::Str("m.text".to_string()), + JsonValue::Value(SimpleJsonValue::Str("m.text".to_string())), ), ( "room_id".to_string(), - SimpleJsonValue::Str("!room:server".to_string()), + JsonValue::Value(SimpleJsonValue::Str("!room:server".to_string())), ), ( "content.body".to_string(), - SimpleJsonValue::Str("test message".to_string()), + JsonValue::Value(SimpleJsonValue::Str("test message".to_string())), ), ] .into_iter() @@ -201,6 +204,7 @@ fn bench_eval_message(b: &mut Bencher) { vec![], false, false, + false, ) .unwrap(); diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs index dd6b4343ec..2eaa06ad76 100644 --- a/rust/src/push/evaluator.rs +++ b/rust/src/push/evaluator.rs @@ -14,6 +14,7 @@ use std::collections::{BTreeMap, BTreeSet}; +use crate::push::JsonValue; use anyhow::{Context, Error}; use lazy_static::lazy_static; use log::warn; @@ -63,7 +64,7 @@ impl RoomVersionFeatures { pub struct PushRuleEvaluator { /// A mapping of "flattened" keys to simple JSON 
values in the event, e.g.
     /// includes things like "type" and "content.msgtype".
-    flattened_keys: BTreeMap<String, SimpleJsonValue>,
+    flattened_keys: BTreeMap<String, JsonValue>,
 
     /// The "content.body", if any.
     body: String,
@@ -87,7 +88,7 @@ pub struct PushRuleEvaluator {
 
     /// The related events, indexed by relation type. Flattened in the same manner as
     /// `flattened_keys`.
-    related_events_flattened: BTreeMap<String, BTreeMap<String, SimpleJsonValue>>,
+    related_events_flattened: BTreeMap<String, BTreeMap<String, JsonValue>>,
 
     /// If msc3664, push rules for related events, is enabled.
     related_event_match_enabled: bool,
@@ -101,6 +102,9 @@ pub struct PushRuleEvaluator {
 
     /// If MSC3758 (exact_event_match push rule condition) is enabled.
     msc3758_exact_event_match: bool,
+
+    /// If MSC3966 (exact_event_property_contains push rule condition) is enabled.
+    msc3966_exact_event_property_contains: bool,
 }
 
 #[pymethods]
@@ -109,21 +113,22 @@ impl PushRuleEvaluator {
     #[allow(clippy::too_many_arguments)]
     #[new]
     pub fn py_new(
-        flattened_keys: BTreeMap<String, SimpleJsonValue>,
+        flattened_keys: BTreeMap<String, JsonValue>,
         has_mentions: bool,
         user_mentions: BTreeSet<String>,
         room_mention: bool,
         room_member_count: u64,
         sender_power_level: Option<i64>,
         notification_power_levels: BTreeMap<String, i64>,
-        related_events_flattened: BTreeMap<String, BTreeMap<String, SimpleJsonValue>>,
+        related_events_flattened: BTreeMap<String, BTreeMap<String, JsonValue>>,
         related_event_match_enabled: bool,
         room_version_feature_flags: Vec<String>,
         msc3931_enabled: bool,
         msc3758_exact_event_match: bool,
+        msc3966_exact_event_property_contains: bool,
     ) -> Result<Self, Error> {
         let body = match flattened_keys.get("content.body") {
-            Some(SimpleJsonValue::Str(s)) => s.clone(),
+            Some(JsonValue::Value(SimpleJsonValue::Str(s))) => s.clone(),
             _ => String::new(),
         };
@@ -141,6 +146,7 @@ impl PushRuleEvaluator {
             room_version_feature_flags,
             msc3931_enabled,
             msc3758_exact_event_match,
+            msc3966_exact_event_property_contains,
         })
     }
@@ -263,6 +269,9 @@ impl PushRuleEvaluator {
             KnownCondition::RelatedEventMatch(event_match) => {
                 self.match_related_event_match(event_match, user_id)?
             }
+            KnownCondition::ExactEventPropertyContains(exact_event_match) => {
+                self.match_exact_event_property_contains(exact_event_match)?
+            }
             KnownCondition::IsUserMention => {
                 if let Some(uid) = user_id {
                     self.user_mentions.contains(uid)
@@ -345,7 +354,7 @@ impl PushRuleEvaluator {
             return Ok(false);
         };
 
-        let haystack = if let Some(SimpleJsonValue::Str(haystack)) =
+        let haystack = if let Some(JsonValue::Value(SimpleJsonValue::Str(haystack))) =
             self.flattened_keys.get(&*event_match.key)
         {
             haystack
@@ -377,7 +386,9 @@ impl PushRuleEvaluator {
 
         let value = &exact_event_match.value;
 
-        let haystack = if let Some(haystack) = self.flattened_keys.get(&*exact_event_match.key) {
+        let haystack = if let Some(JsonValue::Value(haystack)) =
+            self.flattened_keys.get(&*exact_event_match.key)
+        {
             haystack
         } else {
             return Ok(false);
@@ -441,11 +452,12 @@ impl PushRuleEvaluator {
             return Ok(false);
         };
 
-        let haystack = if let Some(SimpleJsonValue::Str(haystack)) = event.get(&**key) {
-            haystack
-        } else {
-            return Ok(false);
-        };
+        let haystack =
+            if let Some(JsonValue::Value(SimpleJsonValue::Str(haystack))) = event.get(&**key) {
+                haystack
+            } else {
+                return Ok(false);
+            };
 
         // For the content.body we match against "words", but for everything
         // else we match against the entire value.
@@ -459,6 +471,29 @@ impl PushRuleEvaluator {
         compiled_pattern.is_match(haystack)
     }
 
+    /// Evaluates a `exact_event_property_contains` condition. (MSC3966)
+    fn match_exact_event_property_contains(
+        &self,
+        exact_event_match: &ExactEventMatchCondition,
+    ) -> Result<bool, Error> {
+        // First check if the feature is enabled.
+        if !self.msc3966_exact_event_property_contains {
+            return Ok(false);
+        }
+
+        let value = &exact_event_match.value;
+
+        let haystack = if let Some(JsonValue::Array(haystack)) =
+            self.flattened_keys.get(&*exact_event_match.key)
+        {
+            haystack
+        } else {
+            return Ok(false);
+        };
+
+        Ok(haystack.contains(&**value))
+    }
+
     /// Match the member count against an 'is' condition
     /// The `is` condition can be things like '>2', '==3' or even just '4'.
     fn match_member_count(&self, is: &str) -> Result<bool, Error> {
@@ -488,7 +523,7 @@ fn push_rule_evaluator() {
     let mut flattened_keys = BTreeMap::new();
     flattened_keys.insert(
         "content.body".to_string(),
-        SimpleJsonValue::Str("foo bar bob hello".to_string()),
+        JsonValue::Value(SimpleJsonValue::Str("foo bar bob hello".to_string())),
     );
     let evaluator = PushRuleEvaluator::py_new(
         flattened_keys,
@@ -503,6 +538,7 @@ fn push_rule_evaluator() {
         vec![],
         true,
         true,
+        true,
     )
     .unwrap();
 
@@ -519,7 +555,7 @@ fn test_requires_room_version_supports_condition() {
     let mut flattened_keys = BTreeMap::new();
     flattened_keys.insert(
         "content.body".to_string(),
-        SimpleJsonValue::Str("foo bar bob hello".to_string()),
+        JsonValue::Value(SimpleJsonValue::Str("foo bar bob hello".to_string())),
     );
     let flags = vec![RoomVersionFeatures::ExtensibleEvents.as_str().to_string()];
     let evaluator = PushRuleEvaluator::py_new(
@@ -535,6 +571,7 @@ fn test_requires_room_version_supports_condition() {
         flags,
         true,
         true,
+        true,
     )
     .unwrap();
 
diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs
index 79e519fe11..253b5f367c 100644
--- a/rust/src/push/mod.rs
+++ b/rust/src/push/mod.rs
@@ -58,7 +58,7 @@ use anyhow::{Context, Error};
 use log::warn;
 use pyo3::exceptions::PyTypeError;
 use pyo3::prelude::*;
-use pyo3::types::{PyBool, PyLong, PyString};
+use pyo3::types::{PyBool, PyList, PyLong, PyString};
 use pythonize::{depythonize, pythonize};
 use serde::de::Error as _;
 use serde::{Deserialize, Serialize};
@@ -280,6 +280,35 @@ impl<'source> FromPyObject<'source> for SimpleJsonValue {
     }
 }
 
+/// A JSON value (list, string, int, boolean, or null).
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
+#[serde(untagged)]
+pub enum JsonValue {
+    Array(Vec<SimpleJsonValue>),
+    Value(SimpleJsonValue),
+}
+
+impl<'source> FromPyObject<'source> for JsonValue {
+    fn extract(ob: &'source PyAny) -> PyResult<Self> {
+        if let Ok(l) = <PyList as pyo3::PyTryFrom>::try_from(ob) {
+            match l.iter().map(SimpleJsonValue::extract).collect() {
+                Ok(a) => Ok(JsonValue::Array(a)),
+                Err(e) => Err(PyTypeError::new_err(format!(
+                    "Can't convert to JsonValue::Array: {}",
+                    e
+                ))),
+            }
+        } else if let Ok(v) = SimpleJsonValue::extract(ob) {
+            Ok(JsonValue::Value(v))
+        } else {
+            Err(PyTypeError::new_err(format!(
+                "Can't convert from {} to JsonValue",
+                ob.get_type().name()?
+            )))
+        }
+    }
+}
+
 /// A condition used in push rules to match against an event.
/// /// We need this split as `serde` doesn't give us the ability to have a @@ -303,6 +332,8 @@ pub enum KnownCondition { ExactEventMatch(ExactEventMatchCondition), #[serde(rename = "im.nheko.msc3664.related_event_match")] RelatedEventMatch(RelatedEventMatchCondition), + #[serde(rename = "org.matrix.msc3966.exact_event_property_contains")] + ExactEventPropertyContains(ExactEventMatchCondition), #[serde(rename = "org.matrix.msc3952.is_user_mention")] IsUserMention, #[serde(rename = "org.matrix.msc3952.is_room_mention")] diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi index 328f681a29..7b33c30cc9 100644 --- a/stubs/synapse/synapse_rust/push.pyi +++ b/stubs/synapse/synapse_rust/push.pyi @@ -14,7 +14,7 @@ from typing import Any, Collection, Dict, Mapping, Optional, Sequence, Set, Tuple, Union -from synapse.types import JsonDict, SimpleJsonValue +from synapse.types import JsonDict, JsonValue class PushRule: @property @@ -56,18 +56,19 @@ def get_base_rule_ids() -> Collection[str]: ... class PushRuleEvaluator: def __init__( self, - flattened_keys: Mapping[str, SimpleJsonValue], + flattened_keys: Mapping[str, JsonValue], has_mentions: bool, user_mentions: Set[str], room_mention: bool, room_member_count: int, sender_power_level: Optional[int], notification_power_levels: Mapping[str, int], - related_events_flattened: Mapping[str, Mapping[str, SimpleJsonValue]], + related_events_flattened: Mapping[str, Mapping[str, JsonValue]], related_event_match_enabled: bool, room_version_feature_flags: Tuple[str, ...], msc3931_enabled: bool, msc3758_exact_event_match: bool, + msc3966_exact_event_property_contains: bool, ): ... def run( self, diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 6ac2f0c10d..1d294f8798 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -188,3 +188,8 @@ class ExperimentalConfig(Config): self.msc3958_supress_edit_notifs = experimental.get( "msc3958_supress_edit_notifs", False ) + + # MSC3966: exact_event_property_contains push rule condition. + self.msc3966_exact_event_property_contains = experimental.get( + "msc3966_exact_event_property_contains", False + ) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index f6a5bffb0f..2e917c90c4 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -44,7 +44,7 @@ from synapse.events.snapshot import EventContext from synapse.state import POWER_KEY from synapse.storage.databases.main.roommember import EventIdMembership from synapse.synapse_rust.push import FilteredPushRules, PushRuleEvaluator -from synapse.types import SimpleJsonValue +from synapse.types import JsonValue from synapse.types.state import StateFilter from synapse.util.caches import register_cache from synapse.util.metrics import measure_func @@ -259,13 +259,13 @@ class BulkPushRuleEvaluator: async def _related_events( self, event: EventBase - ) -> Dict[str, Dict[str, SimpleJsonValue]]: + ) -> Dict[str, Dict[str, JsonValue]]: """Fetches the related events for 'event'. Sets the im.vector.is_falling_back key if the event is from a fallback relation Returns: Mapping of relation type to flattened events. 
""" - related_events: Dict[str, Dict[str, SimpleJsonValue]] = {} + related_events: Dict[str, Dict[str, JsonValue]] = {} if self._related_event_match_enabled: related_event_id = event.content.get("m.relates_to", {}).get("event_id") relation_type = event.content.get("m.relates_to", {}).get("rel_type") @@ -429,6 +429,7 @@ class BulkPushRuleEvaluator: event.room_version.msc3931_push_features, self.hs.config.experimental.msc1767_enabled, # MSC3931 flag self.hs.config.experimental.msc3758_exact_event_match, + self.hs.config.experimental.msc3966_exact_event_property_contains, ) users = rules_by_user.keys() @@ -502,18 +503,22 @@ RulesByUser = Dict[str, List[Rule]] StateGroup = Union[object, int] +def _is_simple_value(value: Any) -> bool: + return isinstance(value, (bool, str)) or type(value) is int or value is None + + def _flatten_dict( d: Union[EventBase, Mapping[str, Any]], prefix: Optional[List[str]] = None, - result: Optional[Dict[str, SimpleJsonValue]] = None, + result: Optional[Dict[str, JsonValue]] = None, *, msc3783_escape_event_match_key: bool = False, -) -> Dict[str, SimpleJsonValue]: +) -> Dict[str, JsonValue]: """ Given a JSON dictionary (or event) which might contain sub dictionaries, flatten it into a single layer dictionary by combining the keys & sub-keys. - String, integer, boolean, and null values are kept. All others are dropped. + String, integer, boolean, null or lists of those values are kept. All others are dropped. Transforms: @@ -542,8 +547,10 @@ def _flatten_dict( # nested fields. key = key.replace("\\", "\\\\").replace(".", "\\.") - if isinstance(value, (bool, str)) or type(value) is int or value is None: + if _is_simple_value(value): result[".".join(prefix + [key])] = value + elif isinstance(value, (list, tuple)): + result[".".join(prefix + [key])] = [v for v in value if _is_simple_value(v)] elif isinstance(value, Mapping): # do not set `room_version` due to recursion considerations below _flatten_dict( diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py index 52e366c8ae..33363867c4 100644 --- a/synapse/types/__init__.py +++ b/synapse/types/__init__.py @@ -71,6 +71,7 @@ MutableStateMap = MutableMapping[StateKey, T] # JSON types. These could be made stronger, but will do for now. # A "simple" (canonical) JSON value. SimpleJsonValue = Optional[Union[str, int, bool]] +JsonValue = Union[List[SimpleJsonValue], Tuple[SimpleJsonValue, ...], SimpleJsonValue] # A JSON-serialisable dict. JsonDict = Dict[str, Any] # A JSON-serialisable mapping; roughly speaking an immutable JSONDict. 
diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py index 6603447341..0554d247bc 100644 --- a/tests/push/test_push_rule_evaluator.py +++ b/tests/push/test_push_rule_evaluator.py @@ -32,6 +32,7 @@ from synapse.storage.databases.main.appservice import _make_exclusive_regex from synapse.synapse_rust.push import PushRuleEvaluator from synapse.types import JsonDict, JsonMapping, UserID from synapse.util import Clock +from synapse.util.frozenutils import freeze from tests import unittest from tests.test_utils.event_injection import create_event, inject_member_event @@ -57,17 +58,24 @@ class FlattenDictTestCase(unittest.TestCase): ) def test_non_string(self) -> None: - """Booleans, ints, and nulls should be kept while other items are dropped.""" + """String, booleans, ints, nulls and list of those should be kept while other items are dropped.""" input: Dict[str, Any] = { "woo": "woo", "foo": True, "bar": 1, "baz": None, - "fuzz": [], + "fuzz": ["woo", True, 1, None, [], {}], "boo": {}, } self.assertEqual( - {"woo": "woo", "foo": True, "bar": 1, "baz": None}, _flatten_dict(input) + { + "woo": "woo", + "foo": True, + "bar": 1, + "baz": None, + "fuzz": ["woo", True, 1, None], + }, + _flatten_dict(input), ) def test_event(self) -> None: @@ -117,6 +125,7 @@ class FlattenDictTestCase(unittest.TestCase): "room_id": "!test:test", "sender": "@alice:test", "type": "m.room.message", + "content.org.matrix.msc1767.markup": [], } self.assertEqual(expected, _flatten_dict(event)) @@ -128,6 +137,7 @@ class FlattenDictTestCase(unittest.TestCase): "room_id": "!test:test", "sender": "@alice:test", "type": "m.room.message", + "content.org.matrix.msc1767.markup": [], } self.assertEqual(expected, _flatten_dict(event)) @@ -169,6 +179,7 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): room_version_feature_flags=event.room_version.msc3931_push_features, msc3931_enabled=True, msc3758_exact_event_match=True, + msc3966_exact_event_property_contains=True, ) def test_display_name(self) -> None: @@ -549,6 +560,42 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): "incorrect types should not match", ) + def test_exact_event_property_contains(self) -> None: + """Check that exact_event_property_contains conditions work as expected.""" + + condition = { + "kind": "org.matrix.msc3966.exact_event_property_contains", + "key": "content.value", + "value": "foobaz", + } + self._assert_matches( + condition, + {"value": ["foobaz"]}, + "exact value should match", + ) + self._assert_matches( + condition, + {"value": ["foobaz", "bugz"]}, + "extra values should match", + ) + self._assert_not_matches( + condition, + {"value": ["FoobaZ"]}, + "values should match and be case-sensitive", + ) + self._assert_not_matches( + condition, + {"value": "foobaz"}, + "does not search in a string", + ) + + # it should work on frozendicts too + self._assert_matches( + condition, + freeze({"value": ["foobaz"]}), + "values should match on frozendicts", + ) + def test_no_body(self) -> None: """Not having a body shouldn't break the evaluator.""" evaluator = self._get_evaluator({}) -- cgit 1.5.1 From 06ba71083eefbe1fd9a8eeed10e541dd7b52796f Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 14 Feb 2023 23:42:29 +0000 Subject: Fix order of partial state tables when purging (#15068) * Fix order of partial state tables when purging `partial_state_rooms` has an FK on `events` pointing to the join event we get from `/send_join`, so we must delete from that table before deleting from `events`. 
**NB:** It would be nice to cancel any resync processes for the room being purged. We do not do this at present. To do so reliably we'd need an internal HTTP "replication" endpoint, because the worker doing the resync process may be different to that handling the purge request. The first time the resync process tries to write data after the deletion it will fail because we have deleted necessary data e.g. auth events. AFAICS it will not retry the resync, so the only downside to not cancelling the resync is a scary-looking traceback. (This is presumably extremely race-sensitive.) * Changelog * admist(?) -> between * Warn about a race * Fix typo, thanks Sean Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> --------- Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> --- changelog.d/15068.bugfix | 1 + synapse/handlers/federation.py | 5 +++++ synapse/storage/databases/main/purge_events.py | 6 ++++-- 3 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 changelog.d/15068.bugfix (limited to 'synapse') diff --git a/changelog.d/15068.bugfix b/changelog.d/15068.bugfix new file mode 100644 index 0000000000..f09ffa2877 --- /dev/null +++ b/changelog.d/15068.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse 1.76.0 where partially-joined rooms could not be deleted using the [purge room API](https://matrix-org.github.io/synapse/latest/admin_api/rooms.html#delete-room-api). diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 08727e4857..1d0f6bcd6f 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1880,6 +1880,11 @@ class FederationHandler: logger.info("Updating current state for %s", room_id) # TODO(faster_joins): notify workers in notify_room_un_partial_stated # https://github.com/matrix-org/synapse/issues/12994 + # + # NB: there's a potential race here. If room is purged just before we + # call this, we _might_ end up inserting rows into current_state_events. + # (The logic is hard to chase through.) We think this is fine, but if + # not the HS admin should purge the room again. await self.state_handler.update_current_state(room_id) logger.info("Handling any pending device list updates") diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py index 9213ce0b5a..9c41d01e13 100644 --- a/synapse/storage/databases/main/purge_events.py +++ b/synapse/storage/databases/main/purge_events.py @@ -420,12 +420,14 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore): "event_push_actions", "event_search", "event_failed_pull_attempts", + # Note: the partial state tables have foreign keys between each other, and to + # `events` and `rooms`. We need to delete from them in the right order. 
"partial_state_events", + "partial_state_rooms_servers", + "partial_state_rooms", "events", "federation_inbound_events_staging", "local_current_membership", - "partial_state_rooms_servers", - "partial_state_rooms", "receipts_graph", "receipts_linearized", "room_aliases", -- cgit 1.5.1 From 5febf88b6c5194582f427142dc0850625547c0d9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 15 Feb 2023 11:47:57 +0000 Subject: Update the error code for duplicate annotation (#15075) --- changelog.d/15075.feature | 2 ++ synapse/api/errors.py | 4 ++++ synapse/handlers/message.py | 6 +++++- 3 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 changelog.d/15075.feature (limited to 'synapse') diff --git a/changelog.d/15075.feature b/changelog.d/15075.feature new file mode 100644 index 0000000000..d25a7567a4 --- /dev/null +++ b/changelog.d/15075.feature @@ -0,0 +1,2 @@ +Update the error code returned when user sends a duplicate annotation. + diff --git a/synapse/api/errors.py b/synapse/api/errors.py index 9235ce6536..e1737de59b 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -108,6 +108,10 @@ class Codes(str, Enum): USER_AWAITING_APPROVAL = "ORG.MATRIX.MSC3866_USER_AWAITING_APPROVAL" + # Attempt to send a second annotation with the same event type & annotation key + # MSC2677 + DUPLICATE_ANNOTATION = "M_DUPLICATE_ANNOTATION" + class CodeMessageException(RuntimeError): """An exception with integer code and message string attributes. diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 8f5b658d9d..aa90d0000d 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1337,7 +1337,11 @@ class EventCreationHandler: relation.parent_id, event.type, aggregation_key, event.sender ) if already_exists: - raise SynapseError(400, "Can't send same reaction twice") + raise SynapseError( + 400, + "Can't send same reaction twice", + errcode=Codes.DUPLICATE_ANNOTATION, + ) # Don't attempt to start a thread if the parent event is a relation. elif relation.rel_type == RelationTypes.THREAD: -- cgit 1.5.1 From 27a3a72a50cb24f25e48fad1e6e79aba2cd1bea2 Mon Sep 17 00:00:00 2001 From: 999lakhisidhu <42063995+999lakhisidhu@users.noreply.github.com> Date: Wed, 15 Feb 2023 16:39:31 +0400 Subject: Support for selecting the Redis logical database. (#15034) Note that this is only used for key-value store (cached values) and not for the pub/sub replication used by Synapse. --- changelog.d/15034.feature | 1 + contrib/docker_compose_workers/README.md | 1 + docs/usage/configuration/config_documentation.md | 4 ++++ synapse/config/redis.py | 1 + synapse/server.py | 1 + 5 files changed, 8 insertions(+) create mode 100644 changelog.d/15034.feature (limited to 'synapse') diff --git a/changelog.d/15034.feature b/changelog.d/15034.feature new file mode 100644 index 0000000000..34f320da92 --- /dev/null +++ b/changelog.d/15034.feature @@ -0,0 +1 @@ +Allow Synapse to use a specific Redis [logical database](https://redis.io/commands/select/) in worker-mode deployments. 
diff --git a/contrib/docker_compose_workers/README.md b/contrib/docker_compose_workers/README.md index bdd3dd32e0..d3cdfe5614 100644 --- a/contrib/docker_compose_workers/README.md +++ b/contrib/docker_compose_workers/README.md @@ -68,6 +68,7 @@ redis: enabled: true host: redis port: 6379 + # dbid: # password: ``` diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 2883f76a26..75483bfb12 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -3927,6 +3927,9 @@ This setting has the following sub-options: * `host` and `port`: Optional host and port to use to connect to redis. Defaults to localhost and 6379 * `password`: Optional password if configured on the Redis instance. +* `dbid`: Optional redis dbid if needs to connect to specific redis logical db. + + _Added in Synapse 1.78.0._ Example configuration: ```yaml @@ -3935,6 +3938,7 @@ redis: host: localhost port: 6379 password: + dbid: ``` --- ## Individual worker configuration diff --git a/synapse/config/redis.py b/synapse/config/redis.py index b42dd2e93a..e6a75be434 100644 --- a/synapse/config/redis.py +++ b/synapse/config/redis.py @@ -33,4 +33,5 @@ class RedisConfig(Config): self.redis_host = redis_config.get("host", "localhost") self.redis_port = redis_config.get("port", 6379) + self.redis_dbid = redis_config.get("dbid", None) self.redis_password = redis_config.get("password") diff --git a/synapse/server.py b/synapse/server.py index efc6b5f895..e5a3475247 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -827,6 +827,7 @@ class HomeServer(metaclass=abc.ABCMeta): hs=self, host=self.config.redis.redis_host, port=self.config.redis.redis_port, + dbid=self.config.redis.redis_dbid, password=self.config.redis.redis_password, reconnect=True, ) -- cgit 1.5.1 From 3ad817bfe561e0b7ddcd8398a76a4a4d3d789138 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Wed, 15 Feb 2023 13:59:06 +0000 Subject: Fix federated joins when the first server in the list is not in the room (#15074) Previously we would give up upon receiving a 404 from the first server, instead of trying the rest of the servers in the list. Signed-off-by: Sean Quah --- changelog.d/15074.bugfix | 1 + synapse/federation/federation_client.py | 11 +++++------ 2 files changed, 6 insertions(+), 6 deletions(-) create mode 100644 changelog.d/15074.bugfix (limited to 'synapse') diff --git a/changelog.d/15074.bugfix b/changelog.d/15074.bugfix new file mode 100644 index 0000000000..d1ceb4f4c8 --- /dev/null +++ b/changelog.d/15074.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug where federated joins would fail if the first server in the list of servers to try is not in the room. diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 0ac85a3be7..7d04560dca 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -884,7 +884,7 @@ class FederationClient(FederationBase): if 500 <= e.code < 600: failover = True - elif e.code == 400 and synapse_error.errcode in failover_errcodes: + elif 400 <= e.code < 500 and synapse_error.errcode in failover_errcodes: failover = True elif failover_on_unknown_endpoint and self._is_unknown_endpoint( @@ -999,14 +999,13 @@ class FederationClient(FederationBase): return destination, ev, room_version + failover_errcodes = {Codes.NOT_FOUND} # MSC3083 defines additional error codes for room joins. 
Unfortunately # we do not yet know the room version, assume these will only be returned # by valid room versions. - failover_errcodes = ( - (Codes.UNABLE_AUTHORISE_JOIN, Codes.UNABLE_TO_GRANT_JOIN) - if membership == Membership.JOIN - else None - ) + if membership == Membership.JOIN: + failover_errcodes.add(Codes.UNABLE_AUTHORISE_JOIN) + failover_errcodes.add(Codes.UNABLE_TO_GRANT_JOIN) return await self._try_destination_list( "make_" + membership, -- cgit 1.5.1 From 979f237b282cbdaab8d74cc4c7473117093d63d9 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 16 Feb 2023 09:51:22 -0500 Subject: Update intentional mentions (MSC3952) to depend on `exact_event_match` (MSC3758). (#15037) This replaces the specific `is_room_mention` push rule condition used in MSC3952 with the generic `exact_event_match` push rule condition from MSC3758. No functionality changes due to this. --- changelog.d/15037.misc | 1 + rust/benches/evaluator.rs | 4 ---- rust/src/push/base_rules.rs | 7 +++++-- rust/src/push/evaluator.rs | 7 ------- rust/src/push/mod.rs | 13 ------------- stubs/synapse/synapse_rust/push.pyi | 1 - synapse/config/experimental.py | 7 ++++--- synapse/push/bulk_push_rule_evaluator.py | 4 ---- tests/push/test_bulk_push_rule_evaluator.py | 18 ++++++++++++++++-- tests/push/test_push_rule_evaluator.py | 23 ----------------------- 10 files changed, 26 insertions(+), 59 deletions(-) create mode 100644 changelog.d/15037.misc (limited to 'synapse') diff --git a/changelog.d/15037.misc b/changelog.d/15037.misc new file mode 100644 index 0000000000..fabfe77d35 --- /dev/null +++ b/changelog.d/15037.misc @@ -0,0 +1 @@ +Update [MSC3952](https://github.com/matrix-org/matrix-spec-proposals/pull/3952) support based on changes to the MSC. diff --git a/rust/benches/evaluator.rs b/rust/benches/evaluator.rs index 8213dfd9ea..efd19a2165 100644 --- a/rust/benches/evaluator.rs +++ b/rust/benches/evaluator.rs @@ -45,7 +45,6 @@ fn bench_match_exact(b: &mut Bencher) { flattened_keys, false, BTreeSet::new(), - false, 10, Some(0), Default::default(), @@ -95,7 +94,6 @@ fn bench_match_word(b: &mut Bencher) { flattened_keys, false, BTreeSet::new(), - false, 10, Some(0), Default::default(), @@ -145,7 +143,6 @@ fn bench_match_word_miss(b: &mut Bencher) { flattened_keys, false, BTreeSet::new(), - false, 10, Some(0), Default::default(), @@ -195,7 +192,6 @@ fn bench_eval_message(b: &mut Bencher) { flattened_keys, false, BTreeSet::new(), - false, 10, Some(0), Default::default(), diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs index dcbca340fe..4a62b9696f 100644 --- a/rust/src/push/base_rules.rs +++ b/rust/src/push/base_rules.rs @@ -21,13 +21,13 @@ use lazy_static::lazy_static; use serde_json::Value; use super::KnownCondition; -use crate::push::Action; use crate::push::Condition; use crate::push::EventMatchCondition; use crate::push::PushRule; use crate::push::RelatedEventMatchCondition; use crate::push::SetTweak; use crate::push::TweakValue; +use crate::push::{Action, ExactEventMatchCondition, SimpleJsonValue}; const HIGHLIGHT_ACTION: Action = Action::SetTweak(SetTweak { set_tweak: Cow::Borrowed("highlight"), @@ -168,7 +168,10 @@ pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[ rule_id: Cow::Borrowed(".org.matrix.msc3952.is_room_mention"), priority_class: 5, conditions: Cow::Borrowed(&[ - Condition::Known(KnownCondition::IsRoomMention), + Condition::Known(KnownCondition::ExactEventMatch(ExactEventMatchCondition { + key: Cow::Borrowed("content.org.matrix.msc3952.mentions.room"), + value: 
Cow::Borrowed(&SimpleJsonValue::Bool(true)), + })), Condition::Known(KnownCondition::SenderNotificationPermission { key: Cow::Borrowed("room"), }), diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs index 2eaa06ad76..55551ecb56 100644 --- a/rust/src/push/evaluator.rs +++ b/rust/src/push/evaluator.rs @@ -73,8 +73,6 @@ pub struct PushRuleEvaluator { has_mentions: bool, /// The user mentions that were part of the message. user_mentions: BTreeSet, - /// True if the message is a room message. - room_mention: bool, /// The number of users in the room. room_member_count: u64, @@ -116,7 +114,6 @@ impl PushRuleEvaluator { flattened_keys: BTreeMap, has_mentions: bool, user_mentions: BTreeSet, - room_mention: bool, room_member_count: u64, sender_power_level: Option, notification_power_levels: BTreeMap, @@ -137,7 +134,6 @@ impl PushRuleEvaluator { body, has_mentions, user_mentions, - room_mention, room_member_count, notification_power_levels, sender_power_level, @@ -279,7 +275,6 @@ impl PushRuleEvaluator { false } } - KnownCondition::IsRoomMention => self.room_mention, KnownCondition::ContainsDisplayName => { if let Some(dn) = display_name { if !dn.is_empty() { @@ -529,7 +524,6 @@ fn push_rule_evaluator() { flattened_keys, false, BTreeSet::new(), - false, 10, Some(0), BTreeMap::new(), @@ -562,7 +556,6 @@ fn test_requires_room_version_supports_condition() { flattened_keys, false, BTreeSet::new(), - false, 10, Some(0), BTreeMap::new(), diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs index 253b5f367c..fdd2b2c143 100644 --- a/rust/src/push/mod.rs +++ b/rust/src/push/mod.rs @@ -336,8 +336,6 @@ pub enum KnownCondition { ExactEventPropertyContains(ExactEventMatchCondition), #[serde(rename = "org.matrix.msc3952.is_user_mention")] IsUserMention, - #[serde(rename = "org.matrix.msc3952.is_room_mention")] - IsRoomMention, ContainsDisplayName, RoomMemberCount { #[serde(skip_serializing_if = "Option::is_none")] @@ -667,17 +665,6 @@ fn test_deserialize_unstable_msc3952_user_condition() { )); } -#[test] -fn test_deserialize_unstable_msc3952_room_condition() { - let json = r#"{"kind":"org.matrix.msc3952.is_room_mention"}"#; - - let condition: Condition = serde_json::from_str(json).unwrap(); - assert!(matches!( - condition, - Condition::Known(KnownCondition::IsRoomMention) - )); -} - #[test] fn test_deserialize_custom_condition() { let json = r#"{"kind":"custom_tag"}"#; diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi index 7b33c30cc9..a8f0ed2435 100644 --- a/stubs/synapse/synapse_rust/push.pyi +++ b/stubs/synapse/synapse_rust/push.pyi @@ -59,7 +59,6 @@ class PushRuleEvaluator: flattened_keys: Mapping[str, JsonValue], has_mentions: bool, user_mentions: Set[str], - room_mention: bool, room_member_count: int, sender_power_level: Optional[int], notification_power_levels: Mapping[str, int], diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 1d294f8798..54c91953e1 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -179,9 +179,10 @@ class ExperimentalConfig(Config): "msc3783_escape_event_match_key", False ) - # MSC3952: Intentional mentions - self.msc3952_intentional_mentions = experimental.get( - "msc3952_intentional_mentions", False + # MSC3952: Intentional mentions, this depends on MSC3758. + self.msc3952_intentional_mentions = ( + experimental.get("msc3952_intentional_mentions", False) + and self.msc3758_exact_event_match ) # MSC3959: Do not generate notifications for edits. 
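To make the rule change above concrete: the `.org.matrix.msc3952.is_room_mention` override rule's first condition is now a generic `exact_event_match` on the flattened event, rather than the bespoke `is_room_mention` kind. A hedged sketch of the condition written as a Python dict, in the style of the push-rule tests; the unstable `kind` identifier used here is an assumption:

```python
# Assumed unstable name for MSC3758's exact_event_match condition kind.
condition = {
    "kind": "com.beeper.msc3758.exact_event_match",
    "key": "content.org.matrix.msc3952.mentions.room",
    # Matches only an exact JSON `true`, preserving the behaviour of the
    # `mentions.get("room") is True` check removed in the evaluator below.
    "value": True,
}
```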
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 2e917c90c4..5fc38431ba 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -400,7 +400,6 @@ class BulkPushRuleEvaluator: mentions = event.content.get(EventContentFields.MSC3952_MENTIONS) has_mentions = self._intentional_mentions_enabled and isinstance(mentions, dict) user_mentions: Set[str] = set() - room_mention = False if has_mentions: # mypy seems to have lost the type even though it must be a dict here. assert isinstance(mentions, dict) @@ -410,8 +409,6 @@ class BulkPushRuleEvaluator: user_mentions = set( filter(lambda item: isinstance(item, str), user_mentions_raw) ) - # Room mention is only true if the value is exactly true. - room_mention = mentions.get("room") is True evaluator = PushRuleEvaluator( _flatten_dict( @@ -420,7 +417,6 @@ class BulkPushRuleEvaluator: ), has_mentions, user_mentions, - room_mention, room_member_count, sender_power_level, notification_levels, diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py index 7567756135..199e3d7b70 100644 --- a/tests/push/test_bulk_push_rule_evaluator.py +++ b/tests/push/test_bulk_push_rule_evaluator.py @@ -227,7 +227,14 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): ) return len(result) > 0 - @override_config({"experimental_features": {"msc3952_intentional_mentions": True}}) + @override_config( + { + "experimental_features": { + "msc3758_exact_event_match": True, + "msc3952_intentional_mentions": True, + } + } + ) def test_user_mentions(self) -> None: """Test the behavior of an event which includes invalid user mentions.""" bulk_evaluator = BulkPushRuleEvaluator(self.hs) @@ -323,7 +330,14 @@ class TestBulkPushRuleEvaluator(HomeserverTestCase): ) ) - @override_config({"experimental_features": {"msc3952_intentional_mentions": True}}) + @override_config( + { + "experimental_features": { + "msc3758_exact_event_match": True, + "msc3952_intentional_mentions": True, + } + } + ) def test_room_mentions(self) -> None: """Test the behavior of an event which includes invalid room mentions.""" bulk_evaluator = BulkPushRuleEvaluator(self.hs) diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py index 0554d247bc..d320a12f96 100644 --- a/tests/push/test_push_rule_evaluator.py +++ b/tests/push/test_push_rule_evaluator.py @@ -149,7 +149,6 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): *, has_mentions: bool = False, user_mentions: Optional[Set[str]] = None, - room_mention: bool = False, related_events: Optional[JsonDict] = None, ) -> PushRuleEvaluator: event = FrozenEvent( @@ -170,7 +169,6 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): _flatten_dict(event), has_mentions, user_mentions or set(), - room_mention, room_member_count, sender_power_level, cast(Dict[str, int], power_levels.get("notifications", {})), @@ -232,27 +230,6 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): # Note that invalid data is tested at tests.push.test_bulk_push_rule_evaluator.TestBulkPushRuleEvaluator.test_mentions # since the BulkPushRuleEvaluator is what handles data sanitisation. - def test_room_mentions(self) -> None: - """Check for room mentions.""" - condition = {"kind": "org.matrix.msc3952.is_room_mention"} - - # No room mention shouldn't match. - evaluator = self._get_evaluator({}, has_mentions=True) - self.assertFalse(evaluator.matches(condition, None, None)) - - # Room mention should match. 
- evaluator = self._get_evaluator({}, has_mentions=True, room_mention=True) - self.assertTrue(evaluator.matches(condition, None, None)) - - # A room mention and user mention is valid. - evaluator = self._get_evaluator( - {}, has_mentions=True, user_mentions={"@another:test"}, room_mention=True - ) - self.assertTrue(evaluator.matches(condition, None, None)) - - # Note that invalid data is tested at tests.push.test_bulk_push_rule_evaluator.TestBulkPushRuleEvaluator.test_mentions - # since the BulkPushRuleEvaluator is what handles data sanitisation. - def _assert_matches( self, condition: JsonDict, content: JsonMapping, msg: Optional[str] = None ) -> None: -- cgit 1.5.1 From ffc2ee521d26f5b842df7902ade5de7a538e602d Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 16 Feb 2023 16:09:11 +0000 Subject: Use mypy 1.0 (#15052) * Update mypy and mypy-zope * Remove unused ignores These used to suppress ``` synapse/storage/engines/__init__.py:28: error: "__new__" must return a class instance (got "NoReturn") [misc] ``` and ``` synapse/http/matrixfederationclient.py:1270: error: "BaseException" has no attribute "reasons" [attr-defined] ``` (note that we check `hasattr(e, "reasons")` above) * Avoid empty body warnings, sometimes by marking methods as abstract E.g. ``` tests/handlers/test_register.py:58: error: Missing return statement [empty-body] tests/handlers/test_register.py:108: error: Missing return statement [empty-body] ``` * Suppress false positive about `JaegerConfig` Complaint was ``` synapse/logging/opentracing.py:450: error: Function "Type[Config]" could always be true in boolean context [truthy-function] ``` * Fix not calling `is_state()` Oops! ``` tests/rest/client/test_third_party_rules.py:428: error: Function "Callable[[], bool]" could always be true in boolean context [truthy-function] ``` * Suppress false positives from ParamSpecs ```` synapse/logging/opentracing.py:971: error: Argument 2 to "_custom_sync_async_decorator" has incompatible type "Callable[[Arg(Callable[P, R], 'func'), **P], _GeneratorContextManager[None]]"; expected "Callable[[Callable[P, R], **P], _GeneratorContextManager[None]]" [arg-type] synapse/logging/opentracing.py:1017: error: Argument 2 to "_custom_sync_async_decorator" has incompatible type "Callable[[Arg(Callable[P, R], 'func'), **P], _GeneratorContextManager[None]]"; expected "Callable[[Callable[P, R], **P], _GeneratorContextManager[None]]" [arg-type] ```` * Drive-by improvement to `wrapping_logic` annotation * Workaround false "unreachable" positives See https://github.com/Shoobx/mypy-zope/issues/91 ``` tests/http/test_proxyagent.py:626: error: Statement is unreachable [unreachable] tests/http/test_proxyagent.py:762: error: Statement is unreachable [unreachable] tests/http/test_proxyagent.py:826: error: Statement is unreachable [unreachable] tests/http/test_proxyagent.py:838: error: Statement is unreachable [unreachable] tests/http/test_proxyagent.py:845: error: Statement is unreachable [unreachable] tests/http/federation/test_matrix_federation_agent.py:151: error: Statement is unreachable [unreachable] tests/http/federation/test_matrix_federation_agent.py:452: error: Statement is unreachable [unreachable] tests/logging/test_remote_handler.py:60: error: Statement is unreachable [unreachable] tests/logging/test_remote_handler.py:93: error: Statement is unreachable [unreachable] tests/logging/test_remote_handler.py:127: error: Statement is unreachable [unreachable] tests/logging/test_remote_handler.py:152: error: Statement is unreachable [unreachable] 
``` * Changelog * Tweak DBAPI2 Protocol to be accepted by mypy 1.0 Some extra context in: - https://github.com/matrix-org/python-canonicaljson/pull/57 - https://github.com/python/mypy/issues/6002 - https://mypy.readthedocs.io/en/latest/common_issues.html#covariant-subtyping-of-mutable-protocol-members-is-rejected * Pull in updated canonicaljson lib so the protocol check just works * Improve comments in opentracing I tried to work around the ignores but found it too much trouble. I think the corresponding issue is https://github.com/python/mypy/issues/12909. The mypy repo has a PR claiming to fix this (https://github.com/python/mypy/pull/14677) which might mean this gets resolved soon? * Better annotation for INTERACTIVE_AUTH_CHECKERS * Drive-by AUTH_TYPE annotation, to remove an ignore --- changelog.d/15052.misc | 1 + poetry.lock | 69 ++++++++++---------- synapse/handlers/auth.py | 2 +- synapse/handlers/ui_auth/checkers.py | 18 ++++-- synapse/http/matrixfederationclient.py | 2 +- synapse/logging/opentracing.py | 24 +++++-- synapse/rest/media/v1/_base.py | 9 ++- synapse/storage/engines/__init__.py | 4 +- synapse/storage/types.py | 74 ++++++++++++++++++---- synapse/streams/__init__.py | 7 +- tests/handlers/test_register.py | 4 +- .../federation/test_matrix_federation_agent.py | 11 ++-- tests/http/test_proxyagent.py | 40 ++++++------ tests/logging/test_remote_handler.py | 17 ++--- tests/rest/client/test_auth.py | 3 + tests/rest/client/test_third_party_rules.py | 2 +- tests/utils.py | 26 +++++++- 17 files changed, 209 insertions(+), 104 deletions(-) create mode 100644 changelog.d/15052.misc (limited to 'synapse') diff --git a/changelog.d/15052.misc b/changelog.d/15052.misc new file mode 100644 index 0000000000..93ceaeafc9 --- /dev/null +++ b/changelog.d/15052.misc @@ -0,0 +1 @@ +Improve type hints.
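Before the dependency and per-file changes below, here is the `empty-body` fix pattern from this commit distilled into a self-contained sketch (the class and method names are illustrative, not from the codebase): required overrides become abstract and raise `NotImplementedError`, while deliberately-empty optional hooks keep a `pass` body and silence flake8-bugbear's B027 check:

```python
from abc import ABC, abstractmethod


class ExampleChecker(ABC):
    """Illustrative base class following the conventions applied below."""

    def on_startup(self) -> None:  # noqa: B027
        # Intentionally empty optional hook; B027 flags empty methods on
        # abstract base classes, so the no-op is marked as deliberate.
        pass

    @abstractmethod
    def is_enabled(self) -> bool:
        """Required override; raising satisfies mypy 1.0's empty-body check."""
        raise NotImplementedError()
```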
diff --git a/poetry.lock b/poetry.lock index e534b30d2b..eb1e3d797b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -146,14 +146,14 @@ css = ["tinycss2 (>=1.1.0,<1.2)"] [[package]] name = "canonicaljson" -version = "1.6.4" +version = "1.6.5" description = "Canonical JSON" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "canonicaljson-1.6.4-py3-none-any.whl", hash = "sha256:55d282853b4245dbcd953fe54c39b91571813d7c44e1dbf66e3c4f97ff134a48"}, - {file = "canonicaljson-1.6.4.tar.gz", hash = "sha256:6c09b2119511f30eb1126cfcd973a10824e20f1cfd25039cde3d1218dd9c8d8f"}, + {file = "canonicaljson-1.6.5-py3-none-any.whl", hash = "sha256:806ea6f2cbb7405d20259e1c36dd1214ba5c242fa9165f5bd0bf2081f82c23fb"}, + {file = "canonicaljson-1.6.5.tar.gz", hash = "sha256:68dfc157b011e07d94bf74b5d4ccc01958584ed942d9dfd5fdd706609e81cd4b"}, ] [package.dependencies] @@ -1146,36 +1146,38 @@ files = [ [[package]] name = "mypy" -version = "0.981" +version = "1.0.0" description = "Optional static typing for Python" category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "mypy-0.981-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4bc460e43b7785f78862dab78674e62ec3cd523485baecfdf81a555ed29ecfa0"}, - {file = "mypy-0.981-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:756fad8b263b3ba39e4e204ee53042671b660c36c9017412b43af210ddee7b08"}, - {file = "mypy-0.981-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a16a0145d6d7d00fbede2da3a3096dcc9ecea091adfa8da48fa6a7b75d35562d"}, - {file = "mypy-0.981-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce65f70b14a21fdac84c294cde75e6dbdabbcff22975335e20827b3b94bdbf49"}, - {file = "mypy-0.981-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6e35d764784b42c3e256848fb8ed1d4292c9fc0098413adb28d84974c095b279"}, - {file = "mypy-0.981-cp310-cp310-win_amd64.whl", hash = "sha256:e53773073c864d5f5cec7f3fc72fbbcef65410cde8cc18d4f7242dea60dac52e"}, - {file = "mypy-0.981-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6ee196b1d10b8b215e835f438e06965d7a480f6fe016eddbc285f13955cca659"}, - {file = "mypy-0.981-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ad21d4c9d3673726cf986ea1d0c9fb66905258709550ddf7944c8f885f208be"}, - {file = "mypy-0.981-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d1debb09043e1f5ee845fa1e96d180e89115b30e47c5d3ce53bc967bab53f62d"}, - {file = "mypy-0.981-cp37-cp37m-win_amd64.whl", hash = "sha256:9f362470a3480165c4c6151786b5379351b790d56952005be18bdbdd4c7ce0ae"}, - {file = "mypy-0.981-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c9e0efb95ed6ca1654951bd5ec2f3fa91b295d78bf6527e026529d4aaa1e0c30"}, - {file = "mypy-0.981-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e178eaffc3c5cd211a87965c8c0df6da91ed7d258b5fc72b8e047c3771317ddb"}, - {file = "mypy-0.981-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:06e1eac8d99bd404ed8dd34ca29673c4346e76dd8e612ea507763dccd7e13c7a"}, - {file = "mypy-0.981-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa38f82f53e1e7beb45557ff167c177802ba7b387ad017eab1663d567017c8ee"}, - {file = "mypy-0.981-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:64e1f6af81c003f85f0dfed52db632817dabb51b65c0318ffbf5ff51995bbb08"}, - {file = "mypy-0.981-cp38-cp38-win_amd64.whl", hash = "sha256:e1acf62a8c4f7c092462c738aa2c2489e275ed386320c10b2e9bff31f6f7e8d6"}, - {file = "mypy-0.981-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b6ede64e52257931315826fdbfc6ea878d89a965580d1a65638ef77cb551f56d"}, 
- {file = "mypy-0.981-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eb3978b191b9fa0488524bb4ffedf2c573340e8c2b4206fc191d44c7093abfb7"}, - {file = "mypy-0.981-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:77f8fcf7b4b3cc0c74fb33ae54a4cd00bb854d65645c48beccf65fa10b17882c"}, - {file = "mypy-0.981-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f64d2ce043a209a297df322eb4054dfbaa9de9e8738291706eaafda81ab2b362"}, - {file = "mypy-0.981-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2ee3dbc53d4df7e6e3b1c68ac6a971d3a4fb2852bf10a05fda228721dd44fae1"}, - {file = "mypy-0.981-cp39-cp39-win_amd64.whl", hash = "sha256:8e8e49aa9cc23aa4c926dc200ce32959d3501c4905147a66ce032f05cb5ecb92"}, - {file = "mypy-0.981-py3-none-any.whl", hash = "sha256:794f385653e2b749387a42afb1e14c2135e18daeb027e0d97162e4b7031210f8"}, - {file = "mypy-0.981.tar.gz", hash = "sha256:ad77c13037d3402fbeffda07d51e3f228ba078d1c7096a73759c9419ea031bf4"}, + {file = "mypy-1.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0626db16705ab9f7fa6c249c017c887baf20738ce7f9129da162bb3075fc1af"}, + {file = "mypy-1.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1ace23f6bb4aec4604b86c4843276e8fa548d667dbbd0cb83a3ae14b18b2db6c"}, + {file = "mypy-1.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87edfaf344c9401942883fad030909116aa77b0fa7e6e8e1c5407e14549afe9a"}, + {file = "mypy-1.0.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0ab090d9240d6b4e99e1fa998c2d0aa5b29fc0fb06bd30e7ad6183c95fa07593"}, + {file = "mypy-1.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:7cc2c01dfc5a3cbddfa6c13f530ef3b95292f926329929001d45e124342cd6b7"}, + {file = "mypy-1.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:14d776869a3e6c89c17eb943100f7868f677703c8a4e00b3803918f86aafbc52"}, + {file = "mypy-1.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bb2782a036d9eb6b5a6efcdda0986774bf798beef86a62da86cb73e2a10b423d"}, + {file = "mypy-1.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5cfca124f0ac6707747544c127880893ad72a656e136adc935c8600740b21ff5"}, + {file = "mypy-1.0.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8845125d0b7c57838a10fd8925b0f5f709d0e08568ce587cc862aacce453e3dd"}, + {file = "mypy-1.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b1b9e1ed40544ef486fa8ac022232ccc57109f379611633ede8e71630d07d2"}, + {file = "mypy-1.0.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c7cf862aef988b5fbaa17764ad1d21b4831436701c7d2b653156a9497d92c83c"}, + {file = "mypy-1.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5cd187d92b6939617f1168a4fe68f68add749902c010e66fe574c165c742ed88"}, + {file = "mypy-1.0.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:4e5175026618c178dfba6188228b845b64131034ab3ba52acaffa8f6c361f805"}, + {file = "mypy-1.0.0-cp37-cp37m-win_amd64.whl", hash = "sha256:2f6ac8c87e046dc18c7d1d7f6653a66787a4555085b056fe2d599f1f1a2a2d21"}, + {file = "mypy-1.0.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7306edca1c6f1b5fa0bc9aa645e6ac8393014fa82d0fa180d0ebc990ebe15964"}, + {file = "mypy-1.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3cfad08f16a9c6611e6143485a93de0e1e13f48cfb90bcad7d5fde1c0cec3d36"}, + {file = "mypy-1.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67cced7f15654710386e5c10b96608f1ee3d5c94ca1da5a2aad5889793a824c1"}, + {file = "mypy-1.0.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = 
"sha256:a86b794e8a56ada65c573183756eac8ac5b8d3d59daf9d5ebd72ecdbb7867a43"}, + {file = "mypy-1.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:50979d5efff8d4135d9db293c6cb2c42260e70fb010cbc697b1311a4d7a39ddb"}, + {file = "mypy-1.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3ae4c7a99e5153496243146a3baf33b9beff714464ca386b5f62daad601d87af"}, + {file = "mypy-1.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5e398652d005a198a7f3c132426b33c6b85d98aa7dc852137a2a3be8890c4072"}, + {file = "mypy-1.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be78077064d016bc1b639c2cbcc5be945b47b4261a4f4b7d8923f6c69c5c9457"}, + {file = "mypy-1.0.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92024447a339400ea00ac228369cd242e988dd775640755fa4ac0c126e49bb74"}, + {file = "mypy-1.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:fe523fcbd52c05040c7bee370d66fee8373c5972171e4fbc323153433198592d"}, + {file = "mypy-1.0.0-py3-none-any.whl", hash = "sha256:2efa963bdddb27cb4a0d42545cd137a8d2b883bd181bbc4525b568ef6eca258f"}, + {file = "mypy-1.0.0.tar.gz", hash = "sha256:f34495079c8d9da05b183f9f7daec2878280c2ad7cc81da686ef0b484cea2ecf"}, ] [package.dependencies] @@ -1186,6 +1188,7 @@ typing-extensions = ">=3.10" [package.extras] dmypy = ["psutil (>=4.0)"] +install-types = ["pip"] python2 = ["typed-ast (>=1.4.0,<2)"] reports = ["lxml"] @@ -1203,18 +1206,18 @@ files = [ [[package]] name = "mypy-zope" -version = "0.3.11" +version = "0.9.0" description = "Plugin for mypy to support zope interfaces" category = "dev" optional = false python-versions = "*" files = [ - {file = "mypy-zope-0.3.11.tar.gz", hash = "sha256:d4255f9f04d48c79083bbd4e2fea06513a6ac7b8de06f8c4ce563fd85142ca05"}, - {file = "mypy_zope-0.3.11-py3-none-any.whl", hash = "sha256:ec080a6508d1f7805c8d2054f9fdd13c849742ce96803519e1fdfa3d3cab7140"}, + {file = "mypy-zope-0.9.0.tar.gz", hash = "sha256:88bf6cd056e38b338e6956055958a7805b4ff84404ccd99e29883a3647a1aeb3"}, + {file = "mypy_zope-0.9.0-py3-none-any.whl", hash = "sha256:e1bb4b57084f76ff8a154a3e07880a1af2ac6536c491dad4b143d529f72c5d15"}, ] [package.dependencies] -mypy = "0.981" +mypy = "1.0.0" "zope.interface" = "*" "zope.schema" = "*" @@ -1705,7 +1708,7 @@ files = [ cffi = ">=1.4.1" [package.extras] -docs = ["sphinx (>=1.6.5)", "sphinx-rtd-theme"] +docs = ["sphinx (>=1.6.5)", "sphinx_rtd_theme"] tests = ["hypothesis (>=3.27.0)", "pytest (>=3.2.1,!=3.3.0)"] [[package]] diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 57a6854b1e..cf12b55d21 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -201,7 +201,7 @@ class AuthHandler: for auth_checker_class in INTERACTIVE_AUTH_CHECKERS: inst = auth_checker_class(hs) if inst.is_enabled(): - self.checkers[inst.AUTH_TYPE] = inst # type: ignore + self.checkers[inst.AUTH_TYPE] = inst self.bcrypt_rounds = hs.config.registration.bcrypt_rounds diff --git a/synapse/handlers/ui_auth/checkers.py b/synapse/handlers/ui_auth/checkers.py index 332edcca24..78a75bfed6 100644 --- a/synapse/handlers/ui_auth/checkers.py +++ b/synapse/handlers/ui_auth/checkers.py @@ -13,7 +13,8 @@ # limitations under the License. 
import logging -from typing import TYPE_CHECKING, Any +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Any, ClassVar, Sequence, Type from twisted.web.client import PartialDownloadError @@ -27,19 +28,28 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -class UserInteractiveAuthChecker: +class UserInteractiveAuthChecker(ABC): """Abstract base class for an interactive auth checker""" - def __init__(self, hs: "HomeServer"): + # This should really be an "abstract class property", i.e. it should + # be an error to instantiate a subclass that doesn't specify an AUTH_TYPE. + # But calling this a `ClassVar` is simpler than a decorator stack of + # @property @abstractmethod and @classmethod (if that's even the right order). + AUTH_TYPE: ClassVar[str] + + def __init__(self, hs: "HomeServer"): # noqa: B027 pass + @abstractmethod def is_enabled(self) -> bool: """Check if the configuration of the homeserver allows this checker to work Returns: True if this login type is enabled. """ + raise NotImplementedError() + @abstractmethod async def check_auth(self, authdict: dict, clientip: str) -> Any: """Given the authentication dict from the client, attempt to check this step @@ -304,7 +314,7 @@ class RegistrationTokenAuthChecker(UserInteractiveAuthChecker): ) -INTERACTIVE_AUTH_CHECKERS = [ +INTERACTIVE_AUTH_CHECKERS: Sequence[Type[UserInteractiveAuthChecker]] = [ DummyAuthChecker, TermsAuthChecker, RecaptchaAuthChecker, diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index b92f1d3d1a..312aab4dcc 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -1267,7 +1267,7 @@ class MatrixFederationHttpClient: def _flatten_response_never_received(e: BaseException) -> str: if hasattr(e, "reasons"): reasons = ", ".join( - _flatten_response_never_received(f.value) for f in e.reasons # type: ignore[attr-defined] + _flatten_response_never_received(f.value) for f in e.reasons ) return "%s:[%s]" % (type(e).__name__, reasons) diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 6c7cf1b294..5aed71262f 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -188,7 +188,7 @@ from typing import ( ) import attr -from typing_extensions import ParamSpec +from typing_extensions import Concatenate, ParamSpec from twisted.internet import defer from twisted.web.http import Request @@ -445,7 +445,7 @@ def init_tracer(hs: "HomeServer") -> None: opentracing = None # type: ignore[assignment] return - if not opentracing or not JaegerConfig: + if opentracing is None or JaegerConfig is None: raise ConfigError( "The server has been configured to use opentracing but opentracing is not " "installed." 
@@ -872,7 +872,7 @@ def extract_text_map(carrier: Dict[str, str]) -> Optional["opentracing.SpanConte def _custom_sync_async_decorator( func: Callable[P, R], - wrapping_logic: Callable[[Callable[P, R], Any, Any], ContextManager[None]], + wrapping_logic: Callable[Concatenate[Callable[P, R], P], ContextManager[None]], ) -> Callable[P, R]: """ Decorates a function that is sync or async (coroutines), or that returns a Twisted @@ -902,10 +902,14 @@ def _custom_sync_async_decorator( """ if inspect.iscoroutinefunction(func): - + # In this branch, R = Awaitable[RInner], for some other type RInner @wraps(func) - async def _wrapper(*args: P.args, **kwargs: P.kwargs) -> R: + async def _wrapper( + *args: P.args, **kwargs: P.kwargs + ) -> Any: # Return type is RInner with wrapping_logic(func, *args, **kwargs): + # type-ignore: func() returns R, but mypy doesn't know that R is + # Awaitable here. return await func(*args, **kwargs) # type: ignore[misc] else: @@ -972,7 +976,11 @@ def trace_with_opname( if not opentracing: return func - return _custom_sync_async_decorator(func, _wrapping_logic) + # type-ignore: mypy seems to be confused by the ParamSpecs here. + # I think the problem is https://github.com/python/mypy/issues/12909 + return _custom_sync_async_decorator( + func, _wrapping_logic # type: ignore[arg-type] + ) return _decorator @@ -1018,7 +1026,9 @@ def tag_args(func: Callable[P, R]) -> Callable[P, R]: set_tag(SynapseTags.FUNC_KWARGS, str(kwargs)) yield - return _custom_sync_async_decorator(func, _wrapping_logic) + # type-ignore: mypy seems to be confused by the ParamSpecs here. + # I think the problem is https://github.com/python/mypy/issues/12909 + return _custom_sync_async_decorator(func, _wrapping_logic) # type: ignore[arg-type] @contextlib.contextmanager diff --git a/synapse/rest/media/v1/_base.py b/synapse/rest/media/v1/_base.py index d30878f704..6e035afcce 100644 --- a/synapse/rest/media/v1/_base.py +++ b/synapse/rest/media/v1/_base.py @@ -16,6 +16,7 @@ import logging import os import urllib +from abc import ABC, abstractmethod from types import TracebackType from typing import Awaitable, Dict, Generator, List, Optional, Tuple, Type @@ -284,13 +285,14 @@ async def respond_with_responder( finish_request(request) -class Responder: +class Responder(ABC): """Represents a response that can be streamed to the requester. Responder is a context manager which *must* be used, so that any resources held can be cleaned up. 
""" + @abstractmethod def write_to_consumer(self, consumer: IConsumer) -> Awaitable: """Stream response into consumer @@ -300,11 +302,12 @@ class Responder: Returns: Resolves once the response has finished being written """ + raise NotImplementedError() - def __enter__(self) -> None: + def __enter__(self) -> None: # noqa: B027 pass - def __exit__( + def __exit__( # noqa: B027 self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], diff --git a/synapse/storage/engines/__init__.py b/synapse/storage/engines/__init__.py index a182e8a098..d1ccb7390a 100644 --- a/synapse/storage/engines/__init__.py +++ b/synapse/storage/engines/__init__.py @@ -25,7 +25,7 @@ try: except ImportError: class PostgresEngine(BaseDatabaseEngine): # type: ignore[no-redef] - def __new__(cls, *args: object, **kwargs: object) -> NoReturn: # type: ignore[misc] + def __new__(cls, *args: object, **kwargs: object) -> NoReturn: raise RuntimeError( f"Cannot create {cls.__name__} -- psycopg2 module is not installed" ) @@ -36,7 +36,7 @@ try: except ImportError: class Sqlite3Engine(BaseDatabaseEngine): # type: ignore[no-redef] - def __new__(cls, *args: object, **kwargs: object) -> NoReturn: # type: ignore[misc] + def __new__(cls, *args: object, **kwargs: object) -> NoReturn: raise RuntimeError( f"Cannot create {cls.__name__} -- sqlite3 module is not installed" ) diff --git a/synapse/storage/types.py b/synapse/storage/types.py index 0031df1e06..56a0048539 100644 --- a/synapse/storage/types.py +++ b/synapse/storage/types.py @@ -12,7 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. from types import TracebackType -from typing import Any, Iterator, List, Mapping, Optional, Sequence, Tuple, Type, Union +from typing import ( + Any, + Callable, + Iterator, + List, + Mapping, + Optional, + Sequence, + Tuple, + Type, + Union, +) from typing_extensions import Protocol @@ -112,15 +123,35 @@ class DBAPI2Module(Protocol): # extends from this hierarchy. See # https://docs.python.org/3/library/sqlite3.html?highlight=sqlite3#exceptions # https://www.postgresql.org/docs/current/errcodes-appendix.html#ERRCODES-TABLE - Warning: Type[Exception] - Error: Type[Exception] + # + # Note: rather than + # x: T + # we write + # @property + # def x(self) -> T: ... + # which expresses that the protocol attribute `x` is read-only. The mypy docs + # https://mypy.readthedocs.io/en/latest/common_issues.html#covariant-subtyping-of-mutable-protocol-members-is-rejected + # explain why this is necessary for safety. TL;DR: we shouldn't be able to write + # to `x`, only read from it. See also https://github.com/python/mypy/issues/6002 . + @property + def Warning(self) -> Type[Exception]: + ... + + @property + def Error(self) -> Type[Exception]: + ... # Errors are divided into `InterfaceError`s (something went wrong in the database # driver) and `DatabaseError`s (something went wrong in the database). These are # both subclasses of `Error`, but we can't currently express this in type # annotations due to https://github.com/python/mypy/issues/8397 - InterfaceError: Type[Exception] - DatabaseError: Type[Exception] + @property + def InterfaceError(self) -> Type[Exception]: + ... + + @property + def DatabaseError(self) -> Type[Exception]: + ... # Everything below is a subclass of `DatabaseError`. @@ -128,7 +159,9 @@ class DBAPI2Module(Protocol): # - An integer was too big for its data type. # - An invalid date time was provided. # - A string contained a null code point. 
- DataError: Type[Exception] + @property + def DataError(self) -> Type[Exception]: + ... # Roughly: something went wrong in the database, but it's not within the application # programmer's control. Examples: @@ -138,28 +171,45 @@ class DBAPI2Module(Protocol): # - A serialisation failure occurred. # - The database ran out of resources, such as storage, memory, connections, etc. # - The database encountered an error from the operating system. - OperationalError: Type[Exception] + @property + def OperationalError(self) -> Type[Exception]: + ... # Roughly: we've given the database data which breaks a rule we asked it to enforce. # Examples: # - Stop, criminal scum! You violated the foreign key constraint # - Also check constraints, non-null constraints, etc. - IntegrityError: Type[Exception] + @property + def IntegrityError(self) -> Type[Exception]: + ... # Roughly: something went wrong within the database server itself. - InternalError: Type[Exception] + @property + def InternalError(self) -> Type[Exception]: + ... # Roughly: the application did something silly that needs to be fixed. Examples: # - We don't have permissions to do something. # - We tried to create a table with duplicate column names. # - We tried to use a reserved name. # - We referred to a column that doesn't exist. - ProgrammingError: Type[Exception] + @property + def ProgrammingError(self) -> Type[Exception]: + ... # Roughly: we've tried to do something that this database doesn't support. - NotSupportedError: Type[Exception] + @property + def NotSupportedError(self) -> Type[Exception]: + ... - def connect(self, **parameters: object) -> Connection: + # We originally wrote + # def connect(self, *args, **kwargs) -> Connection: ... + # But mypy doesn't seem to like that because sqlite3.connect takes a mandatory + # positional argument. We can't make that part of the signature though, because + # psycopg2.connect doesn't have a mandatory positional argument. Instead, we use + # the following slightly unusual workaround. + @property + def connect(self) -> Callable[..., Connection]: ... diff --git a/synapse/streams/__init__.py b/synapse/streams/__init__.py index c6c8a0315c..8a48ffc48d 100644 --- a/synapse/streams/__init__.py +++ b/synapse/streams/__init__.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +from abc import ABC, abstractmethod from typing import Generic, List, Optional, Tuple, TypeVar from synapse.types import StrCollection, UserID @@ -22,7 +22,8 @@ K = TypeVar("K") R = TypeVar("R") -class EventSource(Generic[K, R]): +class EventSource(ABC, Generic[K, R]): + @abstractmethod async def get_new_events( self, user: UserID, @@ -32,4 +33,4 @@ class EventSource(Generic[K, R]): is_guest: bool, explicit_room_id: Optional[str] = None, ) -> Tuple[List[R], K]: - ... 
+ raise NotImplementedError() diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py index 782ef09cf4..1db99b3c00 100644 --- a/tests/handlers/test_register.py +++ b/tests/handlers/test_register.py @@ -62,7 +62,7 @@ class TestSpamChecker: request_info: Collection[Tuple[str, str]], auth_provider_id: Optional[str], ) -> RegistrationBehaviour: - pass + return RegistrationBehaviour.ALLOW class DenyAll(TestSpamChecker): @@ -111,7 +111,7 @@ class TestLegacyRegistrationSpamChecker: username: Optional[str], request_info: Collection[Tuple[str, str]], ) -> RegistrationBehaviour: - pass + return RegistrationBehaviour.ALLOW class LegacyAllowAll(TestLegacyRegistrationSpamChecker): diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index acfdcd3bca..d27422515c 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -63,7 +63,7 @@ from tests.http import ( get_test_ca_cert_file, ) from tests.server import FakeTransport, ThreadedMemoryReactorClock -from tests.utils import default_config +from tests.utils import checked_cast, default_config logger = logging.getLogger(__name__) @@ -146,8 +146,10 @@ class MatrixFederationAgentTests(unittest.TestCase): # # Normally this would be done by the TCP socket code in Twisted, but we are # stubbing that out here. - client_protocol = client_factory.buildProtocol(dummy_address) - assert isinstance(client_protocol, _WrappingProtocol) + # NB: we use a checked_cast here to workaround https://github.com/Shoobx/mypy-zope/issues/91) + client_protocol = checked_cast( + _WrappingProtocol, client_factory.buildProtocol(dummy_address) + ) client_protocol.makeConnection( FakeTransport(server_protocol, self.reactor, client_protocol) ) @@ -446,7 +448,6 @@ class MatrixFederationAgentTests(unittest.TestCase): server_ssl_protocol = _wrap_server_factory_for_tls( _get_test_protocol_factory() ).buildProtocol(dummy_address) - assert isinstance(server_ssl_protocol, TLSMemoryBIOProtocol) # Tell the HTTP server to send outgoing traffic back via the proxy's transport. proxy_server_transport = proxy_server.transport @@ -1529,7 +1530,7 @@ def _check_logcontext(context: LoggingContextOrSentinel) -> None: def _wrap_server_factory_for_tls( factory: IProtocolFactory, sanlist: Optional[List[bytes]] = None -) -> IProtocolFactory: +) -> TLSMemoryBIOFactory: """Wrap an existing Protocol Factory with a test TLSMemoryBIOFactory The resultant factory will create a TLS server which presents a certificate signed by our test CA, valid for the domains in `sanlist` diff --git a/tests/http/test_proxyagent.py b/tests/http/test_proxyagent.py index a817940730..22fdc7f5f2 100644 --- a/tests/http/test_proxyagent.py +++ b/tests/http/test_proxyagent.py @@ -43,6 +43,7 @@ from tests.http import ( ) from tests.server import FakeTransport, ThreadedMemoryReactorClock from tests.unittest import TestCase +from tests.utils import checked_cast logger = logging.getLogger(__name__) @@ -620,7 +621,6 @@ class MatrixFederationAgentTests(TestCase): server_ssl_protocol = _wrap_server_factory_for_tls( _get_test_protocol_factory() ).buildProtocol(dummy_address) - assert isinstance(server_ssl_protocol, TLSMemoryBIOProtocol) # Tell the HTTP server to send outgoing traffic back via the proxy's transport. 
proxy_server_transport = proxy_server.transport @@ -757,12 +757,14 @@ class MatrixFederationAgentTests(TestCase): assert isinstance(proxy_server, HTTPChannel) # fish the transports back out so that we can do the old switcheroo - s2c_transport = proxy_server.transport - assert isinstance(s2c_transport, FakeTransport) - client_protocol = s2c_transport.other - assert isinstance(client_protocol, _WrappingProtocol) - c2s_transport = client_protocol.transport - assert isinstance(c2s_transport, FakeTransport) + # To help mypy out with the various Protocols and wrappers and mocks, we do + # some explicit casting. Without the casts, we hit the bug I reported at + # https://github.com/Shoobx/mypy-zope/issues/91 . + # We also double-checked these casts at runtime (test-time) because I found it + # quite confusing to deduce these types in the first place! + s2c_transport = checked_cast(FakeTransport, proxy_server.transport) + client_protocol = checked_cast(_WrappingProtocol, s2c_transport.other) + c2s_transport = checked_cast(FakeTransport, client_protocol.transport) # the FakeTransport is async, so we need to pump the reactor self.reactor.advance(0) @@ -822,9 +824,9 @@ class MatrixFederationAgentTests(TestCase): @patch.dict(os.environ, {"http_proxy": "proxy.com:8888"}) def test_proxy_with_no_scheme(self) -> None: http_proxy_agent = ProxyAgent(self.reactor, use_proxy=True) - assert isinstance(http_proxy_agent.http_proxy_endpoint, HostnameEndpoint) - self.assertEqual(http_proxy_agent.http_proxy_endpoint._hostStr, "proxy.com") - self.assertEqual(http_proxy_agent.http_proxy_endpoint._port, 8888) + proxy_ep = checked_cast(HostnameEndpoint, http_proxy_agent.http_proxy_endpoint) + self.assertEqual(proxy_ep._hostStr, "proxy.com") + self.assertEqual(proxy_ep._port, 8888) @patch.dict(os.environ, {"http_proxy": "socks://proxy.com:8888"}) def test_proxy_with_unsupported_scheme(self) -> None: @@ -834,25 +836,21 @@ class MatrixFederationAgentTests(TestCase): @patch.dict(os.environ, {"http_proxy": "http://proxy.com:8888"}) def test_proxy_with_http_scheme(self) -> None: http_proxy_agent = ProxyAgent(self.reactor, use_proxy=True) - assert isinstance(http_proxy_agent.http_proxy_endpoint, HostnameEndpoint) - self.assertEqual(http_proxy_agent.http_proxy_endpoint._hostStr, "proxy.com") - self.assertEqual(http_proxy_agent.http_proxy_endpoint._port, 8888) + proxy_ep = checked_cast(HostnameEndpoint, http_proxy_agent.http_proxy_endpoint) + self.assertEqual(proxy_ep._hostStr, "proxy.com") + self.assertEqual(proxy_ep._port, 8888) @patch.dict(os.environ, {"http_proxy": "https://proxy.com:8888"}) def test_proxy_with_https_scheme(self) -> None: https_proxy_agent = ProxyAgent(self.reactor, use_proxy=True) - assert isinstance(https_proxy_agent.http_proxy_endpoint, _WrapperEndpoint) - self.assertEqual( - https_proxy_agent.http_proxy_endpoint._wrappedEndpoint._hostStr, "proxy.com" - ) - self.assertEqual( - https_proxy_agent.http_proxy_endpoint._wrappedEndpoint._port, 8888 - ) + proxy_ep = checked_cast(_WrapperEndpoint, https_proxy_agent.http_proxy_endpoint) + self.assertEqual(proxy_ep._wrappedEndpoint._hostStr, "proxy.com") + self.assertEqual(proxy_ep._wrappedEndpoint._port, 8888) def _wrap_server_factory_for_tls( factory: IProtocolFactory, sanlist: Optional[List[bytes]] = None -) -> IProtocolFactory: +) -> TLSMemoryBIOFactory: """Wrap an existing Protocol Factory with a test TLSMemoryBIOFactory The resultant factory will create a TLS server which presents a certificate diff --git a/tests/logging/test_remote_handler.py 
b/tests/logging/test_remote_handler.py index c08954d887..5191e31a8a 100644 --- a/tests/logging/test_remote_handler.py +++ b/tests/logging/test_remote_handler.py @@ -21,6 +21,7 @@ from synapse.logging import RemoteHandler from tests.logging import LoggerCleanupMixin from tests.server import FakeTransport, get_clock from tests.unittest import TestCase +from tests.utils import checked_cast def connect_logging_client( @@ -56,8 +57,8 @@ class RemoteHandlerTestCase(LoggerCleanupMixin, TestCase): client, server = connect_logging_client(self.reactor, 0) # Trigger data being sent - assert isinstance(client.transport, FakeTransport) - client.transport.flush() + client_transport = checked_cast(FakeTransport, client.transport) + client_transport.flush() # One log message, with a single trailing newline logs = server.data.decode("utf8").splitlines() @@ -89,8 +90,8 @@ class RemoteHandlerTestCase(LoggerCleanupMixin, TestCase): # Allow the reconnection client, server = connect_logging_client(self.reactor, 0) - assert isinstance(client.transport, FakeTransport) - client.transport.flush() + client_transport = checked_cast(FakeTransport, client.transport) + client_transport.flush() # Only the 7 infos made it through, the debugs were elided logs = server.data.splitlines() @@ -123,8 +124,8 @@ class RemoteHandlerTestCase(LoggerCleanupMixin, TestCase): # Allow the reconnection client, server = connect_logging_client(self.reactor, 0) - assert isinstance(client.transport, FakeTransport) - client.transport.flush() + client_transport = checked_cast(FakeTransport, client.transport) + client_transport.flush() # The 10 warnings made it through, the debugs and infos were elided logs = server.data.splitlines() @@ -148,8 +149,8 @@ class RemoteHandlerTestCase(LoggerCleanupMixin, TestCase): # Allow the reconnection client, server = connect_logging_client(self.reactor, 0) - assert isinstance(client.transport, FakeTransport) - client.transport.flush() + client_transport = checked_cast(FakeTransport, client.transport) + client_transport.flush() # The first five and last five warnings made it through, the debugs and # infos were elided diff --git a/tests/rest/client/test_auth.py b/tests/rest/client/test_auth.py index 208ec44829..f4e1e7de43 100644 --- a/tests/rest/client/test_auth.py +++ b/tests/rest/client/test_auth.py @@ -43,6 +43,9 @@ class DummyRecaptchaChecker(UserInteractiveAuthChecker): super().__init__(hs) self.recaptcha_attempts: List[Tuple[dict, str]] = [] + def is_enabled(self) -> bool: + return True + def check_auth(self, authdict: dict, clientip: str) -> Any: self.recaptcha_attempts.append((authdict, clientip)) return succeed(True) diff --git a/tests/rest/client/test_third_party_rules.py b/tests/rest/client/test_third_party_rules.py index 3325d43a2f..5fa3440691 100644 --- a/tests/rest/client/test_third_party_rules.py +++ b/tests/rest/client/test_third_party_rules.py @@ -425,7 +425,7 @@ class ThirdPartyRulesTestCase(unittest.FederatingHomeserverTestCase): async def test_fn( event: EventBase, state_events: StateMap[EventBase] ) -> Tuple[bool, Optional[JsonDict]]: - if event.is_state and event.type == EventTypes.PowerLevels: + if event.is_state() and event.type == EventTypes.PowerLevels: await api.create_and_send_event_into_room( { "room_id": event.room_id, diff --git a/tests/utils.py b/tests/utils.py index 15fabbc2d0..a0ac11bc5c 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -15,7 +15,7 @@ import atexit import os -from typing import Any, Callable, Dict, List, Tuple, Union, overload +from typing import Any, 
Callable, Dict, List, Tuple, Type, TypeVar, Union, overload import attr from typing_extensions import Literal, ParamSpec @@ -341,3 +341,27 @@ async def create_room(hs: HomeServer, room_id: str, creator_id: str) -> None: context = await unpersisted_context.persist(event) await persistence_store.persist_event(event, context) + + +T = TypeVar("T") + + +def checked_cast(type: Type[T], x: object) -> T: + """A version of typing.cast that is checked at runtime. + + We have our own function for this for two reasons: + + 1. typing.cast itself is deliberately a no-op at runtime, see + https://docs.python.org/3/library/typing.html#typing.cast + 2. To help workaround a mypy-zope bug https://github.com/Shoobx/mypy-zope/issues/91 + where mypy would erroneously consider `isinstance(x, type)` to be false in all + circumstances. + + For this to make sense, `T` needs to be something that `isinstance` can check; see + https://docs.python.org/3/library/functions.html?highlight=isinstance#isinstance + https://docs.python.org/3/glossary.html#term-abstract-base-class + https://docs.python.org/3/library/typing.html#typing.runtime_checkable + for more details. + """ + assert isinstance(x, type) + return x -- cgit 1.5.1 From 4f4f27e57fdab1d7cc6e275b8acabc785952205e Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Fri, 17 Feb 2023 09:40:32 +0000 Subject: Mitigate a race where /make_join could 403 for restricted rooms (#15080) Previously, when creating a join event in /make_join, we would decide whether to include additional fields to satisfy restricted room checks based on the current state of the room. Then, when building the event, we would capture the forward extremities of the room to use as prev events. This is subject to race conditions. For example, when leaving and rejoining a room, the following sequence of events leads to a misleading 403 response: 1. /make_join reads the current state of the room and sees that the user is still in the room. It decides to omit the field required for restricted room joins. 2. The leave event is persisted and the room's forward extremities are updated. 3. /make_join builds the event, using the post-leave forward extremities. The event then fails the restricted room checks. To mitigate the race, we move the read of the forward extremities closer to the read of the current state. Ideally, we would compute the state based off the chosen prev events, but that can involve state resolution, which is expensive. Signed-off-by: Sean Quah --- changelog.d/15080.bugfix | 1 + synapse/handlers/federation.py | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 changelog.d/15080.bugfix (limited to 'synapse') diff --git a/changelog.d/15080.bugfix b/changelog.d/15080.bugfix new file mode 100644 index 0000000000..965d0b921e --- /dev/null +++ b/changelog.d/15080.bugfix @@ -0,0 +1 @@ +Reduce the likelihood of a rare race condition where rejoining a restricted room over federation would fail. diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 1d0f6bcd6f..5f2057269d 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -952,7 +952,20 @@ class FederationHandler: # # Note that this requires the /send_join request to come back to the # same server. 
+ prev_event_ids = None if room_version.msc3083_join_rules: + # Note that the room's state can change out from under us and render our + # nice join rules-conformant event non-conformant by the time we build the + # event. When this happens, our validation at the end fails and we respond + # to the requesting server with a 403, which is misleading — it indicates + # that the user is not allowed to join the room and the joining server + # should not bother retrying via this homeserver or any others, when + # in fact we've just messed up with building the event. + # + # To reduce the likelihood of this race, we capture the forward extremities + # of the room (prev_event_ids) just before fetching the current state, and + # hope that the state we fetch corresponds to the prev events we chose. + prev_event_ids = await self.store.get_prev_events_for_room(room_id) state_ids = await self._state_storage_controller.get_current_state_ids( room_id ) @@ -994,7 +1007,8 @@ class FederationHandler: event, unpersisted_context, ) = await self.event_creation_handler.create_new_client_event( - builder=builder + builder=builder, + prev_event_ids=prev_event_ids, ) except SynapseError as e: logger.warning("Failed to create join to %s because %s", room_id, e) -- cgit 1.5.1 From 61bfcd669ae596a8df940f434e3e2335059100b1 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Fri, 17 Feb 2023 14:54:55 +0100 Subject: Add account data to export command (#14969) * Add account data to the export command * newsfile * remove unneeded function * update newsfile * adopt #14973 --- changelog.d/14969.feature | 1 + docs/usage/administration/admin_faq.md | 3 +++ synapse/app/admin_cmd.py | 15 ++++++++++- synapse/handlers/admin.py | 49 +++++++++++++++++++++++----------- tests/handlers/test_admin.py | 27 +++++++++++++++++++ 5 files changed, 79 insertions(+), 16 deletions(-) create mode 100644 changelog.d/14969.feature (limited to 'synapse') diff --git a/changelog.d/14969.feature b/changelog.d/14969.feature new file mode 100644 index 0000000000..a4680ef9c8 --- /dev/null +++ b/changelog.d/14969.feature @@ -0,0 +1 @@ +Add account data to the command line [user data export tool](https://matrix-org.github.io/synapse/v1.78/usage/administration/admin_faq.html#how-can-i-export-user-data).
\ No newline at end of file diff --git a/docs/usage/administration/admin_faq.md b/docs/usage/administration/admin_faq.md index 7a27741199..925e1d175e 100644 --- a/docs/usage/administration/admin_faq.md +++ b/docs/usage/administration/admin_faq.md @@ -71,6 +71,9 @@ output-directory │ ├───invite_state │ └───knock_state └───user_data + ├───account_data + │ ├───global + │ └───<room_id> ├───connections ├───devices └───profile diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index fe7afb9475..ad51f33165 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -17,7 +17,7 @@ import logging import os import sys import tempfile -from typing import List, Optional +from typing import List, Mapping, Optional from twisted.internet import defer, task @@ -222,6 +222,19 @@ class FileExfiltrationWriter(ExfiltrationWriter): with open(connection_file, "a") as f: print(json.dumps(connection), file=f) + def write_account_data( + self, file_name: str, account_data: Mapping[str, JsonDict] + ) -> None: + account_data_directory = os.path.join( + self.base_directory, "user_data", "account_data" + ) + os.makedirs(account_data_directory, exist_ok=True) + + account_data_file = os.path.join(account_data_directory, file_name) + + with open(account_data_file, "a") as f: + print(json.dumps(account_data), file=f) + def finished(self) -> str: return self.base_directory diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index b03c214b14..8b7760b2cc 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -14,7 +14,7 @@ import abc import logging -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set +from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Set from synapse.api.constants import Direction, Membership from synapse.events import EventBase @@ -29,7 +29,7 @@ logger = logging.getLogger(__name__) class AdminHandler: def __init__(self, hs: "HomeServer"): - self.store = hs.get_datastores().main + self._store = hs.get_datastores().main self._device_handler = hs.get_device_handler() self._storage_controllers = hs.get_storage_controllers() self._state_storage_controller = self._storage_controllers.state @@ -38,7 +38,7 @@ class AdminHandler: async def get_whois(self, user: UserID) -> JsonDict: connections = [] - sessions = await self.store.get_user_ip_and_agents(user) + sessions = await self._store.get_user_ip_and_agents(user) for session in sessions: connections.append( { @@ -57,7 +57,7 @@ class AdminHandler: async def get_user(self, user: UserID) -> Optional[JsonDict]: """Function to get user details""" - user_info_dict = await self.store.get_user_by_id(user.to_string()) + user_info_dict = await self._store.get_user_by_id(user.to_string()) if user_info_dict is None: return None @@ -89,11 +89,11 @@ class AdminHandler: } # Add additional user metadata - profile = await self.store.get_profileinfo(user.localpart) - threepids = await self.store.user_get_threepids(user.to_string()) + profile = await self._store.get_profileinfo(user.localpart) + threepids = await self._store.user_get_threepids(user.to_string()) external_ids = [ ({"auth_provider": auth_provider, "external_id": external_id}) - for auth_provider, external_id in await self.store.get_external_ids_by_user( + for auth_provider, external_id in await self._store.get_external_ids_by_user( user.to_string() ) ] @@ -101,7 +101,7 @@ class AdminHandler: user_info_dict["avatar_url"] = profile.avatar_url user_info_dict["threepids"] = threepids user_info_dict["external_ids"] = external_ids -
user_info_dict["erased"] = await self.store.is_user_erased(user.to_string()) + user_info_dict["erased"] = await self._store.is_user_erased(user.to_string()) return user_info_dict @@ -117,7 +117,7 @@ class AdminHandler: The returned value is that returned by `writer.finished()`. """ # Get all rooms the user is in or has been in - rooms = await self.store.get_rooms_for_local_user_where_membership_is( + rooms = await self._store.get_rooms_for_local_user_where_membership_is( user_id, membership_list=( Membership.JOIN, @@ -131,7 +131,7 @@ class AdminHandler: # We only try and fetch events for rooms the user has been in. If # they've been e.g. invited to a room without joining then we handle # those separately. - rooms_user_has_been_in = await self.store.get_rooms_user_has_been_in(user_id) + rooms_user_has_been_in = await self._store.get_rooms_user_has_been_in(user_id) for index, room in enumerate(rooms): room_id = room.room_id @@ -140,7 +140,7 @@ class AdminHandler: "[%s] Handling room %s, %d/%d", user_id, room_id, index + 1, len(rooms) ) - forgotten = await self.store.did_forget(user_id, room_id) + forgotten = await self._store.did_forget(user_id, room_id) if forgotten: logger.info("[%s] User forgot room %d, ignoring", user_id, room_id) continue @@ -152,14 +152,14 @@ class AdminHandler: if room.membership == Membership.INVITE: event_id = room.event_id - invite = await self.store.get_event(event_id, allow_none=True) + invite = await self._store.get_event(event_id, allow_none=True) if invite: invited_state = invite.unsigned["invite_room_state"] writer.write_invite(room_id, invite, invited_state) if room.membership == Membership.KNOCK: event_id = room.event_id - knock = await self.store.get_event(event_id, allow_none=True) + knock = await self._store.get_event(event_id, allow_none=True) if knock: knock_state = knock.unsigned["knock_room_state"] writer.write_knock(room_id, knock, knock_state) @@ -170,7 +170,7 @@ class AdminHandler: # were joined. We estimate that point by looking at the # stream_ordering of the last membership if it wasn't a join. if room.membership == Membership.JOIN: - stream_ordering = self.store.get_room_max_stream_ordering() + stream_ordering = self._store.get_room_max_stream_ordering() else: stream_ordering = room.stream_ordering @@ -197,7 +197,7 @@ class AdminHandler: # events that we have and then filtering, this isn't the most # efficient method perhaps but it does guarantee we get everything. while True: - events, _ = await self.store.paginate_room_events( + events, _ = await self._store.paginate_room_events( room_id, from_key, to_key, limit=100, direction=Direction.FORWARDS ) if not events: @@ -263,6 +263,13 @@ class AdminHandler: connections["devices"][""]["sessions"][0]["connections"] ) + # Get all account data the user has global and in rooms + global_data = await self._store.get_global_account_data_for_user(user_id) + by_room_data = await self._store.get_room_account_data_for_user(user_id) + writer.write_account_data("global", global_data) + for room_id in by_room_data: + writer.write_account_data(room_id, by_room_data[room_id]) + return writer.finished() @@ -340,6 +347,18 @@ class ExfiltrationWriter(metaclass=abc.ABCMeta): """ raise NotImplementedError() + @abc.abstractmethod + def write_account_data( + self, file_name: str, account_data: Mapping[str, JsonDict] + ) -> None: + """Write the account data of a user. 
+
+        Args:
+            file_name: file name to write data to
+            account_data: mapping of global or room account_data
+        """
+        raise NotImplementedError()
+
     @abc.abstractmethod
     def finished(self) -> Any:
         """Called when all data has successfully been exported and written.
diff --git a/tests/handlers/test_admin.py b/tests/handlers/test_admin.py
index 6f300b8e11..1b97aaeed1 100644
--- a/tests/handlers/test_admin.py
+++ b/tests/handlers/test_admin.py
@@ -296,3 +296,30 @@ class ExfiltrateData(unittest.HomeserverTestCase):
         self.assertEqual(args[0][0]["user_agent"], "user_agent")
         self.assertGreater(args[0][0]["last_seen"], 0)
         self.assertNotIn("access_token", args[0][0])
+
+    def test_account_data(self) -> None:
+        """Tests that user account data gets exported."""
+        # add account data
+        self.get_success(
+            self._store.add_account_data_for_user(self.user2, "m.global", {"a": 1})
+        )
+        self.get_success(
+            self._store.add_account_data_to_room(
+                self.user2, "test_room", "m.per_room", {"b": 2}
+            )
+        )
+
+        writer = Mock()
+
+        self.get_success(self.admin_handler.export_user_data(self.user2, writer))
+
+        # two calls, one for global account data and one for room account data
+        writer.write_account_data.assert_called()
+
+        args = writer.write_account_data.call_args_list[0][0]
+        self.assertEqual(args[0], "global")
+        self.assertEqual(args[1]["m.global"]["a"], 1)
+
+        args = writer.write_account_data.call_args_list[1][0]
+        self.assertEqual(args[0], "test_room")
+        self.assertEqual(args[1]["m.per_room"]["b"], 2)
-- cgit 1.5.1
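To make the ExfiltrationWriter contract above concrete, a hypothetical in-memory writer might implement write_account_data like this (a sketch for illustration only; the interface's other abstract methods are elided and would need stub implementations):

    from typing import Any, Dict, Mapping

    class InMemoryExfiltrationWriter(ExfiltrationWriter):
        """Sketch: collect account data in memory instead of writing files."""

        def __init__(self) -> None:
            self.account_data: Dict[str, Mapping[str, Any]] = {}

        def write_account_data(
            self, file_name: str, account_data: Mapping[str, Any]
        ) -> None:
            # file_name is "global" for global account data, else a room ID.
            self.account_data[file_name] = account_data

        def finished(self) -> Dict[str, Mapping[str, Any]]:
            return self.account_data

        # write_events, write_invite, write_knock, etc. would need no-op
        # implementations before this sketch could actually be instantiated.
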
""" - results = _parse_words(search_term) + escaped_words = [] + for word in _parse_words(search_term): + # Postgres tsvector and tsquery quoting rules: + # words potentially containing punctuation should be quoted + # and then existing quotes and backslashes should be doubled + # See: https://www.postgresql.org/docs/current/datatype-textsearch.html#DATATYPE-TSQUERY + + quoted_word = word.replace("'", "''").replace("\\", "\\\\") + escaped_words.append(f"'{quoted_word}'") - both = " & ".join("(%s:* | %s)" % (result, result) for result in results) - exact = " & ".join("%s" % (result,) for result in results) - prefix = " & ".join("%s:*" % (result,) for result in results) + both = " & ".join("(%s:* | %s)" % (word, word) for word in escaped_words) + exact = " & ".join("%s" % (word,) for word in escaped_words) + prefix = " & ".join("%s:*" % (word,) for word in escaped_words) return both, exact, prefix @@ -944,6 +952,14 @@ def _parse_words(search_term: str) -> List[str]: if USE_ICU: return _parse_words_with_icu(search_term) + return _parse_words_with_regex(search_term) + + +def _parse_words_with_regex(search_term: str) -> List[str]: + """ + Break down search term into words, when we don't have ICU available. + See: `_parse_words` + """ return re.findall(r"([\w\-]+)", search_term, re.UNICODE) diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index f65a68b9c2..a02c1c6227 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -192,6 +192,13 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.helper.join(room, self.appservice.sender, tok=self.appservice.token) self._check_only_one_user_in_directory(user, room) + def test_search_term_with_colon_in_it_does_not_raise(self) -> None: + """ + Regression test: Test that search terms with colons in them are acceptable. + """ + u1 = self.register_user("user1", "pass") + self.get_success(self.handler.search_users(u1, "haha:paamayim-nekudotayim", 10)) + def test_user_not_in_users_table(self) -> None: """Unclear how it happens, but on matrix.org we've seen join events for users who aren't in the users table. Test that we don't fall over diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py index f1ca523d23..2d169684cf 100644 --- a/tests/storage/test_user_directory.py +++ b/tests/storage/test_user_directory.py @@ -25,6 +25,11 @@ from synapse.rest.client import login, register, room from synapse.server import HomeServer from synapse.storage import DataStore from synapse.storage.background_updates import _BackgroundUpdateHandler +from synapse.storage.databases.main import user_directory +from synapse.storage.databases.main.user_directory import ( + _parse_words_with_icu, + _parse_words_with_regex, +) from synapse.storage.roommember import ProfileInfo from synapse.util import Clock @@ -42,7 +47,7 @@ ALICE = "@alice:a" BOB = "@bob:b" BOBBY = "@bobby:a" # The localpart isn't 'Bela' on purpose so we can test looking up display names. 
-BELA = "@somenickname:a" +BELA = "@somenickname:example.org" class GetUserDirectoryTables: @@ -423,6 +428,8 @@ class UserDirectoryInitialPopulationTestcase(HomeserverTestCase): class UserDirectoryStoreTestCase(HomeserverTestCase): + use_icu = False + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.store = hs.get_datastores().main @@ -434,6 +441,12 @@ class UserDirectoryStoreTestCase(HomeserverTestCase): self.get_success(self.store.update_profile_in_user_dir(BELA, "Bela", None)) self.get_success(self.store.add_users_in_public_rooms("!room:id", (ALICE, BOB))) + self._restore_use_icu = user_directory.USE_ICU + user_directory.USE_ICU = self.use_icu + + def tearDown(self) -> None: + user_directory.USE_ICU = self._restore_use_icu + def test_search_user_dir(self) -> None: # normally when alice searches the directory she should just find # bob because bobby doesn't share a room with her. @@ -478,6 +491,26 @@ class UserDirectoryStoreTestCase(HomeserverTestCase): {"user_id": BELA, "display_name": "Bela", "avatar_url": None}, ) + @override_config({"user_directory": {"search_all_users": True}}) + def test_search_user_dir_start_of_user_id(self) -> None: + """Tests that a user can look up another user by searching for the start + of their user ID. + """ + r = self.get_success(self.store.search_user_dir(ALICE, "somenickname:exa", 10)) + self.assertFalse(r["limited"]) + self.assertEqual(1, len(r["results"])) + self.assertDictEqual( + r["results"][0], + {"user_id": BELA, "display_name": "Bela", "avatar_url": None}, + ) + + +class UserDirectoryStoreTestCaseWithIcu(UserDirectoryStoreTestCase): + use_icu = True + + if not icu: + skip = "Requires PyICU" + class UserDirectoryICUTestCase(HomeserverTestCase): if not icu: @@ -513,3 +546,31 @@ class UserDirectoryICUTestCase(HomeserverTestCase): r["results"][0], {"user_id": ALICE, "display_name": display_name, "avatar_url": None}, ) + + def test_icu_word_boundary_punctuation(self) -> None: + """ + Tests the behaviour of punctuation with the ICU tokeniser. + + Seems to depend on underlying version of ICU. + """ + + # Note: either tokenisation is fine, because Postgres actually splits + # words itself afterwards. + self.assertIn( + _parse_words_with_icu("lazy'fox jumped:over the.dog"), + ( + # ICU 66 on Ubuntu 20.04 + ["lazy'fox", "jumped", "over", "the", "dog"], + # ICU 70 on Ubuntu 22.04 + ["lazy'fox", "jumped:over", "the.dog"], + ), + ) + + def test_regex_word_boundary_punctuation(self) -> None: + """ + Tests the behaviour of punctuation with the non-ICU tokeniser + """ + self.assertEqual( + _parse_words_with_regex("lazy'fox jumped:over the.dog"), + ["lazy", "fox", "jumped", "over", "the", "dog"], + ) -- cgit 1.5.1 From 490a3675bd7225b5695e505fea225d7c30127551 Mon Sep 17 00:00:00 2001 From: realtyem Date: Mon, 20 Feb 2023 06:23:00 -0600 Subject: Allow health listener resource to load (#15096) * Allow health listener resource to load. * changelog * Update changelog.d/15096.bugfix --- changelog.d/15096.bugfix | 1 + synapse/config/server.py | 1 + 2 files changed, 2 insertions(+) create mode 100644 changelog.d/15096.bugfix (limited to 'synapse') diff --git a/changelog.d/15096.bugfix b/changelog.d/15096.bugfix new file mode 100644 index 0000000000..09b4d861f8 --- /dev/null +++ b/changelog.d/15096.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse 1.76 where workers would fail to start if the `health` listener was configured. 
From 490a3675bd7225b5695e505fea225d7c30127551 Mon Sep 17 00:00:00 2001
From: realtyem
Date: Mon, 20 Feb 2023 06:23:00 -0600
Subject: Allow health listener resource to load (#15096)

* Allow health listener resource to load.

* changelog

* Update changelog.d/15096.bugfix
---
 changelog.d/15096.bugfix | 1 +
 synapse/config/server.py | 1 +
 2 files changed, 2 insertions(+)
 create mode 100644 changelog.d/15096.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15096.bugfix b/changelog.d/15096.bugfix
new file mode 100644
index 0000000000..09b4d861f8
--- /dev/null
+++ b/changelog.d/15096.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.76 where workers would fail to start if the `health` listener was configured.
diff --git a/synapse/config/server.py b/synapse/config/server.py
index ecdaa2d9dd..d4ef9930b0 100644
--- a/synapse/config/server.py
+++ b/synapse/config/server.py
@@ -177,6 +177,7 @@ KNOWN_RESOURCES = {
     "client",
     "consent",
     "federation",
+    "health",
     "keys",
     "media",
     "metrics",
-- cgit 1.5.1

From e26d7d5ae786df8d9d9a4dbd0f734e5c2f08aafd Mon Sep 17 00:00:00 2001
From: David Robertson
Date: Mon, 20 Feb 2023 13:35:24 +0000
Subject: Teach portdb about `un_partial_stated_event_stream` (#15108)

* Sort BOOLEAN_COLUMNS and APPEND_ONLY_TABLES

So I can see if a given table is present in logarithmic time, rather than
linear.

* Teach portdb about `un_partial_stated_event_streams`

* Comments comments comments

* Changelog
---
 changelog.d/15108.bugfix            |  1 +
 synapse/_scripts/synapse_port_db.py | 85 +++++++++++++++++++++++--------------
 2 files changed, 53 insertions(+), 33 deletions(-)
 create mode 100644 changelog.d/15108.bugfix

(limited to 'synapse')

diff --git a/changelog.d/15108.bugfix b/changelog.d/15108.bugfix
new file mode 100644
index 0000000000..30af8b439d
--- /dev/null
+++ b/changelog.d/15108.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.75 where the [portdb script](https://matrix-org.github.io/synapse/release-v1.78/postgres.html#porting-from-sqlite) would fail to run after a room had been faster-joined.
diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py
index 5e137dbbf7..0d35e0af8f 100755
--- a/synapse/_scripts/synapse_port_db.py
+++ b/synapse/_scripts/synapse_port_db.py
@@ -94,61 +94,80 @@ reactor = cast(ISynapseReactor, reactor_)
 
 logger = logging.getLogger("synapse_port_db")
 
+# SQLite doesn't have a dedicated boolean type (it stores True/False as 1/0). This means
+# portdb will read sqlite bools as integers, then try to insert them into postgres
+# boolean columns---which fails. Lacking some Python-parseable metaschema, we must
+# specify which integer columns should be inserted as booleans into postgres.
 BOOLEAN_COLUMNS = {
-    "events": ["processed", "outlier", "contains_url"],
-    "rooms": ["is_public", "has_auth_chain_index"],
+    "access_tokens": ["used"],
+    "account_validity": ["email_sent"],
+    "device_lists_changes_in_room": ["converted_to_destinations"],
+    "device_lists_outbound_pokes": ["sent"],
+    "devices": ["hidden"],
+    "e2e_fallback_keys_json": ["used"],
+    "e2e_room_keys": ["is_verified"],
     "event_edges": ["is_state"],
+    "events": ["processed", "outlier", "contains_url"],
+    "local_media_repository": ["safe_from_quarantine"],
     "presence_list": ["accepted"],
     "presence_stream": ["currently_active"],
     "public_room_list_stream": ["visibility"],
-    "devices": ["hidden"],
-    "device_lists_outbound_pokes": ["sent"],
-    "users_who_share_rooms": ["share_private"],
-    "e2e_room_keys": ["is_verified"],
-    "account_validity": ["email_sent"],
+    "pushers": ["enabled"],
     "redactions": ["have_censored"],
     "room_stats_state": ["is_federatable"],
-    "local_media_repository": ["safe_from_quarantine"],
+    "rooms": ["is_public", "has_auth_chain_index"],
     "users": ["shadow_banned", "approved"],
-    "e2e_fallback_keys_json": ["used"],
-    "access_tokens": ["used"],
-    "device_lists_changes_in_room": ["converted_to_destinations"],
-    "pushers": ["enabled"],
+    "un_partial_stated_event_stream": ["rejection_status_changed"],
+    "users_who_share_rooms": ["share_private"],
 }
 
 
+# These tables are never deleted from in normal operation [*], so we can resume porting
+# over rows from a previous attempt rather than starting from scratch.
+#
+# [*]: We do delete from many of these tables when purging a room, and
+#      presumably when purging old events. So we might e.g.
+#
+#      1. Run portdb and port half of some table.
+#      2. Stop portdb.
+#      3. Purge something, deleting some of the rows we've ported over.
+#      4. Restart portdb. The rows deleted from sqlite are still present in postgres.
+#
+#      But this isn't the end of the world: we should be able to repeat the purge
+#      on the postgres DB when porting completes.
 APPEND_ONLY_TABLES = [
+    "cache_invalidation_stream_by_instance",
+    "event_auth",
+    "event_edges",
+    "event_json",
     "event_reference_hashes",
+    "event_search",
+    "event_to_state_groups",
     "events",
-    "event_json",
-    "state_events",
-    "room_memberships",
-    "topics",
-    "room_names",
-    "rooms",
+    "ex_outlier_stream",
     "local_media_repository",
     "local_media_repository_thumbnails",
+    "presence_stream",
+    "public_room_list_stream",
+    "push_rules_stream",
+    "received_transactions",
+    "redactions",
+    "rejections",
     "remote_media_cache",
     "remote_media_cache_thumbnails",
-    "redactions",
-    "event_edges",
-    "event_auth",
-    "received_transactions",
+    "room_memberships",
+    "room_names",
+    "rooms",
     "sent_transactions",
-    "transaction_id_to_pdu",
-    "users",
+    "state_events",
+    "state_group_edges",
     "state_groups",
     "state_groups_state",
-    "event_to_state_groups",
-    "rejections",
-    "event_search",
-    "presence_stream",
-    "push_rules_stream",
-    "ex_outlier_stream",
-    "cache_invalidation_stream_by_instance",
-    "public_room_list_stream",
-    "state_group_edges",
     "stream_ordering_to_exterm",
+    "topics",
+    "transaction_id_to_pdu",
+    "un_partial_stated_event_stream",
+    "users",
 ]
-- cgit 1.5.1
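As an illustration of why BOOLEAN_COLUMNS exists: SQLite returns 0/1 for boolean columns, so a porter must coerce those values before inserting into Postgres. A hypothetical helper (names and column layout invented, not the actual portdb code):

    from typing import Any, Dict, List, Tuple

    def coerce_booleans(
        table: str,
        headers: List[str],
        row: Tuple[Any, ...],
        boolean_columns: Dict[str, List[str]],
    ) -> Tuple[Any, ...]:
        """Convert 0/1 ints to Python bools for the columns listed in
        boolean_columns, leaving NULLs and all other columns untouched."""
        bool_cols = set(boolean_columns.get(table, ()))
        return tuple(
            bool(value) if header in bool_cols and value is not None else value
            for header, value in zip(headers, row)
        )

    # e.g. for a (hypothetical) un_partial_stated_event_stream row:
    # coerce_booleans(
    #     "un_partial_stated_event_stream",
    #     ["stream_id", "instance_name", "event_id", "rejection_status_changed"],
    #     (5, "master", "$event:id", 1),
    #     BOOLEAN_COLUMNS,
    # ) == (5, "master", "$event:id", True)
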
From addd12f16dc35a4f82cb48807719909e7aed9dcb Mon Sep 17 00:00:00 2001
From: reivilibre
Date: Tue, 21 Feb 2023 12:26:00 +0000
Subject: Tweak logging for when a worker waits for its view of a replication
 stream to catch up. (#15120)

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>

* Improve logging messages for the 'wait for repl stream' read-after-write
consistency feature

* Newsfile

Signed-off-by: Olivier Wilkinson (reivilibre)

* Update synapse/replication/tcp/client.py

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>

---------

Signed-off-by: Olivier Wilkinson (reivilibre)
Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
---
 changelog.d/15120.misc            |  1 +
 synapse/replication/tcp/client.py | 12 ++++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/15120.misc

(limited to 'synapse')

diff --git a/changelog.d/15120.misc b/changelog.d/15120.misc
new file mode 100644
index 0000000000..ebbc0c9027
--- /dev/null
+++ b/changelog.d/15120.misc
@@ -0,0 +1 @@
+Tweak logging for when a worker waits for its view of a replication stream to catch up.
\ No newline at end of file
diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py
index cc0528bd8e..424854efbe 100644
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@@ -370,15 +370,23 @@ class ReplicationDataHandler:
         # We measure here to get in flight counts and average waiting time.
         with Measure(self._clock, "repl.wait_for_stream_position"):
             logger.info(
-                "Waiting for repl stream %r to reach %s (%s)",
+                "Waiting for repl stream %r to reach %s (%s); currently at: %s",
                 stream_name,
                 position,
                 instance_name,
+                current_position,
             )
             try:
                 await make_deferred_yieldable(deferred)
             except defer.TimeoutError:
-                logger.error("Timed out waiting for stream %s", stream_name)
+                logger.error(
+                    "Timed out waiting for repl stream %r to reach %s (%s)"
+                    "; currently at: %s",
+                    stream_name,
+                    position,
+                    instance_name,
+                    self._streams[stream_name].current_token(instance_name),
+                )
                 return
 
             logger.info(
-- cgit 1.5.1
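To put the improved log lines in context, the read-after-write pattern that wait_for_stream_position supports looks roughly like this on the calling side (a hypothetical caller; every name except wait_for_stream_position is invented):

    # Sketch: after a write is performed on another worker, block until this
    # worker's replication view reaches the write's stream position, so a
    # subsequent local read cannot observe stale data.
    async def update_then_read(data_handler, writer_client, room_id: str) -> dict:
        # Invented API: the writer returns the stream position of its write.
        stream_id = await writer_client.send_some_write(room_id)

        # Real method from the diff above; note it logs and returns (rather
        # than raising) if the wait times out, which is exactly what the
        # improved error message reports on.
        await data_handler.wait_for_stream_position(
            instance_name="master",
            stream_name="events",
            position=stream_id,
        )

        # Invented API: now safe to read the local, caught-up view.
        return await writer_client.read_some_state(room_id)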