From 190f49d8aba3b18bb9b9c2cd8352dc9b402d6bbf Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Thu, 21 Jul 2022 12:51:30 +0200 Subject: Use cache store remove base slaved (#13329) This comes from two identical definitions in each of the base stores, and means the base slaved store is now empty and can be removed. --- synapse/storage/databases/main/__init__.py | 29 ++--------------------------- synapse/storage/databases/main/cache.py | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 27 deletions(-) (limited to 'synapse/storage/databases/main') diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py index a3d31d3737..4dccbb732a 100644 --- a/synapse/storage/databases/main/__init__.py +++ b/synapse/storage/databases/main/__init__.py @@ -24,9 +24,9 @@ from synapse.storage.database import ( LoggingTransaction, ) from synapse.storage.databases.main.stats import UserSortOrder -from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine +from synapse.storage.engines import BaseDatabaseEngine from synapse.storage.types import Cursor -from synapse.storage.util.id_generators import MultiWriterIdGenerator, StreamIdGenerator +from synapse.storage.util.id_generators import StreamIdGenerator from synapse.types import JsonDict, get_domain_from_id from synapse.util.caches.stream_change_cache import StreamChangeCache @@ -149,31 +149,6 @@ class DataStore( ], ) - self._cache_id_gen: Optional[MultiWriterIdGenerator] - if isinstance(self.database_engine, PostgresEngine): - # We set the `writers` to an empty list here as we don't care about - # missing updates over restarts, as we'll not have anything in our - # caches to invalidate. (This reduces the amount of writes to the DB - # that happen). - self._cache_id_gen = MultiWriterIdGenerator( - db_conn, - database, - stream_name="caches", - instance_name=hs.get_instance_name(), - tables=[ - ( - "cache_invalidation_stream_by_instance", - "instance_name", - "stream_id", - ) - ], - sequence_name="cache_invalidation_stream_seq", - writers=[], - ) - - else: - self._cache_id_gen = None - super().__init__(database, db_conn, hs) events_max = self._stream_id_gen.get_current_token() diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py index 2367ddeea3..12e9a42382 100644 --- a/synapse/storage/databases/main/cache.py +++ b/synapse/storage/databases/main/cache.py @@ -32,6 +32,7 @@ from synapse.storage.database import ( LoggingTransaction, ) from synapse.storage.engines import PostgresEngine +from synapse.storage.util.id_generators import MultiWriterIdGenerator from synapse.util.caches.descriptors import _CachedFunction from synapse.util.iterutils import batch_iter @@ -65,6 +66,31 @@ class CacheInvalidationWorkerStore(SQLBaseStore): psql_only=True, # The table is only on postgres DBs. ) + self._cache_id_gen: Optional[MultiWriterIdGenerator] + if isinstance(self.database_engine, PostgresEngine): + # We set the `writers` to an empty list here as we don't care about + # missing updates over restarts, as we'll not have anything in our + # caches to invalidate. (This reduces the amount of writes to the DB + # that happen). + self._cache_id_gen = MultiWriterIdGenerator( + db_conn, + database, + stream_name="caches", + instance_name=hs.get_instance_name(), + tables=[ + ( + "cache_invalidation_stream_by_instance", + "instance_name", + "stream_id", + ) + ], + sequence_name="cache_invalidation_stream_seq", + writers=[], + ) + + else: + self._cache_id_gen = None + async def get_all_updated_caches( self, instance_name: str, last_id: int, current_id: int, limit: int ) -> Tuple[List[Tuple[int, tuple]], int, bool]: -- cgit 1.5.1 From 50122754c8743df5c904e81b634fdfdeea64e795 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 21 Jul 2022 08:01:52 -0400 Subject: Add missing types to opentracing. (#13345) After this change `synapse.logging` is fully typed. --- changelog.d/13328.misc | 2 +- changelog.d/13345.misc | 1 + mypy.ini | 3 -- synapse/federation/transport/server/_base.py | 2 +- synapse/handlers/device.py | 8 ++--- synapse/handlers/e2e_keys.py | 16 ++++----- synapse/handlers/e2e_room_keys.py | 4 +-- synapse/logging/opentracing.py | 44 ++++++++++++++++++----- synapse/metrics/background_process_metrics.py | 2 +- synapse/rest/client/keys.py | 4 ++- synapse/storage/databases/main/deviceinbox.py | 2 +- synapse/storage/databases/main/devices.py | 4 +-- synapse/storage/databases/main/end_to_end_keys.py | 6 ++-- tests/logging/test_opentracing.py | 30 +++++++++++----- 14 files changed, 83 insertions(+), 45 deletions(-) create mode 100644 changelog.d/13345.misc (limited to 'synapse/storage/databases/main') diff --git a/changelog.d/13328.misc b/changelog.d/13328.misc index d15fb5fc37..c80578ce95 100644 --- a/changelog.d/13328.misc +++ b/changelog.d/13328.misc @@ -1 +1 @@ -Add type hints to `trace` decorator. +Add missing type hints to open tracing module. diff --git a/changelog.d/13345.misc b/changelog.d/13345.misc new file mode 100644 index 0000000000..c80578ce95 --- /dev/null +++ b/changelog.d/13345.misc @@ -0,0 +1 @@ +Add missing type hints to open tracing module. diff --git a/mypy.ini b/mypy.ini index ea0ab003a8..6add272990 100644 --- a/mypy.ini +++ b/mypy.ini @@ -84,9 +84,6 @@ disallow_untyped_defs = False [mypy-synapse.http.matrixfederationclient] disallow_untyped_defs = False -[mypy-synapse.logging.opentracing] -disallow_untyped_defs = False - [mypy-synapse.metrics._reactor_metrics] disallow_untyped_defs = False # This module imports select.epoll. That exists on Linux, but doesn't on macOS. diff --git a/synapse/federation/transport/server/_base.py b/synapse/federation/transport/server/_base.py index 84100a5a52..bb0f8d6b7b 100644 --- a/synapse/federation/transport/server/_base.py +++ b/synapse/federation/transport/server/_base.py @@ -309,7 +309,7 @@ class BaseFederationServlet: raise # update the active opentracing span with the authenticated entity - set_tag("authenticated_entity", origin) + set_tag("authenticated_entity", str(origin)) # if the origin is authenticated and whitelisted, use its span context # as the parent. diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index c05a170c55..1a8379854c 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -118,8 +118,8 @@ class DeviceWorkerHandler: ips = await self.store.get_last_client_ip_by_device(user_id, device_id) _update_device_from_client_ips(device, ips) - set_tag("device", device) - set_tag("ips", ips) + set_tag("device", str(device)) + set_tag("ips", str(ips)) return device @@ -170,7 +170,7 @@ class DeviceWorkerHandler: """ set_tag("user_id", user_id) - set_tag("from_token", from_token) + set_tag("from_token", str(from_token)) now_room_key = self.store.get_room_max_token() room_ids = await self.store.get_rooms_for_user(user_id) @@ -795,7 +795,7 @@ class DeviceListUpdater: """ set_tag("origin", origin) - set_tag("edu_content", edu_content) + set_tag("edu_content", str(edu_content)) user_id = edu_content.pop("user_id") device_id = edu_content.pop("device_id") stream_id = str(edu_content.pop("stream_id")) # They may come as ints diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 84c28c480e..c938339ddd 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -138,8 +138,8 @@ class E2eKeysHandler: else: remote_queries[user_id] = device_ids - set_tag("local_key_query", local_query) - set_tag("remote_key_query", remote_queries) + set_tag("local_key_query", str(local_query)) + set_tag("remote_key_query", str(remote_queries)) # First get local devices. # A map of destination -> failure response. @@ -343,7 +343,7 @@ class E2eKeysHandler: failure = _exception_to_failure(e) failures[destination] = failure set_tag("error", True) - set_tag("reason", failure) + set_tag("reason", str(failure)) return @@ -405,7 +405,7 @@ class E2eKeysHandler: Returns: A map from user_id -> device_id -> device details """ - set_tag("local_query", query) + set_tag("local_query", str(query)) local_query: List[Tuple[str, Optional[str]]] = [] result_dict: Dict[str, Dict[str, dict]] = {} @@ -477,8 +477,8 @@ class E2eKeysHandler: domain = get_domain_from_id(user_id) remote_queries.setdefault(domain, {})[user_id] = one_time_keys - set_tag("local_key_query", local_query) - set_tag("remote_key_query", remote_queries) + set_tag("local_key_query", str(local_query)) + set_tag("remote_key_query", str(remote_queries)) results = await self.store.claim_e2e_one_time_keys(local_query) @@ -508,7 +508,7 @@ class E2eKeysHandler: failure = _exception_to_failure(e) failures[destination] = failure set_tag("error", True) - set_tag("reason", failure) + set_tag("reason", str(failure)) await make_deferred_yieldable( defer.gatherResults( @@ -611,7 +611,7 @@ class E2eKeysHandler: result = await self.store.count_e2e_one_time_keys(user_id, device_id) - set_tag("one_time_key_counts", result) + set_tag("one_time_key_counts", str(result)) return {"one_time_key_counts": result} async def _upload_one_time_keys_for_user( diff --git a/synapse/handlers/e2e_room_keys.py b/synapse/handlers/e2e_room_keys.py index 446f509bdc..28dc08c22a 100644 --- a/synapse/handlers/e2e_room_keys.py +++ b/synapse/handlers/e2e_room_keys.py @@ -14,7 +14,7 @@ # limitations under the License. import logging -from typing import TYPE_CHECKING, Dict, Optional +from typing import TYPE_CHECKING, Dict, Optional, cast from typing_extensions import Literal @@ -97,7 +97,7 @@ class E2eRoomKeysHandler: user_id, version, room_id, session_id ) - log_kv(results) + log_kv(cast(JsonDict, results)) return results @trace diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 17e729f0c7..ad5cbf46a4 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -182,6 +182,8 @@ from typing import ( Type, TypeVar, Union, + cast, + overload, ) import attr @@ -328,6 +330,7 @@ class _Sentinel(enum.Enum): P = ParamSpec("P") R = TypeVar("R") +T = TypeVar("T") def only_if_tracing(func: Callable[P, R]) -> Callable[P, Optional[R]]: @@ -343,22 +346,43 @@ def only_if_tracing(func: Callable[P, R]) -> Callable[P, Optional[R]]: return _only_if_tracing_inner -def ensure_active_span(message: str, ret=None): +@overload +def ensure_active_span( + message: str, +) -> Callable[[Callable[P, R]], Callable[P, Optional[R]]]: + ... + + +@overload +def ensure_active_span( + message: str, ret: T +) -> Callable[[Callable[P, R]], Callable[P, Union[T, R]]]: + ... + + +def ensure_active_span( + message: str, ret: Optional[T] = None +) -> Callable[[Callable[P, R]], Callable[P, Union[Optional[T], R]]]: """Executes the operation only if opentracing is enabled and there is an active span. If there is no active span it logs message at the error level. Args: message: Message which fills in "There was no active span when trying to %s" in the error log if there is no active span and opentracing is enabled. - ret (object): return value if opentracing is None or there is no active span. + ret: return value if opentracing is None or there is no active span. - Returns (object): The result of the func or ret if opentracing is disabled or there + Returns: + The result of the func, falling back to ret if opentracing is disabled or there was no active span. """ - def ensure_active_span_inner_1(func): + def ensure_active_span_inner_1( + func: Callable[P, R] + ) -> Callable[P, Union[Optional[T], R]]: @wraps(func) - def ensure_active_span_inner_2(*args, **kwargs): + def ensure_active_span_inner_2( + *args: P.args, **kwargs: P.kwargs + ) -> Union[Optional[T], R]: if not opentracing: return ret @@ -464,7 +488,7 @@ def start_active_span( finish_on_close: bool = True, *, tracer: Optional["opentracing.Tracer"] = None, -): +) -> "opentracing.Scope": """Starts an active opentracing span. Records the start time for the span, and sets it as the "active span" in the @@ -502,7 +526,7 @@ def start_active_span_follows_from( *, inherit_force_tracing: bool = False, tracer: Optional["opentracing.Tracer"] = None, -): +) -> "opentracing.Scope": """Starts an active opentracing span, with additional references to previous spans Args: @@ -717,7 +741,9 @@ def inject_response_headers(response_headers: Headers) -> None: response_headers.addRawHeader("Synapse-Trace-Id", f"{trace_id:x}") -@ensure_active_span("get the active span context as a dict", ret={}) +@ensure_active_span( + "get the active span context as a dict", ret=cast(Dict[str, str], {}) +) def get_active_span_text_map(destination: Optional[str] = None) -> Dict[str, str]: """ Gets a span context as a dict. This can be used instead of manually @@ -886,7 +912,7 @@ def tag_args(func: Callable[P, R]) -> Callable[P, R]: for i, arg in enumerate(argspec.args[1:]): set_tag("ARG_" + arg, args[i]) # type: ignore[index] set_tag("args", args[len(argspec.args) :]) # type: ignore[index] - set_tag("kwargs", kwargs) + set_tag("kwargs", str(kwargs)) return func(*args, **kwargs) return _tag_args_inner diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index eef3462e10..7a1516d3a8 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -235,7 +235,7 @@ def run_as_background_process( f"bgproc.{desc}", tags={SynapseTags.REQUEST_ID: str(context)} ) else: - ctx = nullcontext() + ctx = nullcontext() # type: ignore[assignment] with ctx: return await func(*args, **kwargs) except Exception: diff --git a/synapse/rest/client/keys.py b/synapse/rest/client/keys.py index eb1b85721f..e3f454896a 100644 --- a/synapse/rest/client/keys.py +++ b/synapse/rest/client/keys.py @@ -208,7 +208,9 @@ class KeyChangesServlet(RestServlet): # We want to enforce they do pass us one, but we ignore it and return # changes after the "to" as well as before. - set_tag("to", parse_string(request, "to")) + # + # XXX This does not enforce that "to" is passed. + set_tag("to", str(parse_string(request, "to"))) from_token = await StreamToken.from_string(self.store, from_token_string) diff --git a/synapse/storage/databases/main/deviceinbox.py b/synapse/storage/databases/main/deviceinbox.py index 422e0e65ca..73c95ffb6f 100644 --- a/synapse/storage/databases/main/deviceinbox.py +++ b/synapse/storage/databases/main/deviceinbox.py @@ -436,7 +436,7 @@ class DeviceInboxWorkerStore(SQLBaseStore): (user_id, device_id), None ) - set_tag("last_deleted_stream_id", last_deleted_stream_id) + set_tag("last_deleted_stream_id", str(last_deleted_stream_id)) if last_deleted_stream_id: has_changed = self._device_inbox_stream_cache.has_entity_changed( diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 7a6ed332aa..ca0fe8c4be 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -706,8 +706,8 @@ class DeviceWorkerStore(EndToEndKeyWorkerStore): else: results[user_id] = await self.get_cached_devices_for_user(user_id) - set_tag("in_cache", results) - set_tag("not_in_cache", user_ids_not_in_cache) + set_tag("in_cache", str(results)) + set_tag("not_in_cache", str(user_ids_not_in_cache)) return user_ids_not_in_cache, results diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index 60f622ad71..46c0d06157 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -146,7 +146,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker key data. The key data will be a dict in the same format as the DeviceKeys type returned by POST /_matrix/client/r0/keys/query. """ - set_tag("query_list", query_list) + set_tag("query_list", str(query_list)) if not query_list: return {} @@ -418,7 +418,7 @@ class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore, CacheInvalidationWorker def _add_e2e_one_time_keys(txn: LoggingTransaction) -> None: set_tag("user_id", user_id) set_tag("device_id", device_id) - set_tag("new_keys", new_keys) + set_tag("new_keys", str(new_keys)) # We are protected from race between lookup and insertion due to # a unique constraint. If there is a race of two calls to # `add_e2e_one_time_keys` then they'll conflict and we will only @@ -1161,7 +1161,7 @@ class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): set_tag("user_id", user_id) set_tag("device_id", device_id) set_tag("time_now", time_now) - set_tag("device_keys", device_keys) + set_tag("device_keys", str(device_keys)) old_key_json = self.db_pool.simple_select_one_onecol_txn( txn, diff --git a/tests/logging/test_opentracing.py b/tests/logging/test_opentracing.py index 40148d503c..3b14c76d7e 100644 --- a/tests/logging/test_opentracing.py +++ b/tests/logging/test_opentracing.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import cast + from twisted.internet import defer from twisted.test.proto_helpers import MemoryReactorClock @@ -40,6 +42,15 @@ from tests.unittest import TestCase class LogContextScopeManagerTestCase(TestCase): + """ + Test logging contexts and active opentracing spans. + + There's casts throughout this from generic opentracing objects (e.g. + opentracing.Span) to the ones specific to Jaeger since they have additional + properties that these tests depend on. This is safe since the only supported + opentracing backend is Jaeger. + """ + if LogContextScopeManager is None: skip = "Requires opentracing" # type: ignore[unreachable] if jaeger_client is None: @@ -69,7 +80,7 @@ class LogContextScopeManagerTestCase(TestCase): # start_active_span should start and activate a span. scope = start_active_span("span", tracer=self._tracer) - span = scope.span + span = cast(jaeger_client.Span, scope.span) self.assertEqual(self._tracer.active_span, span) self.assertIsNotNone(span.start_time) @@ -91,6 +102,7 @@ class LogContextScopeManagerTestCase(TestCase): with LoggingContext("root context"): with start_active_span("root span", tracer=self._tracer) as root_scope: self.assertEqual(self._tracer.active_span, root_scope.span) + root_context = cast(jaeger_client.SpanContext, root_scope.span.context) scope1 = start_active_span( "child1", @@ -99,9 +111,8 @@ class LogContextScopeManagerTestCase(TestCase): self.assertEqual( self._tracer.active_span, scope1.span, "child1 was not activated" ) - self.assertEqual( - scope1.span.context.parent_id, root_scope.span.context.span_id - ) + context1 = cast(jaeger_client.SpanContext, scope1.span.context) + self.assertEqual(context1.parent_id, root_context.span_id) scope2 = start_active_span_follows_from( "child2", @@ -109,17 +120,18 @@ class LogContextScopeManagerTestCase(TestCase): tracer=self._tracer, ) self.assertEqual(self._tracer.active_span, scope2.span) - self.assertEqual( - scope2.span.context.parent_id, scope1.span.context.span_id - ) + context2 = cast(jaeger_client.SpanContext, scope2.span.context) + self.assertEqual(context2.parent_id, context1.span_id) with scope1, scope2: pass # the root scope should be restored self.assertEqual(self._tracer.active_span, root_scope.span) - self.assertIsNotNone(scope2.span.end_time) - self.assertIsNotNone(scope1.span.end_time) + span2 = cast(jaeger_client.Span, scope2.span) + span1 = cast(jaeger_client.Span, scope1.span) + self.assertIsNotNone(span2.end_time) + self.assertIsNotNone(span1.end_time) self.assertIsNone(self._tracer.active_span) -- cgit 1.5.1 From 43adf2521cc6952dcc7f0e3006dbfe52db85721a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 25 Jul 2022 10:21:06 +0100 Subject: Refactor presence so we can prune user in room caches (#13313) See #10826 and #10786 for context as to why we had to disable pruning on those caches. Now that `get_users_who_share_room_with_user` is called frequently only for presence, we just need to make calls to it less frequent and then we can remove the various levels of caching that is going on. --- changelog.d/13313.misc | 1 + synapse/handlers/presence.py | 112 +++++++++------------------ synapse/storage/_base.py | 4 + synapse/storage/databases/main/roommember.py | 83 ++++++++++++++++---- 4 files changed, 109 insertions(+), 91 deletions(-) create mode 100644 changelog.d/13313.misc (limited to 'synapse/storage/databases/main') diff --git a/changelog.d/13313.misc b/changelog.d/13313.misc new file mode 100644 index 0000000000..0f3c1f0afd --- /dev/null +++ b/changelog.d/13313.misc @@ -0,0 +1 @@ +Change `get_users_in_room` and `get_rooms_for_user` caches to enable pruning of old entries. diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 895ea63ed3..741504ba9f 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -34,7 +34,6 @@ from typing import ( Callable, Collection, Dict, - FrozenSet, Generator, Iterable, List, @@ -42,7 +41,6 @@ from typing import ( Set, Tuple, Type, - Union, ) from prometheus_client import Counter @@ -68,7 +66,6 @@ from synapse.storage.databases.main import DataStore from synapse.streams import EventSource from synapse.types import JsonDict, StreamKeyType, UserID, get_domain_from_id from synapse.util.async_helpers import Linearizer -from synapse.util.caches.descriptors import _CacheContext, cached from synapse.util.metrics import Measure from synapse.util.wheel_timer import WheelTimer @@ -1656,15 +1653,18 @@ class PresenceEventSource(EventSource[int, UserPresenceState]): # doesn't return. C.f. #5503. return [], max_token - # Figure out which other users this user should receive updates for - users_interested_in = await self._get_interested_in(user, explicit_room_id) + # Figure out which other users this user should explicitly receive + # updates for + additional_users_interested_in = ( + await self.get_presence_router().get_interested_users(user.to_string()) + ) # We have a set of users that we're interested in the presence of. We want to # cross-reference that with the users that have actually changed their presence. # Check whether this user should see all user updates - if users_interested_in == PresenceRouter.ALL_USERS: + if additional_users_interested_in == PresenceRouter.ALL_USERS: # Provide presence state for all users presence_updates = await self._filter_all_presence_updates_for_user( user_id, include_offline, from_key @@ -1673,34 +1673,47 @@ class PresenceEventSource(EventSource[int, UserPresenceState]): return presence_updates, max_token # Make mypy happy. users_interested_in should now be a set - assert not isinstance(users_interested_in, str) + assert not isinstance(additional_users_interested_in, str) + + # We always care about our own presence. + additional_users_interested_in.add(user_id) + + if explicit_room_id: + user_ids = await self.store.get_users_in_room(explicit_room_id) + additional_users_interested_in.update(user_ids) # The set of users that we're interested in and that have had a presence update. # We'll actually pull the presence updates for these users at the end. - interested_and_updated_users: Union[Set[str], FrozenSet[str]] = set() + interested_and_updated_users: Collection[str] if from_key is not None: # First get all users that have had a presence update updated_users = stream_change_cache.get_all_entities_changed(from_key) # Cross-reference users we're interested in with those that have had updates. - # Use a slightly-optimised method for processing smaller sets of updates. - if updated_users is not None and len(updated_users) < 500: - # For small deltas, it's quicker to get all changes and then - # cross-reference with the users we're interested in + if updated_users is not None: + # If we have the full list of changes for presence we can + # simply check which ones share a room with the user. get_updates_counter.labels("stream").inc() - for other_user_id in updated_users: - if other_user_id in users_interested_in: - # mypy thinks this variable could be a FrozenSet as it's possibly set - # to one in the `get_entities_changed` call below, and `add()` is not - # method on a FrozenSet. That doesn't affect us here though, as - # `interested_and_updated_users` is clearly a set() above. - interested_and_updated_users.add(other_user_id) # type: ignore + + sharing_users = await self.store.do_users_share_a_room( + user_id, updated_users + ) + + interested_and_updated_users = ( + sharing_users.union(additional_users_interested_in) + ).intersection(updated_users) + else: # Too many possible updates. Find all users we can see and check # if any of them have changed. get_updates_counter.labels("full").inc() + users_interested_in = ( + await self.store.get_users_who_share_room_with_user(user_id) + ) + users_interested_in.update(additional_users_interested_in) + interested_and_updated_users = ( stream_change_cache.get_entities_changed( users_interested_in, from_key @@ -1709,7 +1722,10 @@ class PresenceEventSource(EventSource[int, UserPresenceState]): else: # No from_key has been specified. Return the presence for all users # this user is interested in - interested_and_updated_users = users_interested_in + interested_and_updated_users = ( + await self.store.get_users_who_share_room_with_user(user_id) + ) + interested_and_updated_users.update(additional_users_interested_in) # Retrieve the current presence state for each user users_to_state = await self.get_presence_handler().current_state_for_users( @@ -1804,62 +1820,6 @@ class PresenceEventSource(EventSource[int, UserPresenceState]): def get_current_key(self) -> int: return self.store.get_current_presence_token() - @cached(num_args=2, cache_context=True) - async def _get_interested_in( - self, - user: UserID, - explicit_room_id: Optional[str] = None, - cache_context: Optional[_CacheContext] = None, - ) -> Union[Set[str], str]: - """Returns the set of users that the given user should see presence - updates for. - - Args: - user: The user to retrieve presence updates for. - explicit_room_id: The users that are in the room will be returned. - - Returns: - A set of user IDs to return presence updates for, or "ALL" to return all - known updates. - """ - user_id = user.to_string() - users_interested_in = set() - users_interested_in.add(user_id) # So that we receive our own presence - - # cache_context isn't likely to ever be None due to the @cached decorator, - # but we can't have a non-optional argument after the optional argument - # explicit_room_id either. Assert cache_context is not None so we can use it - # without mypy complaining. - assert cache_context - - # Check with the presence router whether we should poll additional users for - # their presence information - additional_users = await self.get_presence_router().get_interested_users( - user.to_string() - ) - if additional_users == PresenceRouter.ALL_USERS: - # If the module requested that this user see the presence updates of *all* - # users, then simply return that instead of calculating what rooms this - # user shares - return PresenceRouter.ALL_USERS - - # Add the additional users from the router - users_interested_in.update(additional_users) - - # Find the users who share a room with this user - users_who_share_room = await self.store.get_users_who_share_room_with_user( - user_id, on_invalidate=cache_context.invalidate - ) - users_interested_in.update(users_who_share_room) - - if explicit_room_id: - user_ids = await self.store.get_users_in_room( - explicit_room_id, on_invalidate=cache_context.invalidate - ) - users_interested_in.update(user_ids) - - return users_interested_in - def handle_timeouts( user_states: List[UserPresenceState], diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index a2f8310388..e30f9c76d4 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -80,6 +80,10 @@ class SQLBaseStore(metaclass=ABCMeta): ) self._attempt_to_invalidate_cache("get_local_users_in_room", (room_id,)) + # There's no easy way of invalidating this cache for just the users + # that have changed, so we just clear the entire thing. + self._attempt_to_invalidate_cache("does_pair_of_users_share_a_room", None) + for user_id in members_changed: self._attempt_to_invalidate_cache( "get_user_in_room_with_profile", (room_id, user_id) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index df6b82660e..e2cccc688c 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -21,6 +21,7 @@ from typing import ( FrozenSet, Iterable, List, + Mapping, Optional, Set, Tuple, @@ -55,6 +56,7 @@ from synapse.types import JsonDict, PersistedEventPosition, StateMap, get_domain from synapse.util.async_helpers import Linearizer from synapse.util.caches import intern_string from synapse.util.caches.descriptors import _CacheContext, cached, cachedList +from synapse.util.iterutils import batch_iter from synapse.util.metrics import Measure if TYPE_CHECKING: @@ -183,7 +185,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): self._check_safe_current_state_events_membership_updated_txn, ) - @cached(max_entries=100000, iterable=True, prune_unread_entries=False) + @cached(max_entries=100000, iterable=True) async def get_users_in_room(self, room_id: str) -> List[str]: return await self.db_pool.runInteraction( "get_users_in_room", self.get_users_in_room_txn, room_id @@ -561,7 +563,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): return results_dict.get("membership"), results_dict.get("event_id") - @cached(max_entries=500000, iterable=True, prune_unread_entries=False) + @cached(max_entries=500000, iterable=True) async def get_rooms_for_user_with_stream_ordering( self, user_id: str ) -> FrozenSet[GetRoomsForUserWithStreamOrdering]: @@ -732,25 +734,76 @@ class RoomMemberWorkerStore(EventsWorkerStore): ) return frozenset(r.room_id for r in rooms) - @cached( - max_entries=500000, - cache_context=True, - iterable=True, - prune_unread_entries=False, + @cached(max_entries=10000) + async def does_pair_of_users_share_a_room( + self, user_id: str, other_user_id: str + ) -> bool: + raise NotImplementedError() + + @cachedList( + cached_method_name="does_pair_of_users_share_a_room", list_name="other_user_ids" ) - async def get_users_who_share_room_with_user( - self, user_id: str, cache_context: _CacheContext + async def _do_users_share_a_room( + self, user_id: str, other_user_ids: Collection[str] + ) -> Mapping[str, Optional[bool]]: + """Return mapping from user ID to whether they share a room with the + given user. + + Note: `None` and `False` are equivalent and mean they don't share a + room. + """ + + def do_users_share_a_room_txn( + txn: LoggingTransaction, user_ids: Collection[str] + ) -> Dict[str, bool]: + clause, args = make_in_list_sql_clause( + self.database_engine, "state_key", user_ids + ) + + # This query works by fetching both the list of rooms for the target + # user and the set of other users, and then checking if there is any + # overlap. + sql = f""" + SELECT b.state_key + FROM ( + SELECT room_id FROM current_state_events + WHERE type = 'm.room.member' AND membership = 'join' AND state_key = ? + ) AS a + INNER JOIN ( + SELECT room_id, state_key FROM current_state_events + WHERE type = 'm.room.member' AND membership = 'join' AND {clause} + ) AS b using (room_id) + LIMIT 1 + """ + + txn.execute(sql, (user_id, *args)) + return {u: True for u, in txn} + + to_return = {} + for batch_user_ids in batch_iter(other_user_ids, 1000): + res = await self.db_pool.runInteraction( + "do_users_share_a_room", do_users_share_a_room_txn, batch_user_ids + ) + to_return.update(res) + + return to_return + + async def do_users_share_a_room( + self, user_id: str, other_user_ids: Collection[str] ) -> Set[str]: + """Return the set of users who share a room with the first users""" + + user_dict = await self._do_users_share_a_room(user_id, other_user_ids) + + return {u for u, share_room in user_dict.items() if share_room} + + async def get_users_who_share_room_with_user(self, user_id: str) -> Set[str]: """Returns the set of users who share a room with `user_id`""" - room_ids = await self.get_rooms_for_user( - user_id, on_invalidate=cache_context.invalidate - ) + room_ids = await self.get_rooms_for_user(user_id) user_who_share_room = set() for room_id in room_ids: - user_ids = await self.get_users_in_room( - room_id, on_invalidate=cache_context.invalidate - ) + user_ids = await self.get_users_in_room(room_id) user_who_share_room.update(user_ids) return user_who_share_room -- cgit 1.5.1 From 8b603299bf2d15bef1db867e08ac90c0f753cc32 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 26 Jul 2022 07:19:20 -0400 Subject: Remove unused argument for get_relations_for_event. (#13383) --- changelog.d/13383.misc | 1 + synapse/handlers/relations.py | 3 --- synapse/storage/databases/main/relations.py | 6 ------ 3 files changed, 1 insertion(+), 9 deletions(-) create mode 100644 changelog.d/13383.misc (limited to 'synapse/storage/databases/main') diff --git a/changelog.d/13383.misc b/changelog.d/13383.misc new file mode 100644 index 0000000000..2236eced24 --- /dev/null +++ b/changelog.d/13383.misc @@ -0,0 +1 @@ +Remove an unused argument to `get_relations_for_event`. diff --git a/synapse/handlers/relations.py b/synapse/handlers/relations.py index 0b63cd2186..8f797e3ae9 100644 --- a/synapse/handlers/relations.py +++ b/synapse/handlers/relations.py @@ -73,7 +73,6 @@ class RelationsHandler: room_id: str, relation_type: Optional[str] = None, event_type: Optional[str] = None, - aggregation_key: Optional[str] = None, limit: int = 5, direction: str = "b", from_token: Optional[StreamToken] = None, @@ -89,7 +88,6 @@ class RelationsHandler: room_id: The room the event belongs to. relation_type: Only fetch events with this relation type, if given. event_type: Only fetch events with this event type, if given. - aggregation_key: Only fetch events with this aggregation key, if given. limit: Only fetch the most recent `limit` events. direction: Whether to fetch the most recent first (`"b"`) or the oldest first (`"f"`). @@ -122,7 +120,6 @@ class RelationsHandler: room_id=room_id, relation_type=relation_type, event_type=event_type, - aggregation_key=aggregation_key, limit=limit, direction=direction, from_token=from_token, diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py index b457bc189e..7bd27790eb 100644 --- a/synapse/storage/databases/main/relations.py +++ b/synapse/storage/databases/main/relations.py @@ -62,7 +62,6 @@ class RelationsWorkerStore(SQLBaseStore): room_id: str, relation_type: Optional[str] = None, event_type: Optional[str] = None, - aggregation_key: Optional[str] = None, limit: int = 5, direction: str = "b", from_token: Optional[StreamToken] = None, @@ -76,7 +75,6 @@ class RelationsWorkerStore(SQLBaseStore): room_id: The room the event belongs to. relation_type: Only fetch events with this relation type, if given. event_type: Only fetch events with this event type, if given. - aggregation_key: Only fetch events with this aggregation key, if given. limit: Only fetch the most recent `limit` events. direction: Whether to fetch the most recent first (`"b"`) or the oldest first (`"f"`). @@ -105,10 +103,6 @@ class RelationsWorkerStore(SQLBaseStore): where_clause.append("type = ?") where_args.append(event_type) - if aggregation_key: - where_clause.append("aggregation_key = ?") - where_args.append(aggregation_key) - pagination_clause = generate_pagination_where_clause( direction=direction, column_names=("topological_ordering", "stream_ordering"), -- cgit 1.5.1 From ca3db044a3b5a207ff8d65ad7b761427ab215ccc Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 26 Jul 2022 12:47:31 +0100 Subject: Fix infinite loop in partial-state resync (#13353) Make sure that we only pull out events from the db once they have no prev-events with partial state. --- changelog.d/13353.bugfix | 1 + synapse/handlers/federation_event.py | 14 +++++++------- synapse/storage/databases/main/events_worker.py | 20 +++++++++++++++++++- 3 files changed, 27 insertions(+), 8 deletions(-) create mode 100644 changelog.d/13353.bugfix (limited to 'synapse/storage/databases/main') diff --git a/changelog.d/13353.bugfix b/changelog.d/13353.bugfix new file mode 100644 index 0000000000..8e18bfae1f --- /dev/null +++ b/changelog.d/13353.bugfix @@ -0,0 +1 @@ +Fix a bug in the experimental faster-room-joins support which could cause it to get stuck in an infinite loop. diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index fc1254d2ad..2ba2b1527e 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -569,15 +569,15 @@ class FederationEventHandler: if context is None or context.partial_state: # this can happen if some or all of the event's prev_events still have - # partial state - ie, an event has an earlier stream_ordering than one - # or more of its prev_events, so we de-partial-state it before its - # prev_events. + # partial state. We were careful to only pick events from the db without + # partial-state prev events, so that implies that a prev event has + # been persisted (with partial state) since we did the query. # - # TODO(faster_joins): we probably need to be more intelligent, and - # exclude partial-state prev_events from consideration - # https://github.com/matrix-org/synapse/issues/13001 + # So, let's just ignore `event` for now; when we re-run the db query + # we should instead get its partial-state prev event, which we will + # de-partial-state, and then come back to event. logger.warning( - "%s still has partial state: can't de-partial-state it yet", + "%s still has prev_events with partial state: can't de-partial-state it yet", event.event_id, ) return diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 5914a35420..29c99c6357 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -2110,11 +2110,29 @@ class EventsWorkerStore(SQLBaseStore): def _get_partial_state_events_batch_txn( txn: LoggingTransaction, room_id: str ) -> List[str]: + # we want to work through the events from oldest to newest, so + # we only want events whose prev_events do *not* have partial state - hence + # the 'NOT EXISTS' clause in the below. + # + # This is necessary because ordering by stream ordering isn't quite enough + # to ensure that we work from oldest to newest event (in particular, + # if an event is initially persisted as an outlier and later de-outliered, + # it can end up with a lower stream_ordering than its prev_events). + # + # Typically this means we'll only return one event per batch, but that's + # hard to do much about. + # + # See also: https://github.com/matrix-org/synapse/issues/13001 txn.execute( """ SELECT event_id FROM partial_state_events AS pse JOIN events USING (event_id) - WHERE pse.room_id = ? + WHERE pse.room_id = ? AND + NOT EXISTS( + SELECT 1 FROM event_edges AS ee + JOIN partial_state_events AS prev_pse ON (prev_pse.event_id=ee.prev_event_id) + WHERE ee.event_id=pse.event_id + ) ORDER BY events.stream_ordering LIMIT 100 """, -- cgit 1.5.1 From 583f22780f44157c50bc2dc5c242e88cc18c7886 Mon Sep 17 00:00:00 2001 From: Šimon Brandner Date: Wed, 27 Jul 2022 20:46:57 +0200 Subject: Use stable prefixes for MSC3827: filtering of `/publicRooms` by room type (#13370) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Šimon Brandner --- changelog.d/13370.feature | 1 + synapse/api/constants.py | 2 +- synapse/config/experimental.py | 3 --- synapse/handlers/room_list.py | 2 +- synapse/rest/client/versions.py | 4 ++-- synapse/storage/databases/main/room.py | 2 +- tests/rest/client/test_rooms.py | 5 ++--- 7 files changed, 8 insertions(+), 11 deletions(-) create mode 100644 changelog.d/13370.feature (limited to 'synapse/storage/databases/main') diff --git a/changelog.d/13370.feature b/changelog.d/13370.feature new file mode 100644 index 0000000000..3a49bc2778 --- /dev/null +++ b/changelog.d/13370.feature @@ -0,0 +1 @@ +Use stable prefixes for [MSC3827](https://github.com/matrix-org/matrix-spec-proposals/pull/3827). diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 2653764119..789859e69e 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -268,4 +268,4 @@ class PublicRoomsFilterFields: """ GENERIC_SEARCH_TERM: Final = "generic_search_term" - ROOM_TYPES: Final = "org.matrix.msc3827.room_types" + ROOM_TYPES: Final = "room_types" diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 1902222d7b..c2ecd977cd 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -88,8 +88,5 @@ class ExperimentalConfig(Config): # MSC3715: dir param on /relations. self.msc3715_enabled: bool = experimental.get("msc3715_enabled", False) - # MSC3827: Filtering of /publicRooms by room type - self.msc3827_enabled: bool = experimental.get("msc3827_enabled", False) - # MSC3848: Introduce errcodes for specific event sending failures self.msc3848_enabled: bool = experimental.get("msc3848_enabled", False) diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py index 29868eb743..bb0bdb8e6f 100644 --- a/synapse/handlers/room_list.py +++ b/synapse/handlers/room_list.py @@ -182,7 +182,7 @@ class RoomListHandler: == HistoryVisibility.WORLD_READABLE, "guest_can_join": room["guest_access"] == "can_join", "join_rule": room["join_rules"], - "org.matrix.msc3827.room_type": room["room_type"], + "room_type": room["room_type"], } # Filter out Nones – rather omit the field altogether diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py index f4f06563dd..0366986755 100644 --- a/synapse/rest/client/versions.py +++ b/synapse/rest/client/versions.py @@ -95,8 +95,8 @@ class VersionsRestServlet(RestServlet): "org.matrix.msc3026.busy_presence": self.config.experimental.msc3026_enabled, # Supports receiving private read receipts as per MSC2285 "org.matrix.msc2285": self.config.experimental.msc2285_enabled, - # Supports filtering of /publicRooms by room type MSC3827 - "org.matrix.msc3827": self.config.experimental.msc3827_enabled, + # Supports filtering of /publicRooms by room type as per MSC3827 + "org.matrix.msc3827.stable": True, # Adds support for importing historical messages as per MSC2716 "org.matrix.msc2716": self.config.experimental.msc2716_enabled, # Adds support for jump to date endpoints (/timestamp_to_event) as per MSC3030 diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index d6d485507b..0f1f0d11ea 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -207,7 +207,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): def _construct_room_type_where_clause( self, room_types: Union[List[Union[str, None]], None] ) -> Tuple[Union[str, None], List[str]]: - if not room_types or not self.config.experimental.msc3827_enabled: + if not room_types: return None, [] else: # We use None when we want get rooms without a type diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index 2272d55d84..aa2f578441 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -2070,7 +2070,6 @@ class PublicRoomsRoomTypeFilterTestCase(unittest.HomeserverTestCase): config = self.default_config() config["allow_public_rooms_without_auth"] = True - config["experimental_features"] = {"msc3827_enabled": True} self.hs = self.setup_test_homeserver(config=config) self.url = b"/_matrix/client/r0/publicRooms" @@ -2123,13 +2122,13 @@ class PublicRoomsRoomTypeFilterTestCase(unittest.HomeserverTestCase): chunk, count = self.make_public_rooms_request([None]) self.assertEqual(count, 1) - self.assertEqual(chunk[0].get("org.matrix.msc3827.room_type", None), None) + self.assertEqual(chunk[0].get("room_type", None), None) def test_returns_only_space_based_on_filter(self) -> None: chunk, count = self.make_public_rooms_request(["m.space"]) self.assertEqual(count, 1) - self.assertEqual(chunk[0].get("org.matrix.msc3827.room_type", None), "m.space") + self.assertEqual(chunk[0].get("room_type", None), "m.space") def test_returns_both_rooms_and_space_based_on_filter(self) -> None: chunk, count = self.make_public_rooms_request(["m.space", None]) -- cgit 1.5.1 From 23768ccb4d00ae6d4c01d30178ba223a4bbb10f2 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Mon, 1 Aug 2022 11:20:05 +0100 Subject: Faster joins: fix rejected events becoming un-rejected during resync (#13413) Make sure that we re-check the auth rules during state resync, otherwise rejected events get un-rejected. --- changelog.d/13413.bugfix | 1 + synapse/handlers/federation_event.py | 29 ++++++++++++++++++++++++++--- synapse/storage/databases/main/state.py | 8 +++++--- 3 files changed, 32 insertions(+), 6 deletions(-) create mode 100644 changelog.d/13413.bugfix (limited to 'synapse/storage/databases/main') diff --git a/changelog.d/13413.bugfix b/changelog.d/13413.bugfix new file mode 100644 index 0000000000..a0ce884274 --- /dev/null +++ b/changelog.d/13413.bugfix @@ -0,0 +1 @@ +Faster room joins: fix a bug which caused rejected events to become un-rejected during state syncing. \ No newline at end of file diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 2ba2b1527e..bcc755a376 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -581,6 +581,13 @@ class FederationEventHandler: event.event_id, ) return + + # since the state at this event has changed, we should now re-evaluate + # whether it should have been rejected. We must already have all of the + # auth events (from last time we went round this path), so there is no + # need to pass the origin. + await self._check_event_auth(None, event, context) + await self._store.update_state_for_partial_state_event(event, context) self._state_storage_controller.notify_event_un_partial_stated( event.event_id @@ -1624,13 +1631,15 @@ class FederationEventHandler: ) async def _check_event_auth( - self, origin: str, event: EventBase, context: EventContext + self, origin: Optional[str], event: EventBase, context: EventContext ) -> None: """ Checks whether an event should be rejected (for failing auth checks). Args: - origin: The host the event originates from. + origin: The host the event originates from. This is used to fetch + any missing auth events. It can be set to None, but only if we are + sure that we already have all the auth events. event: The event itself. context: The event context. @@ -1876,7 +1885,7 @@ class FederationEventHandler: event.internal_metadata.soft_failed = True async def _load_or_fetch_auth_events_for_event( - self, destination: str, event: EventBase + self, destination: Optional[str], event: EventBase ) -> Collection[EventBase]: """Fetch this event's auth_events, from database or remote @@ -1892,12 +1901,19 @@ class FederationEventHandler: Args: destination: where to send the /event_auth request. Typically the server that sent us `event` in the first place. + + If this is None, no attempt is made to load any missing auth events: + rather, an AssertionError is raised if there are any missing events. + event: the event whose auth_events we want Returns: all of the events listed in `event.auth_events_ids`, after deduplication Raises: + AssertionError if some auth events were missing and no `destination` was + supplied. + AuthError if we were unable to fetch the auth_events for any reason. """ event_auth_event_ids = set(event.auth_event_ids()) @@ -1909,6 +1925,13 @@ class FederationEventHandler: ) if not missing_auth_event_ids: return event_auth_events.values() + if destination is None: + # this shouldn't happen: destination must be set unless we know we have already + # persisted the auth events. + raise AssertionError( + "_load_or_fetch_auth_events_for_event() called with no destination for " + "an event with missing auth_events" + ) logger.info( "Event %s refers to unknown auth events %s: fetching auth chain", diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index 9674c4a757..f70705a0af 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -419,13 +419,15 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): # anything that was rejected should have the same state as its # predecessor. if context.rejected: - assert context.state_group == context.state_group_before_event + state_group = context.state_group_before_event + else: + state_group = context.state_group self.db_pool.simple_update_txn( txn, table="event_to_state_groups", keyvalues={"event_id": event.event_id}, - updatevalues={"state_group": context.state_group}, + updatevalues={"state_group": state_group}, ) self.db_pool.simple_delete_one_txn( @@ -440,7 +442,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): txn.call_after( self._get_state_group_for_event.prefill, (event.event_id,), - context.state_group, + state_group, ) -- cgit 1.5.1 From 92d21faf12c982a8d27ad465eb94f2fed0e8b32f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 3 Aug 2022 10:57:38 -0500 Subject: Instrument `/messages` for understandable traces in Jaeger (#13368) In Jaeger: - Before: huge list of uncategorized database calls - After: nice and collapsible into units of work --- changelog.d/13368.misc | 1 + synapse/api/auth.py | 8 +++++++- synapse/federation/federation_client.py | 2 ++ synapse/handlers/federation.py | 2 ++ synapse/handlers/federation_event.py | 5 +++++ synapse/handlers/pagination.py | 2 ++ synapse/handlers/relations.py | 2 ++ synapse/storage/controllers/state.py | 5 +++++ synapse/storage/databases/main/stream.py | 2 ++ synapse/streams/events.py | 2 ++ synapse/visibility.py | 2 ++ 11 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13368.misc (limited to 'synapse/storage/databases/main') diff --git a/changelog.d/13368.misc b/changelog.d/13368.misc new file mode 100644 index 0000000000..4b433a5107 --- /dev/null +++ b/changelog.d/13368.misc @@ -0,0 +1 @@ +Instrument `/messages` for understandable traces in Jaeger. diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 82e6475ef5..523bad0c55 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -31,7 +31,12 @@ from synapse.api.errors import ( from synapse.appservice import ApplicationService from synapse.http import get_request_user_agent from synapse.http.site import SynapseRequest -from synapse.logging.opentracing import active_span, force_tracing, start_active_span +from synapse.logging.opentracing import ( + active_span, + force_tracing, + start_active_span, + trace, +) from synapse.storage.databases.main.registration import TokenLookupResult from synapse.types import Requester, UserID, create_requester @@ -567,6 +572,7 @@ class Auth: return query_params[0].decode("ascii") + @trace async def check_user_in_room_or_world_readable( self, room_id: str, user_id: str, allow_departed_users: bool = False ) -> Tuple[str, Optional[str]]: diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 6a8d76529b..54ffbd8170 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -61,6 +61,7 @@ from synapse.federation.federation_base import ( ) from synapse.federation.transport.client import SendJoinResponse from synapse.http.types import QueryParams +from synapse.logging.opentracing import trace from synapse.types import JsonDict, UserID, get_domain_from_id from synapse.util.async_helpers import concurrently_execute from synapse.util.caches.expiringcache import ExpiringCache @@ -233,6 +234,7 @@ class FederationClient(FederationBase): destination, content, timeout ) + @trace async def backfill( self, dest: str, room_id: str, limit: int, extremities: Collection[str] ) -> Optional[List[EventBase]]: diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 57ad6e5dce..30f1585a85 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -59,6 +59,7 @@ from synapse.events.validator import EventValidator from synapse.federation.federation_client import InvalidResponseError from synapse.http.servlet import assert_params_in_dict from synapse.logging.context import nested_logging_context +from synapse.logging.opentracing import trace from synapse.metrics.background_process_metrics import run_as_background_process from synapse.module_api import NOT_SPAM from synapse.replication.http.federation import ( @@ -180,6 +181,7 @@ class FederationHandler: "resume_sync_partial_state_room", self._resume_sync_partial_state_room ) + @trace async def maybe_backfill( self, room_id: str, current_depth: int, limit: int ) -> bool: diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 91d1439191..8968b705d4 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -59,6 +59,7 @@ from synapse.events import EventBase from synapse.events.snapshot import EventContext from synapse.federation.federation_client import InvalidResponseError from synapse.logging.context import nested_logging_context +from synapse.logging.opentracing import trace from synapse.metrics.background_process_metrics import run_as_background_process from synapse.replication.http.devices import ReplicationUserDevicesResyncRestServlet from synapse.replication.http.federation import ( @@ -566,6 +567,7 @@ class FederationEventHandler: event.event_id ) + @trace async def backfill( self, dest: str, room_id: str, limit: int, extremities: Collection[str] ) -> None: @@ -610,6 +612,7 @@ class FederationEventHandler: backfilled=True, ) + @trace async def _get_missing_events_for_pdu( self, origin: str, pdu: EventBase, prevs: Set[str], min_depth: int ) -> None: @@ -710,6 +713,7 @@ class FederationEventHandler: logger.info("Got %d prev_events", len(missing_events)) await self._process_pulled_events(origin, missing_events, backfilled=False) + @trace async def _process_pulled_events( self, origin: str, events: Iterable[EventBase], backfilled: bool ) -> None: @@ -748,6 +752,7 @@ class FederationEventHandler: with nested_logging_context(ev.event_id): await self._process_pulled_event(origin, ev, backfilled=backfilled) + @trace async def _process_pulled_event( self, origin: str, event: EventBase, backfilled: bool ) -> None: diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 6262a35822..e1e34e3b16 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -24,6 +24,7 @@ from synapse.api.errors import SynapseError from synapse.api.filtering import Filter from synapse.events.utils import SerializeEventConfig from synapse.handlers.room import ShutdownRoomResponse +from synapse.logging.opentracing import trace from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.state import StateFilter from synapse.streams.config import PaginationConfig @@ -416,6 +417,7 @@ class PaginationHandler: await self._storage_controllers.purge_events.purge_room(room_id) + @trace async def get_messages( self, requester: Requester, diff --git a/synapse/handlers/relations.py b/synapse/handlers/relations.py index 8f797e3ae9..72d25df8c8 100644 --- a/synapse/handlers/relations.py +++ b/synapse/handlers/relations.py @@ -19,6 +19,7 @@ import attr from synapse.api.constants import RelationTypes from synapse.api.errors import SynapseError from synapse.events import EventBase, relation_from_event +from synapse.logging.opentracing import trace from synapse.storage.databases.main.relations import _RelatedEvent from synapse.types import JsonDict, Requester, StreamToken, UserID from synapse.visibility import filter_events_for_client @@ -361,6 +362,7 @@ class RelationsHandler: return results + @trace async def get_bundled_aggregations( self, events: Iterable[EventBase], user_id: str ) -> Dict[str, BundledAggregations]: diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py index 1e35046e07..0d480f1014 100644 --- a/synapse/storage/controllers/state.py +++ b/synapse/storage/controllers/state.py @@ -29,6 +29,7 @@ from typing import ( from synapse.api.constants import EventTypes from synapse.events import EventBase +from synapse.logging.opentracing import trace from synapse.storage.state import StateFilter from synapse.storage.util.partial_state_events_tracker import ( PartialCurrentStateTracker, @@ -179,6 +180,7 @@ class StateStorageController: return self.stores.state._get_state_groups_from_groups(groups, state_filter) + @trace async def get_state_for_events( self, event_ids: Collection[str], state_filter: Optional[StateFilter] = None ) -> Dict[str, StateMap[EventBase]]: @@ -225,6 +227,7 @@ class StateStorageController: return {event: event_to_state[event] for event in event_ids} + @trace async def get_state_ids_for_events( self, event_ids: Collection[str], @@ -287,6 +290,7 @@ class StateStorageController: ) return state_map[event_id] + @trace async def get_state_ids_for_event( self, event_id: str, state_filter: Optional[StateFilter] = None ) -> StateMap[str]: @@ -327,6 +331,7 @@ class StateStorageController: groups, state_filter or StateFilter.all() ) + @trace async def get_state_group_for_events( self, event_ids: Collection[str], diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 2590b52f73..a347430aa7 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -58,6 +58,7 @@ from twisted.internet import defer from synapse.api.filtering import Filter from synapse.events import EventBase from synapse.logging.context import make_deferred_yieldable, run_in_background +from synapse.logging.opentracing import trace from synapse.storage._base import SQLBaseStore from synapse.storage.database import ( DatabasePool, @@ -1346,6 +1347,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): return rows, next_token + @trace async def paginate_room_events( self, room_id: str, diff --git a/synapse/streams/events.py b/synapse/streams/events.py index 54e0b1a23b..bcd840bd88 100644 --- a/synapse/streams/events.py +++ b/synapse/streams/events.py @@ -21,6 +21,7 @@ from synapse.handlers.presence import PresenceEventSource from synapse.handlers.receipts import ReceiptEventSource from synapse.handlers.room import RoomEventSource from synapse.handlers.typing import TypingNotificationEventSource +from synapse.logging.opentracing import trace from synapse.streams import EventSource from synapse.types import StreamToken @@ -69,6 +70,7 @@ class EventSources: ) return token + @trace async def get_current_token_for_pagination(self, room_id: str) -> StreamToken: """Get the current token for a given room to be used to paginate events. diff --git a/synapse/visibility.py b/synapse/visibility.py index 9abbaa5a64..d947edde66 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -23,6 +23,7 @@ from synapse.api.constants import EventTypes, HistoryVisibility, Membership from synapse.events import EventBase from synapse.events.snapshot import EventContext from synapse.events.utils import prune_event +from synapse.logging.opentracing import trace from synapse.storage.controllers import StorageControllers from synapse.storage.databases.main import DataStore from synapse.storage.state import StateFilter @@ -51,6 +52,7 @@ MEMBERSHIP_PRIORITY = ( _HISTORY_VIS_KEY: Final[Tuple[str, str]] = (EventTypes.RoomHistoryVisibility, "") +@trace async def filter_events_for_client( storage: StorageControllers, user_id: str, -- cgit 1.5.1 From 41320a0554716aaf7cec6172da98e002c48344c5 Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Thu, 4 Aug 2022 15:49:55 +0100 Subject: Optimise async get event lookups (#13435) Still maintains local in memory lookup optimisation, but does any external lookup as part of the deferred that prevents duplicate lookups for the same event at once. This makes the assumption that fetching from an external cache is a non-zero load operation. --- changelog.d/13435.misc | 1 + synapse/storage/databases/main/events_worker.py | 75 ++++++++++++++++++++++--- synapse/storage/databases/main/roommember.py | 2 +- synapse/util/caches/lrucache.py | 17 ++++++ 4 files changed, 87 insertions(+), 8 deletions(-) create mode 100644 changelog.d/13435.misc (limited to 'synapse/storage/databases/main') diff --git a/changelog.d/13435.misc b/changelog.d/13435.misc new file mode 100644 index 0000000000..c01b9136c8 --- /dev/null +++ b/changelog.d/13435.misc @@ -0,0 +1 @@ +Prevent unnecessary lookups to any external `get_event` cache. Contributed by Nick @ Beeper (@fizzadar). diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 29c99c6357..e9ff6cfb34 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -600,7 +600,11 @@ class EventsWorkerStore(SQLBaseStore): Returns: map from event id to result """ - event_entry_map = await self._get_events_from_cache( + # Shortcut: check if we have any events in the *in memory* cache - this function + # may be called repeatedly for the same event so at this point we cannot reach + # out to any external cache for performance reasons. The external cache is + # checked later on in the `get_missing_events_from_cache_or_db` function below. + event_entry_map = self._get_events_from_local_cache( event_ids, ) @@ -632,7 +636,9 @@ class EventsWorkerStore(SQLBaseStore): if missing_events_ids: - async def get_missing_events_from_db() -> Dict[str, EventCacheEntry]: + async def get_missing_events_from_cache_or_db() -> Dict[ + str, EventCacheEntry + ]: """Fetches the events in `missing_event_ids` from the database. Also creates entries in `self._current_event_fetches` to allow @@ -657,10 +663,18 @@ class EventsWorkerStore(SQLBaseStore): # the events have been redacted, and if so pulling the redaction event # out of the database to check it. # + missing_events = {} try: - missing_events = await self._get_events_from_db( + # Try to fetch from any external cache. We already checked the + # in-memory cache above. + missing_events = await self._get_events_from_external_cache( missing_events_ids, ) + # Now actually fetch any remaining events from the DB + db_missing_events = await self._get_events_from_db( + missing_events_ids - missing_events.keys(), + ) + missing_events.update(db_missing_events) except Exception as e: with PreserveLoggingContext(): fetching_deferred.errback(e) @@ -679,7 +693,7 @@ class EventsWorkerStore(SQLBaseStore): # cancellations, since multiple `_get_events_from_cache_or_db` calls can # reuse the same fetch. missing_events: Dict[str, EventCacheEntry] = await delay_cancellation( - get_missing_events_from_db() + get_missing_events_from_cache_or_db() ) event_entry_map.update(missing_events) @@ -754,7 +768,54 @@ class EventsWorkerStore(SQLBaseStore): async def _get_events_from_cache( self, events: Iterable[str], update_metrics: bool = True ) -> Dict[str, EventCacheEntry]: - """Fetch events from the caches. + """Fetch events from the caches, both in memory and any external. + + May return rejected events. + + Args: + events: list of event_ids to fetch + update_metrics: Whether to update the cache hit ratio metrics + """ + event_map = self._get_events_from_local_cache( + events, update_metrics=update_metrics + ) + + missing_event_ids = (e for e in events if e not in event_map) + event_map.update( + await self._get_events_from_external_cache( + events=missing_event_ids, + update_metrics=update_metrics, + ) + ) + + return event_map + + async def _get_events_from_external_cache( + self, events: Iterable[str], update_metrics: bool = True + ) -> Dict[str, EventCacheEntry]: + """Fetch events from any configured external cache. + + May return rejected events. + + Args: + events: list of event_ids to fetch + update_metrics: Whether to update the cache hit ratio metrics + """ + event_map = {} + + for event_id in events: + ret = await self._get_event_cache.get_external( + (event_id,), None, update_metrics=update_metrics + ) + if ret: + event_map[event_id] = ret + + return event_map + + def _get_events_from_local_cache( + self, events: Iterable[str], update_metrics: bool = True + ) -> Dict[str, EventCacheEntry]: + """Fetch events from the local, in memory, caches. May return rejected events. @@ -766,7 +827,7 @@ class EventsWorkerStore(SQLBaseStore): for event_id in events: # First check if it's in the event cache - ret = await self._get_event_cache.get( + ret = self._get_event_cache.get_local( (event_id,), None, update_metrics=update_metrics ) if ret: @@ -788,7 +849,7 @@ class EventsWorkerStore(SQLBaseStore): # We add the entry back into the cache as we want to keep # recently queried events in the cache. - await self._get_event_cache.set((event_id,), cache_entry) + self._get_event_cache.set_local((event_id,), cache_entry) return event_map diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index e2cccc688c..93ff4816c8 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -896,7 +896,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): # We don't update the event cache hit ratio as it completely throws off # the hit ratio counts. After all, we don't populate the cache if we # miss it here - event_map = await self._get_events_from_cache( + event_map = self._get_events_from_local_cache( member_event_ids, update_metrics=False ) diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index b3bdedb04c..aa93109d13 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -834,9 +834,26 @@ class AsyncLruCache(Generic[KT, VT]): ) -> Optional[VT]: return self._lru_cache.get(key, update_metrics=update_metrics) + async def get_external( + self, + key: KT, + default: Optional[T] = None, + update_metrics: bool = True, + ) -> Optional[VT]: + # This method should fetch from any configured external cache, in this case noop. + return None + + def get_local( + self, key: KT, default: Optional[T] = None, update_metrics: bool = True + ) -> Optional[VT]: + return self._lru_cache.get(key, update_metrics=update_metrics) + async def set(self, key: KT, value: VT) -> None: self._lru_cache.set(key, value) + def set_local(self, key: KT, value: VT) -> None: + self._lru_cache.set(key, value) + async def invalidate(self, key: KT) -> None: # This method should invalidate any external cache and then invalidate the LruCache. return self._lru_cache.invalidate(key) -- cgit 1.5.1 From 96d92156d0f820224f68092e72d6089dceef715a Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 4 Aug 2022 17:45:01 +0100 Subject: Update type of `EventContext.rejected` (#13460) --- changelog.d/13460.misc | 1 + synapse/events/snapshot.py | 7 +++---- synapse/storage/databases/main/events.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) create mode 100644 changelog.d/13460.misc (limited to 'synapse/storage/databases/main') diff --git a/changelog.d/13460.misc b/changelog.d/13460.misc new file mode 100644 index 0000000000..f9e9de219d --- /dev/null +++ b/changelog.d/13460.misc @@ -0,0 +1 @@ +Update type of `EventContext.rejected`. diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index b700cbbfa1..d3c8083e4a 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -11,11 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import TYPE_CHECKING, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, List, Optional, Tuple import attr from frozendict import frozendict -from typing_extensions import Literal from synapse.appservice import ApplicationService from synapse.events import EventBase @@ -33,7 +32,7 @@ class EventContext: Holds information relevant to persisting an event Attributes: - rejected: A rejection reason if the event was rejected, else False + rejected: A rejection reason if the event was rejected, else None _state_group: The ID of the state group for this event. Note that state events are persisted with a state group which includes the new event, so this is @@ -85,7 +84,7 @@ class EventContext: """ _storage: "StorageControllers" - rejected: Union[Literal[False], str] = False + rejected: Optional[str] = None _state_group: Optional[int] = None state_group_before_event: Optional[int] = None _state_delta_due_to_event: Optional[StateMap[str]] = None diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 1f600f1190..5560b38a48 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1490,7 +1490,7 @@ class PersistEventsStore: event.sender, "url" in event.content and isinstance(event.content["url"], str), event.get_state_key(), - context.rejected or None, + context.rejected, ) for event, context in events_and_contexts ), -- cgit 1.5.1 From ec24813220f9d54108924dc04aecd24555277b99 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 4 Aug 2022 15:24:44 -0400 Subject: Improve comments (& avoid a duplicate query) in push actions processing. (#13455) * Adds docstrings and inline comments. * Formats SQL queries using triple quoted strings. * Minor formatting changes. * Avoid fetching `event_push_summary_stream_ordering` multiple times in the same transactions. --- changelog.d/13455.misc | 1 + .../storage/databases/main/event_push_actions.py | 282 ++++++++++++--------- 2 files changed, 159 insertions(+), 124 deletions(-) create mode 100644 changelog.d/13455.misc (limited to 'synapse/storage/databases/main') diff --git a/changelog.d/13455.misc b/changelog.d/13455.misc new file mode 100644 index 0000000000..17462c56f3 --- /dev/null +++ b/changelog.d/13455.misc @@ -0,0 +1 @@ +Add some comments about how event push actions are stored. diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index dd2627037c..5ddddb1cf3 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -265,7 +265,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas counts.notify_count += row[1] counts.unread_count += row[2] - # Next we need to count highlights, which aren't summarized + # Next we need to count highlights, which aren't summarised sql = """ SELECT COUNT(*) FROM event_push_actions WHERE user_id = ? @@ -280,7 +280,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas # Finally we need to count push actions that aren't included in the # summary returned above, e.g. recent events that haven't been - # summarized yet, or the summary is empty due to a recent read receipt. + # summarised yet, or the summary is empty due to a recent read receipt. stream_ordering = max(stream_ordering, summary_stream_ordering) notify_count, unread_count = self._get_notif_unread_count_for_user_room( txn, room_id, user_id, stream_ordering @@ -304,6 +304,17 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas Does not consult `event_push_summary` table, which may include push actions that have been deleted from `event_push_actions` table. + + Args: + txn: The database transaction. + room_id: The room ID to get unread counts for. + user_id: The user ID to get unread counts for. + stream_ordering: The (exclusive) minimum stream ordering to consider. + max_stream_ordering: The (inclusive) maximum stream ordering to consider. + If this is not given, then no maximum is applied. + + Return: + A tuple of the notif count and unread count in the given range. """ # If there have been no events in the room since the stream ordering, @@ -383,27 +394,27 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas ) -> List[Tuple[str, str, int, str, bool]]: # find rooms that have a read receipt in them and return the next # push actions - sql = ( - "SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions," - " ep.highlight " - " FROM (" - " SELECT room_id," - " MAX(stream_ordering) as stream_ordering" - " FROM events" - " INNER JOIN receipts_linearized USING (room_id, event_id)" - " WHERE receipt_type = 'm.read' AND user_id = ?" - " GROUP BY room_id" - ") AS rl," - " event_push_actions AS ep" - " WHERE" - " ep.room_id = rl.room_id" - " AND ep.stream_ordering > rl.stream_ordering" - " AND ep.user_id = ?" - " AND ep.stream_ordering > ?" - " AND ep.stream_ordering <= ?" - " AND ep.notif = 1" - " ORDER BY ep.stream_ordering ASC LIMIT ?" - ) + sql = """ + SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions, + ep.highlight + FROM ( + SELECT room_id, + MAX(stream_ordering) as stream_ordering + FROM events + INNER JOIN receipts_linearized USING (room_id, event_id) + WHERE receipt_type = 'm.read' AND user_id = ? + GROUP BY room_id + ) AS rl, + event_push_actions AS ep + WHERE + ep.room_id = rl.room_id + AND ep.stream_ordering > rl.stream_ordering + AND ep.user_id = ? + AND ep.stream_ordering > ? + AND ep.stream_ordering <= ? + AND ep.notif = 1 + ORDER BY ep.stream_ordering ASC LIMIT ? + """ args = [user_id, user_id, min_stream_ordering, max_stream_ordering, limit] txn.execute(sql, args) return cast(List[Tuple[str, str, int, str, bool]], txn.fetchall()) @@ -418,23 +429,23 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas def get_no_receipt( txn: LoggingTransaction, ) -> List[Tuple[str, str, int, str, bool]]: - sql = ( - "SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions," - " ep.highlight " - " FROM event_push_actions AS ep" - " INNER JOIN events AS e USING (room_id, event_id)" - " WHERE" - " ep.room_id NOT IN (" - " SELECT room_id FROM receipts_linearized" - " WHERE receipt_type = 'm.read' AND user_id = ?" - " GROUP BY room_id" - " )" - " AND ep.user_id = ?" - " AND ep.stream_ordering > ?" - " AND ep.stream_ordering <= ?" - " AND ep.notif = 1" - " ORDER BY ep.stream_ordering ASC LIMIT ?" - ) + sql = """ + SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions, + ep.highlight + FROM event_push_actions AS ep + INNER JOIN events AS e USING (room_id, event_id) + WHERE + ep.room_id NOT IN ( + SELECT room_id FROM receipts_linearized + WHERE receipt_type = 'm.read' AND user_id = ? + GROUP BY room_id + ) + AND ep.user_id = ? + AND ep.stream_ordering > ? + AND ep.stream_ordering <= ? + AND ep.notif = 1 + ORDER BY ep.stream_ordering ASC LIMIT ? + """ args = [user_id, user_id, min_stream_ordering, max_stream_ordering, limit] txn.execute(sql, args) return cast(List[Tuple[str, str, int, str, bool]], txn.fetchall()) @@ -490,28 +501,28 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas def get_after_receipt( txn: LoggingTransaction, ) -> List[Tuple[str, str, int, str, bool, int]]: - sql = ( - "SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions," - " ep.highlight, e.received_ts" - " FROM (" - " SELECT room_id," - " MAX(stream_ordering) as stream_ordering" - " FROM events" - " INNER JOIN receipts_linearized USING (room_id, event_id)" - " WHERE receipt_type = 'm.read' AND user_id = ?" - " GROUP BY room_id" - ") AS rl," - " event_push_actions AS ep" - " INNER JOIN events AS e USING (room_id, event_id)" - " WHERE" - " ep.room_id = rl.room_id" - " AND ep.stream_ordering > rl.stream_ordering" - " AND ep.user_id = ?" - " AND ep.stream_ordering > ?" - " AND ep.stream_ordering <= ?" - " AND ep.notif = 1" - " ORDER BY ep.stream_ordering DESC LIMIT ?" - ) + sql = """ + SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions, + ep.highlight, e.received_ts + FROM ( + SELECT room_id, + MAX(stream_ordering) as stream_ordering + FROM events + INNER JOIN receipts_linearized USING (room_id, event_id) + WHERE receipt_type = 'm.read' AND user_id = ? + GROUP BY room_id + ) AS rl, + event_push_actions AS ep + INNER JOIN events AS e USING (room_id, event_id) + WHERE + ep.room_id = rl.room_id + AND ep.stream_ordering > rl.stream_ordering + AND ep.user_id = ? + AND ep.stream_ordering > ? + AND ep.stream_ordering <= ? + AND ep.notif = 1 + ORDER BY ep.stream_ordering DESC LIMIT ? + """ args = [user_id, user_id, min_stream_ordering, max_stream_ordering, limit] txn.execute(sql, args) return cast(List[Tuple[str, str, int, str, bool, int]], txn.fetchall()) @@ -526,23 +537,23 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas def get_no_receipt( txn: LoggingTransaction, ) -> List[Tuple[str, str, int, str, bool, int]]: - sql = ( - "SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions," - " ep.highlight, e.received_ts" - " FROM event_push_actions AS ep" - " INNER JOIN events AS e USING (room_id, event_id)" - " WHERE" - " ep.room_id NOT IN (" - " SELECT room_id FROM receipts_linearized" - " WHERE receipt_type = 'm.read' AND user_id = ?" - " GROUP BY room_id" - " )" - " AND ep.user_id = ?" - " AND ep.stream_ordering > ?" - " AND ep.stream_ordering <= ?" - " AND ep.notif = 1" - " ORDER BY ep.stream_ordering DESC LIMIT ?" - ) + sql = """ + SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions, + ep.highlight, e.received_ts + FROM event_push_actions AS ep + INNER JOIN events AS e USING (room_id, event_id) + WHERE + ep.room_id NOT IN ( + SELECT room_id FROM receipts_linearized + WHERE receipt_type = 'm.read' AND user_id = ? + GROUP BY room_id + ) + AND ep.user_id = ? + AND ep.stream_ordering > ? + AND ep.stream_ordering <= ? + AND ep.notif = 1 + ORDER BY ep.stream_ordering DESC LIMIT ? + """ args = [user_id, user_id, min_stream_ordering, max_stream_ordering, limit] txn.execute(sql, args) return cast(List[Tuple[str, str, int, str, bool, int]], txn.fetchall()) @@ -769,12 +780,12 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas # [10, , 20], we should treat this as being equivalent to # [10, 10, 20]. # - sql = ( - "SELECT received_ts FROM events" - " WHERE stream_ordering <= ?" - " ORDER BY stream_ordering DESC" - " LIMIT 1" - ) + sql = """ + SELECT received_ts FROM events + WHERE stream_ordering <= ? + ORDER BY stream_ordering DESC + LIMIT 1 + """ while range_end - range_start > 0: middle = (range_end + range_start) // 2 @@ -802,14 +813,14 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas self, stream_ordering: int ) -> Optional[int]: def f(txn: LoggingTransaction) -> Optional[Tuple[int]]: - sql = ( - "SELECT e.received_ts" - " FROM event_push_actions AS ep" - " JOIN events e ON ep.room_id = e.room_id AND ep.event_id = e.event_id" - " WHERE ep.stream_ordering > ? AND notif = 1" - " ORDER BY ep.stream_ordering ASC" - " LIMIT 1" - ) + sql = """ + SELECT e.received_ts + FROM event_push_actions AS ep + JOIN events e ON ep.room_id = e.room_id AND ep.event_id = e.event_id + WHERE ep.stream_ordering > ? AND notif = 1 + ORDER BY ep.stream_ordering ASC + LIMIT 1 + """ txn.execute(sql, (stream_ordering,)) return cast(Optional[Tuple[int]], txn.fetchone()) @@ -858,10 +869,13 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas Any push actions which predate the user's most recent read receipt are now redundant, so we can remove them from `event_push_actions` and update `event_push_summary`. + + Returns true if all new receipts have been processed. """ limit = 100 + # The (inclusive) receipt stream ID that was previously processed.. min_receipts_stream_id = self.db_pool.simple_select_one_onecol_txn( txn, table="event_push_summary_last_receipt_stream_id", @@ -871,6 +885,14 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas max_receipts_stream_id = self._receipts_id_gen.get_current_token() + # The (inclusive) event stream ordering that was previously summarised. + old_rotate_stream_ordering = self.db_pool.simple_select_one_onecol_txn( + txn, + table="event_push_summary_stream_ordering", + keyvalues={}, + retcol="stream_ordering", + ) + sql = """ SELECT r.stream_id, r.room_id, r.user_id, e.stream_ordering FROM receipts_linearized AS r @@ -895,13 +917,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas ) rows = txn.fetchall() - old_rotate_stream_ordering = self.db_pool.simple_select_one_onecol_txn( - txn, - table="event_push_summary_stream_ordering", - keyvalues={}, - retcol="stream_ordering", - ) - # For each new read receipt we delete push actions from before it and # recalculate the summary. for _, room_id, user_id, stream_ordering in rows: @@ -920,10 +935,13 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas (room_id, user_id, stream_ordering), ) + # Fetch the notification counts between the stream ordering of the + # latest receipt and what was previously summarised. notif_count, unread_count = self._get_notif_unread_count_for_user_room( txn, room_id, user_id, stream_ordering, old_rotate_stream_ordering ) + # Replace the previous summary with the new counts. self.db_pool.simple_upsert_txn( txn, table="event_push_summary", @@ -956,10 +974,12 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas return len(rows) < limit def _rotate_notifs_txn(self, txn: LoggingTransaction) -> bool: - """Archives older notifications into event_push_summary. Returns whether - the archiving process has caught up or not. + """Archives older notifications (from event_push_actions) into event_push_summary. + + Returns whether the archiving process has caught up or not. """ + # The (inclusive) event stream ordering that was previously summarised. old_rotate_stream_ordering = self.db_pool.simple_select_one_onecol_txn( txn, table="event_push_summary_stream_ordering", @@ -974,7 +994,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas SELECT stream_ordering FROM event_push_actions WHERE stream_ordering > ? ORDER BY stream_ordering ASC LIMIT 1 OFFSET ? - """, + """, (old_rotate_stream_ordering, self._rotate_count), ) stream_row = txn.fetchone() @@ -993,19 +1013,31 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas logger.info("Rotating notifications up to: %s", rotate_to_stream_ordering) - self._rotate_notifs_before_txn(txn, rotate_to_stream_ordering) + self._rotate_notifs_before_txn( + txn, old_rotate_stream_ordering, rotate_to_stream_ordering + ) return caught_up def _rotate_notifs_before_txn( - self, txn: LoggingTransaction, rotate_to_stream_ordering: int + self, + txn: LoggingTransaction, + old_rotate_stream_ordering: int, + rotate_to_stream_ordering: int, ) -> None: - old_rotate_stream_ordering = self.db_pool.simple_select_one_onecol_txn( - txn, - table="event_push_summary_stream_ordering", - keyvalues={}, - retcol="stream_ordering", - ) + """Archives older notifications (from event_push_actions) into event_push_summary. + + Any event_push_actions between old_rotate_stream_ordering (exclusive) and + rotate_to_stream_ordering (inclusive) will be added to the event_push_summary + table. + + Args: + txn: The database transaction. + old_rotate_stream_ordering: The previous maximum event stream ordering. + rotate_to_stream_ordering: The new maximum event stream ordering to summarise. + + Returns whether the archiving process has caught up or not. + """ # Calculate the new counts that should be upserted into event_push_summary sql = """ @@ -1093,9 +1125,9 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas async def _remove_old_push_actions_that_have_rotated( self, ) -> None: - """Clear out old push actions that have been summarized.""" + """Clear out old push actions that have been summarised.""" - # We want to clear out anything that older than a day that *has* already + # We want to clear out anything that is older than a day that *has* already # been rotated. rotated_upto_stream_ordering = await self.db_pool.simple_select_one_onecol( table="event_push_summary_stream_ordering", @@ -1119,7 +1151,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas SELECT stream_ordering FROM event_push_actions WHERE stream_ordering <= ? AND highlight = 0 ORDER BY stream_ordering ASC LIMIT 1 OFFSET ? - """, + """, ( max_stream_ordering_to_delete, batch_size, @@ -1215,16 +1247,18 @@ class EventPushActionsStore(EventPushActionsWorkerStore): # NB. This assumes event_ids are globally unique since # it makes the query easier to index - sql = ( - "SELECT epa.event_id, epa.room_id," - " epa.stream_ordering, epa.topological_ordering," - " epa.actions, epa.highlight, epa.profile_tag, e.received_ts" - " FROM event_push_actions epa, events e" - " WHERE epa.event_id = e.event_id" - " AND epa.user_id = ? %s" - " AND epa.notif = 1" - " ORDER BY epa.stream_ordering DESC" - " LIMIT ?" % (before_clause,) + sql = """ + SELECT epa.event_id, epa.room_id, + epa.stream_ordering, epa.topological_ordering, + epa.actions, epa.highlight, epa.profile_tag, e.received_ts + FROM event_push_actions epa, events e + WHERE epa.event_id = e.event_id + AND epa.user_id = ? %s + AND epa.notif = 1 + ORDER BY epa.stream_ordering DESC + LIMIT ? + """ % ( + before_clause, ) txn.execute(sql, args) return cast( -- cgit 1.5.1 From b6a6bb4027c1a812361ac127b8c5ea1226be295d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 4 Aug 2022 20:38:08 +0100 Subject: Add comments about how event push actions are stored. (#13445) --- changelog.d/13445.misc | 1 + .../storage/databases/main/event_push_actions.py | 61 ++++++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 changelog.d/13445.misc (limited to 'synapse/storage/databases/main') diff --git a/changelog.d/13445.misc b/changelog.d/13445.misc new file mode 100644 index 0000000000..17462c56f3 --- /dev/null +++ b/changelog.d/13445.misc @@ -0,0 +1 @@ +Add some comments about how event push actions are stored. diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 5ddddb1cf3..5db70f9a60 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -12,6 +12,67 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +"""Responsible for storing and fetching push actions / notifications. + +There are two main uses for push actions: + 1. Sending out push to a user's device; and + 2. Tracking per-room per-user notification counts (used in sync requests). + +For the former we simply use the `event_push_actions` table, which contains all +the calculated actions for a given user (which were calculated by the +`BulkPushRuleEvaluator`). + +For the latter we could simply count the number of rows in `event_push_actions` +table for a given room/user, but in practice this is *very* heavyweight when +there were a large number of notifications (due to e.g. the user never reading a +room). Plus, keeping all push actions indefinitely uses a lot of disk space. + +To fix these issues, we add a new table `event_push_summary` that tracks +per-user per-room counts of all notifications that happened before a stream +ordering S. Thus, to get the notification count for a user / room we can simply +query a single row in `event_push_summary` and count the number of rows in +`event_push_actions` with a stream ordering larger than S (and as long as S is +"recent", the number of rows needing to be scanned will be small). + +The `event_push_summary` table is updated via a background job that periodically +chooses a new stream ordering S' (usually the latest stream ordering), counts +all notifications in `event_push_actions` between the existing S and S', and +adds them to the existing counts in `event_push_summary`. + +This allows us to delete old rows from `event_push_actions` once those rows have +been counted and added to `event_push_summary` (we call this process +"rotation"). + + +We need to handle when a user sends a read receipt to the room. Again this is +done as a background process. For each receipt we clear the row in +`event_push_summary` and count the number of notifications in +`event_push_actions` that happened after the receipt but before S, and insert +that count into `event_push_summary` (If the receipt happened *after* S then we +simply clear the `event_push_summary`.) + +Note that its possible that if the read receipt is for an old event the relevant +`event_push_actions` rows will have been rotated and we get the wrong count +(it'll be too low). We accept this as a rare edge case that is unlikely to +impact the user much (since the vast majority of read receipts will be for the +latest event). + +The last complication is to handle the race where we request the notifications +counts after a user sends a read receipt into the room, but *before* the +background update handles the receipt (without any special handling the counts +would be outdated). We fix this by including in `event_push_summary` the read +receipt we used when updating `event_push_summary`, and every time we query the +table we check if that matches the most recent read receipt in the room. If yes, +continue as above, if not we simply query the `event_push_actions` table +directly. + +Since read receipts are almost always for recent events, scanning the +`event_push_actions` table in this case is unlikely to be a problem. Even if it +is a problem, it is temporary until the background job handles the new read +receipt. +""" + import logging from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union, cast -- cgit 1.5.1 From ab18441573dc14cea1fe4082b2a89b9d392a4b9f Mon Sep 17 00:00:00 2001 From: Šimon Brandner Date: Fri, 5 Aug 2022 17:09:33 +0200 Subject: Support stable identifiers for MSC2285: private read receipts. (#13273) This adds support for the stable identifiers of MSC2285 while continuing to support the unstable identifiers behind the configuration flag. These will be removed in a future version. --- changelog.d/13273.feature | 1 + synapse/api/constants.py | 3 +- synapse/config/experimental.py | 2 +- synapse/handlers/initial_sync.py | 11 +-- synapse/handlers/receipts.py | 36 ++++++--- synapse/replication/tcp/client.py | 5 +- synapse/rest/client/notifications.py | 7 +- synapse/rest/client/read_marker.py | 8 +- synapse/rest/client/receipts.py | 10 ++- synapse/rest/client/versions.py | 1 + .../storage/databases/main/event_push_actions.py | 85 ++++++++++++++++++---- tests/handlers/test_receipts.py | 58 +++++++++++---- tests/rest/client/test_sync.py | 58 ++++++++++----- tests/storage/test_receipts.py | 55 +++++++++----- 14 files changed, 246 insertions(+), 94 deletions(-) create mode 100644 changelog.d/13273.feature (limited to 'synapse/storage/databases/main') diff --git a/changelog.d/13273.feature b/changelog.d/13273.feature new file mode 100644 index 0000000000..53110d74e9 --- /dev/null +++ b/changelog.d/13273.feature @@ -0,0 +1 @@ +Add support for stable prefixes for [MSC2285 (private read receipts)](https://github.com/matrix-org/matrix-spec-proposals/pull/2285). diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 789859e69e..1d46fb0e43 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -257,7 +257,8 @@ class GuestAccess: class ReceiptTypes: READ: Final = "m.read" - READ_PRIVATE: Final = "org.matrix.msc2285.read.private" + READ_PRIVATE: Final = "m.read.private" + UNSTABLE_READ_PRIVATE: Final = "org.matrix.msc2285.read.private" FULLY_READ: Final = "m.fully_read" diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index c2ecd977cd..7d17c958bb 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -32,7 +32,7 @@ class ExperimentalConfig(Config): # MSC2716 (importing historical messages) self.msc2716_enabled: bool = experimental.get("msc2716_enabled", False) - # MSC2285 (private read receipts) + # MSC2285 (unstable private read receipts) self.msc2285_enabled: bool = experimental.get("msc2285_enabled", False) # MSC3244 (room version capabilities) diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py index 85b472f250..6484e47e5f 100644 --- a/synapse/handlers/initial_sync.py +++ b/synapse/handlers/initial_sync.py @@ -143,8 +143,8 @@ class InitialSyncHandler: joined_rooms, to_key=int(now_token.receipt_key), ) - if self.hs.config.experimental.msc2285_enabled: - receipt = ReceiptEventSource.filter_out_private_receipts(receipt, user_id) + + receipt = ReceiptEventSource.filter_out_private_receipts(receipt, user_id) tags_by_room = await self.store.get_tags_for_user(user_id) @@ -456,11 +456,8 @@ class InitialSyncHandler: ) if not receipts: return [] - if self.hs.config.experimental.msc2285_enabled: - receipts = ReceiptEventSource.filter_out_private_receipts( - receipts, user_id - ) - return receipts + + return ReceiptEventSource.filter_out_private_receipts(receipts, user_id) presence, receipts, (messages, token) = await make_deferred_yieldable( gather_results( diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 43d2882b0a..d4a866b346 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -163,7 +163,10 @@ class ReceiptsHandler: if not is_new: return - if self.federation_sender and receipt_type != ReceiptTypes.READ_PRIVATE: + if self.federation_sender and receipt_type not in ( + ReceiptTypes.READ_PRIVATE, + ReceiptTypes.UNSTABLE_READ_PRIVATE, + ): await self.federation_sender.send_read_receipt(receipt) @@ -203,24 +206,38 @@ class ReceiptEventSource(EventSource[int, JsonDict]): for event_id, orig_event_content in room.get("content", {}).items(): event_content = orig_event_content # If there are private read receipts, additional logic is necessary. - if ReceiptTypes.READ_PRIVATE in event_content: + if ( + ReceiptTypes.READ_PRIVATE in event_content + or ReceiptTypes.UNSTABLE_READ_PRIVATE in event_content + ): # Make a copy without private read receipts to avoid leaking # other user's private read receipts.. event_content = { receipt_type: receipt_value for receipt_type, receipt_value in event_content.items() - if receipt_type != ReceiptTypes.READ_PRIVATE + if receipt_type + not in ( + ReceiptTypes.READ_PRIVATE, + ReceiptTypes.UNSTABLE_READ_PRIVATE, + ) } # Copy the current user's private read receipt from the # original content, if it exists. - user_private_read_receipt = orig_event_content[ - ReceiptTypes.READ_PRIVATE - ].get(user_id, None) + user_private_read_receipt = orig_event_content.get( + ReceiptTypes.READ_PRIVATE, {} + ).get(user_id, None) if user_private_read_receipt: event_content[ReceiptTypes.READ_PRIVATE] = { user_id: user_private_read_receipt } + user_unstable_private_read_receipt = orig_event_content.get( + ReceiptTypes.UNSTABLE_READ_PRIVATE, {} + ).get(user_id, None) + if user_unstable_private_read_receipt: + event_content[ReceiptTypes.UNSTABLE_READ_PRIVATE] = { + user_id: user_unstable_private_read_receipt + } # Include the event if there is at least one non-private read # receipt or the current user has a private read receipt. @@ -256,10 +273,9 @@ class ReceiptEventSource(EventSource[int, JsonDict]): room_ids, from_key=from_key, to_key=to_key ) - if self.config.experimental.msc2285_enabled: - events = ReceiptEventSource.filter_out_private_receipts( - events, user.to_string() - ) + events = ReceiptEventSource.filter_out_private_receipts( + events, user.to_string() + ) return events, to_key diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index e4f2201c92..1ed7230e32 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -416,7 +416,10 @@ class FederationSenderHandler: if not self._is_mine_id(receipt.user_id): continue # Private read receipts never get sent over federation. - if receipt.receipt_type == ReceiptTypes.READ_PRIVATE: + if receipt.receipt_type in ( + ReceiptTypes.READ_PRIVATE, + ReceiptTypes.UNSTABLE_READ_PRIVATE, + ): continue receipt_info = ReadReceipt( receipt.room_id, diff --git a/synapse/rest/client/notifications.py b/synapse/rest/client/notifications.py index 24bc7c9095..a73322a6a4 100644 --- a/synapse/rest/client/notifications.py +++ b/synapse/rest/client/notifications.py @@ -58,7 +58,12 @@ class NotificationsServlet(RestServlet): ) receipts_by_room = await self.store.get_receipts_for_user_with_orderings( - user_id, [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE] + user_id, + [ + ReceiptTypes.READ, + ReceiptTypes.READ_PRIVATE, + ReceiptTypes.UNSTABLE_READ_PRIVATE, + ], ) notif_event_ids = [pa.event_id for pa in push_actions] diff --git a/synapse/rest/client/read_marker.py b/synapse/rest/client/read_marker.py index 8896f2df50..aaad8b233f 100644 --- a/synapse/rest/client/read_marker.py +++ b/synapse/rest/client/read_marker.py @@ -40,9 +40,13 @@ class ReadMarkerRestServlet(RestServlet): self.read_marker_handler = hs.get_read_marker_handler() self.presence_handler = hs.get_presence_handler() - self._known_receipt_types = {ReceiptTypes.READ, ReceiptTypes.FULLY_READ} + self._known_receipt_types = { + ReceiptTypes.READ, + ReceiptTypes.FULLY_READ, + ReceiptTypes.READ_PRIVATE, + } if hs.config.experimental.msc2285_enabled: - self._known_receipt_types.add(ReceiptTypes.READ_PRIVATE) + self._known_receipt_types.add(ReceiptTypes.UNSTABLE_READ_PRIVATE) async def on_POST( self, request: SynapseRequest, room_id: str diff --git a/synapse/rest/client/receipts.py b/synapse/rest/client/receipts.py index 409bfd43c1..c6108fc5eb 100644 --- a/synapse/rest/client/receipts.py +++ b/synapse/rest/client/receipts.py @@ -44,11 +44,13 @@ class ReceiptRestServlet(RestServlet): self.read_marker_handler = hs.get_read_marker_handler() self.presence_handler = hs.get_presence_handler() - self._known_receipt_types = {ReceiptTypes.READ} + self._known_receipt_types = { + ReceiptTypes.READ, + ReceiptTypes.READ_PRIVATE, + ReceiptTypes.FULLY_READ, + } if hs.config.experimental.msc2285_enabled: - self._known_receipt_types.update( - (ReceiptTypes.READ_PRIVATE, ReceiptTypes.FULLY_READ) - ) + self._known_receipt_types.add(ReceiptTypes.UNSTABLE_READ_PRIVATE) async def on_POST( self, request: SynapseRequest, room_id: str, receipt_type: str, event_id: str diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py index 0366986755..c9a830cbac 100644 --- a/synapse/rest/client/versions.py +++ b/synapse/rest/client/versions.py @@ -94,6 +94,7 @@ class VersionsRestServlet(RestServlet): # Supports the busy presence state described in MSC3026. "org.matrix.msc3026.busy_presence": self.config.experimental.msc3026_enabled, # Supports receiving private read receipts as per MSC2285 + "org.matrix.msc2285.stable": True, # TODO: Remove when MSC2285 becomes a part of the spec "org.matrix.msc2285": self.config.experimental.msc2285_enabled, # Supports filtering of /publicRooms by room type as per MSC3827 "org.matrix.msc3827.stable": True, diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 5db70f9a60..161aad0f89 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -80,7 +80,7 @@ import attr from synapse.api.constants import ReceiptTypes from synapse.metrics.background_process_metrics import wrap_as_background_process -from synapse.storage._base import SQLBaseStore, db_to_json +from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause from synapse.storage.database import ( DatabasePool, LoggingDatabaseConnection, @@ -259,7 +259,11 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas txn, user_id, room_id, - receipt_types=(ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE), + receipt_types=( + ReceiptTypes.READ, + ReceiptTypes.READ_PRIVATE, + ReceiptTypes.UNSTABLE_READ_PRIVATE, + ), ) stream_ordering = None @@ -448,6 +452,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas The list will be ordered by ascending stream_ordering. The list will have between 0~limit entries. """ + # find rooms that have a read receipt in them and return the next # push actions def get_after_receipt( @@ -455,7 +460,18 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas ) -> List[Tuple[str, str, int, str, bool]]: # find rooms that have a read receipt in them and return the next # push actions - sql = """ + + receipt_types_clause, args = make_in_list_sql_clause( + self.database_engine, + "receipt_type", + ( + ReceiptTypes.READ, + ReceiptTypes.READ_PRIVATE, + ReceiptTypes.UNSTABLE_READ_PRIVATE, + ), + ) + + sql = f""" SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions, ep.highlight FROM ( @@ -463,10 +479,10 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas MAX(stream_ordering) as stream_ordering FROM events INNER JOIN receipts_linearized USING (room_id, event_id) - WHERE receipt_type = 'm.read' AND user_id = ? + WHERE {receipt_types_clause} AND user_id = ? GROUP BY room_id ) AS rl, - event_push_actions AS ep + event_push_actions AS ep WHERE ep.room_id = rl.room_id AND ep.stream_ordering > rl.stream_ordering @@ -476,7 +492,9 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas AND ep.notif = 1 ORDER BY ep.stream_ordering ASC LIMIT ? """ - args = [user_id, user_id, min_stream_ordering, max_stream_ordering, limit] + args.extend( + (user_id, user_id, min_stream_ordering, max_stream_ordering, limit) + ) txn.execute(sql, args) return cast(List[Tuple[str, str, int, str, bool]], txn.fetchall()) @@ -490,7 +508,17 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas def get_no_receipt( txn: LoggingTransaction, ) -> List[Tuple[str, str, int, str, bool]]: - sql = """ + receipt_types_clause, args = make_in_list_sql_clause( + self.database_engine, + "receipt_type", + ( + ReceiptTypes.READ, + ReceiptTypes.READ_PRIVATE, + ReceiptTypes.UNSTABLE_READ_PRIVATE, + ), + ) + + sql = f""" SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions, ep.highlight FROM event_push_actions AS ep @@ -498,7 +526,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas WHERE ep.room_id NOT IN ( SELECT room_id FROM receipts_linearized - WHERE receipt_type = 'm.read' AND user_id = ? + WHERE {receipt_types_clause} AND user_id = ? GROUP BY room_id ) AND ep.user_id = ? @@ -507,7 +535,9 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas AND ep.notif = 1 ORDER BY ep.stream_ordering ASC LIMIT ? """ - args = [user_id, user_id, min_stream_ordering, max_stream_ordering, limit] + args.extend( + (user_id, user_id, min_stream_ordering, max_stream_ordering, limit) + ) txn.execute(sql, args) return cast(List[Tuple[str, str, int, str, bool]], txn.fetchall()) @@ -557,12 +587,23 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas The list will be ordered by descending received_ts. The list will have between 0~limit entries. """ + # find rooms that have a read receipt in them and return the most recent # push actions def get_after_receipt( txn: LoggingTransaction, ) -> List[Tuple[str, str, int, str, bool, int]]: - sql = """ + receipt_types_clause, args = make_in_list_sql_clause( + self.database_engine, + "receipt_type", + ( + ReceiptTypes.READ, + ReceiptTypes.READ_PRIVATE, + ReceiptTypes.UNSTABLE_READ_PRIVATE, + ), + ) + + sql = f""" SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions, ep.highlight, e.received_ts FROM ( @@ -570,7 +611,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas MAX(stream_ordering) as stream_ordering FROM events INNER JOIN receipts_linearized USING (room_id, event_id) - WHERE receipt_type = 'm.read' AND user_id = ? + WHERE {receipt_types_clause} AND user_id = ? GROUP BY room_id ) AS rl, event_push_actions AS ep @@ -584,7 +625,9 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas AND ep.notif = 1 ORDER BY ep.stream_ordering DESC LIMIT ? """ - args = [user_id, user_id, min_stream_ordering, max_stream_ordering, limit] + args.extend( + (user_id, user_id, min_stream_ordering, max_stream_ordering, limit) + ) txn.execute(sql, args) return cast(List[Tuple[str, str, int, str, bool, int]], txn.fetchall()) @@ -598,7 +641,17 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas def get_no_receipt( txn: LoggingTransaction, ) -> List[Tuple[str, str, int, str, bool, int]]: - sql = """ + receipt_types_clause, args = make_in_list_sql_clause( + self.database_engine, + "receipt_type", + ( + ReceiptTypes.READ, + ReceiptTypes.READ_PRIVATE, + ReceiptTypes.UNSTABLE_READ_PRIVATE, + ), + ) + + sql = f""" SELECT ep.event_id, ep.room_id, ep.stream_ordering, ep.actions, ep.highlight, e.received_ts FROM event_push_actions AS ep @@ -606,7 +659,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas WHERE ep.room_id NOT IN ( SELECT room_id FROM receipts_linearized - WHERE receipt_type = 'm.read' AND user_id = ? + WHERE {receipt_types_clause} AND user_id = ? GROUP BY room_id ) AND ep.user_id = ? @@ -615,7 +668,9 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas AND ep.notif = 1 ORDER BY ep.stream_ordering DESC LIMIT ? """ - args = [user_id, user_id, min_stream_ordering, max_stream_ordering, limit] + args.extend( + (user_id, user_id, min_stream_ordering, max_stream_ordering, limit) + ) txn.execute(sql, args) return cast(List[Tuple[str, str, int, str, bool, int]], txn.fetchall()) diff --git a/tests/handlers/test_receipts.py b/tests/handlers/test_receipts.py index a95868b5c0..5f70a2db79 100644 --- a/tests/handlers/test_receipts.py +++ b/tests/handlers/test_receipts.py @@ -15,6 +15,8 @@ from copy import deepcopy from typing import List +from parameterized import parameterized + from synapse.api.constants import EduTypes, ReceiptTypes from synapse.types import JsonDict @@ -25,13 +27,16 @@ class ReceiptsTestCase(unittest.HomeserverTestCase): def prepare(self, reactor, clock, hs): self.event_source = hs.get_event_sources().sources.receipt - def test_filters_out_private_receipt(self): + @parameterized.expand( + [ReceiptTypes.READ_PRIVATE, ReceiptTypes.UNSTABLE_READ_PRIVATE] + ) + def test_filters_out_private_receipt(self, receipt_type: str) -> None: self._test_filters_private( [ { "content": { "$1435641916114394fHBLK:matrix.org": { - ReceiptTypes.READ_PRIVATE: { + receipt_type: { "@rikj:jki.re": { "ts": 1436451550453, } @@ -45,13 +50,18 @@ class ReceiptsTestCase(unittest.HomeserverTestCase): [], ) - def test_filters_out_private_receipt_and_ignores_rest(self): + @parameterized.expand( + [ReceiptTypes.READ_PRIVATE, ReceiptTypes.UNSTABLE_READ_PRIVATE] + ) + def test_filters_out_private_receipt_and_ignores_rest( + self, receipt_type: str + ) -> None: self._test_filters_private( [ { "content": { "$1dgdgrd5641916114394fHBLK:matrix.org": { - ReceiptTypes.READ_PRIVATE: { + receipt_type: { "@rikj:jki.re": { "ts": 1436451550453, }, @@ -84,13 +94,18 @@ class ReceiptsTestCase(unittest.HomeserverTestCase): ], ) - def test_filters_out_event_with_only_private_receipts_and_ignores_the_rest(self): + @parameterized.expand( + [ReceiptTypes.READ_PRIVATE, ReceiptTypes.UNSTABLE_READ_PRIVATE] + ) + def test_filters_out_event_with_only_private_receipts_and_ignores_the_rest( + self, receipt_type: str + ) -> None: self._test_filters_private( [ { "content": { "$14356419edgd14394fHBLK:matrix.org": { - ReceiptTypes.READ_PRIVATE: { + receipt_type: { "@rikj:jki.re": { "ts": 1436451550453, }, @@ -125,7 +140,7 @@ class ReceiptsTestCase(unittest.HomeserverTestCase): ], ) - def test_handles_empty_event(self): + def test_handles_empty_event(self) -> None: self._test_filters_private( [ { @@ -160,13 +175,18 @@ class ReceiptsTestCase(unittest.HomeserverTestCase): ], ) - def test_filters_out_receipt_event_with_only_private_receipt_and_ignores_rest(self): + @parameterized.expand( + [ReceiptTypes.READ_PRIVATE, ReceiptTypes.UNSTABLE_READ_PRIVATE] + ) + def test_filters_out_receipt_event_with_only_private_receipt_and_ignores_rest( + self, receipt_type: str + ) -> None: self._test_filters_private( [ { "content": { "$14356419edgd14394fHBLK:matrix.org": { - ReceiptTypes.READ_PRIVATE: { + receipt_type: { "@rikj:jki.re": { "ts": 1436451550453, }, @@ -207,7 +227,7 @@ class ReceiptsTestCase(unittest.HomeserverTestCase): ], ) - def test_handles_string_data(self): + def test_handles_string_data(self) -> None: """ Tests that an invalid shape for read-receipts is handled. Context: https://github.com/matrix-org/synapse/issues/10603 @@ -242,13 +262,16 @@ class ReceiptsTestCase(unittest.HomeserverTestCase): ], ) - def test_leaves_our_private_and_their_public(self): + @parameterized.expand( + [ReceiptTypes.READ_PRIVATE, ReceiptTypes.UNSTABLE_READ_PRIVATE] + ) + def test_leaves_our_private_and_their_public(self, receipt_type: str) -> None: self._test_filters_private( [ { "content": { "$1dgdgrd5641916114394fHBLK:matrix.org": { - ReceiptTypes.READ_PRIVATE: { + receipt_type: { "@me:server.org": { "ts": 1436451550453, }, @@ -273,7 +296,7 @@ class ReceiptsTestCase(unittest.HomeserverTestCase): { "content": { "$1dgdgrd5641916114394fHBLK:matrix.org": { - ReceiptTypes.READ_PRIVATE: { + receipt_type: { "@me:server.org": { "ts": 1436451550453, }, @@ -296,13 +319,16 @@ class ReceiptsTestCase(unittest.HomeserverTestCase): ], ) - def test_we_do_not_mutate(self): + @parameterized.expand( + [ReceiptTypes.READ_PRIVATE, ReceiptTypes.UNSTABLE_READ_PRIVATE] + ) + def test_we_do_not_mutate(self, receipt_type: str) -> None: """Ensure the input values are not modified.""" events = [ { "content": { "$1435641916114394fHBLK:matrix.org": { - ReceiptTypes.READ_PRIVATE: { + receipt_type: { "@rikj:jki.re": { "ts": 1436451550453, } @@ -320,7 +346,7 @@ class ReceiptsTestCase(unittest.HomeserverTestCase): def _test_filters_private( self, events: List[JsonDict], expected_output: List[JsonDict] - ): + ) -> None: """Tests that the _filter_out_private returns the expected output""" filtered_events = self.event_source.filter_out_private_receipts( events, "@me:server.org" diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index ae16184828..de0dec8539 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -38,7 +38,6 @@ from tests.federation.transport.test_knocking import ( KnockingStrippedStateEventHelperMixin, ) from tests.server import TimedOutException -from tests.unittest import override_config class FilterTestCase(unittest.HomeserverTestCase): @@ -390,6 +389,12 @@ class ReadReceiptsTestCase(unittest.HomeserverTestCase): sync.register_servlets, ] + def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: + config = self.default_config() + config["experimental_features"] = {"msc2285_enabled": True} + + return self.setup_test_homeserver(config=config) + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.url = "/sync?since=%s" self.next_batch = "s0" @@ -408,15 +413,17 @@ class ReadReceiptsTestCase(unittest.HomeserverTestCase): # Join the second user self.helper.join(room=self.room_id, user=self.user2, tok=self.tok2) - @override_config({"experimental_features": {"msc2285_enabled": True}}) - def test_private_read_receipts(self) -> None: + @parameterized.expand( + [ReceiptTypes.READ_PRIVATE, ReceiptTypes.UNSTABLE_READ_PRIVATE] + ) + def test_private_read_receipts(self, receipt_type: str) -> None: # Send a message as the first user res = self.helper.send(self.room_id, body="hello", tok=self.tok) # Send a private read receipt to tell the server the first user's message was read channel = self.make_request( "POST", - f"/rooms/{self.room_id}/receipt/org.matrix.msc2285.read.private/{res['event_id']}", + f"/rooms/{self.room_id}/receipt/{receipt_type}/{res['event_id']}", {}, access_token=self.tok2, ) @@ -425,8 +432,10 @@ class ReadReceiptsTestCase(unittest.HomeserverTestCase): # Test that the first user can't see the other user's private read receipt self.assertIsNone(self._get_read_receipt()) - @override_config({"experimental_features": {"msc2285_enabled": True}}) - def test_public_receipt_can_override_private(self) -> None: + @parameterized.expand( + [ReceiptTypes.READ_PRIVATE, ReceiptTypes.UNSTABLE_READ_PRIVATE] + ) + def test_public_receipt_can_override_private(self, receipt_type: str) -> None: """ Sending a public read receipt to the same event which has a private read receipt should cause that receipt to become public. @@ -437,7 +446,7 @@ class ReadReceiptsTestCase(unittest.HomeserverTestCase): # Send a private read receipt channel = self.make_request( "POST", - f"/rooms/{self.room_id}/receipt/{ReceiptTypes.READ_PRIVATE}/{res['event_id']}", + f"/rooms/{self.room_id}/receipt/{receipt_type}/{res['event_id']}", {}, access_token=self.tok2, ) @@ -456,8 +465,10 @@ class ReadReceiptsTestCase(unittest.HomeserverTestCase): # Test that we did override the private read receipt self.assertNotEqual(self._get_read_receipt(), None) - @override_config({"experimental_features": {"msc2285_enabled": True}}) - def test_private_receipt_cannot_override_public(self) -> None: + @parameterized.expand( + [ReceiptTypes.READ_PRIVATE, ReceiptTypes.UNSTABLE_READ_PRIVATE] + ) + def test_private_receipt_cannot_override_public(self, receipt_type: str) -> None: """ Sending a private read receipt to the same event which has a public read receipt should cause no change. @@ -478,7 +489,7 @@ class ReadReceiptsTestCase(unittest.HomeserverTestCase): # Send a private read receipt channel = self.make_request( "POST", - f"/rooms/{self.room_id}/receipt/{ReceiptTypes.READ_PRIVATE}/{res['event_id']}", + f"/rooms/{self.room_id}/receipt/{receipt_type}/{res['event_id']}", {}, access_token=self.tok2, ) @@ -590,7 +601,10 @@ class UnreadMessagesTestCase(unittest.HomeserverTestCase): tok=self.tok, ) - def test_unread_counts(self) -> None: + @parameterized.expand( + [ReceiptTypes.READ_PRIVATE, ReceiptTypes.UNSTABLE_READ_PRIVATE] + ) + def test_unread_counts(self, receipt_type: str) -> None: """Tests that /sync returns the right value for the unread count (MSC2654).""" # Check that our own messages don't increase the unread count. @@ -624,7 +638,7 @@ class UnreadMessagesTestCase(unittest.HomeserverTestCase): # Send a read receipt to tell the server we've read the latest event. channel = self.make_request( "POST", - f"/rooms/{self.room_id}/receipt/org.matrix.msc2285.read.private/{res['event_id']}", + f"/rooms/{self.room_id}/receipt/{receipt_type}/{res['event_id']}", {}, access_token=self.tok, ) @@ -700,7 +714,7 @@ class UnreadMessagesTestCase(unittest.HomeserverTestCase): self._check_unread_count(5) res2 = self.helper.send(self.room_id, "hello", tok=self.tok2) - # Make sure both m.read and org.matrix.msc2285.read.private advance + # Make sure both m.read and m.read.private advance channel = self.make_request( "POST", f"/rooms/{self.room_id}/receipt/m.read/{res1['event_id']}", @@ -712,16 +726,22 @@ class UnreadMessagesTestCase(unittest.HomeserverTestCase): channel = self.make_request( "POST", - f"/rooms/{self.room_id}/receipt/org.matrix.msc2285.read.private/{res2['event_id']}", + f"/rooms/{self.room_id}/receipt/{receipt_type}/{res2['event_id']}", {}, access_token=self.tok, ) self.assertEqual(channel.code, 200, channel.json_body) self._check_unread_count(0) - # We test for both receipt types that influence notification counts - @parameterized.expand([ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE]) - def test_read_receipts_only_go_down(self, receipt_type: ReceiptTypes) -> None: + # We test for all three receipt types that influence notification counts + @parameterized.expand( + [ + ReceiptTypes.READ, + ReceiptTypes.READ_PRIVATE, + ReceiptTypes.UNSTABLE_READ_PRIVATE, + ] + ) + def test_read_receipts_only_go_down(self, receipt_type: str) -> None: # Join the new user self.helper.join(room=self.room_id, user=self.user2, tok=self.tok2) @@ -739,11 +759,11 @@ class UnreadMessagesTestCase(unittest.HomeserverTestCase): self.assertEqual(channel.code, 200, channel.json_body) self._check_unread_count(0) - # Make sure neither m.read nor org.matrix.msc2285.read.private make the + # Make sure neither m.read nor m.read.private make the # read receipt go up to an older event channel = self.make_request( "POST", - f"/rooms/{self.room_id}/receipt/org.matrix.msc2285.read.private/{res1['event_id']}", + f"/rooms/{self.room_id}/receipt/{receipt_type}/{res1['event_id']}", {}, access_token=self.tok, ) diff --git a/tests/storage/test_receipts.py b/tests/storage/test_receipts.py index b1a8f8bba7..191c957fb5 100644 --- a/tests/storage/test_receipts.py +++ b/tests/storage/test_receipts.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from parameterized import parameterized + from synapse.api.constants import ReceiptTypes from synapse.types import UserID, create_requester @@ -23,7 +25,7 @@ OUR_USER_ID = "@our:test" class ReceiptTestCase(HomeserverTestCase): - def prepare(self, reactor, clock, homeserver): + def prepare(self, reactor, clock, homeserver) -> None: super().prepare(reactor, clock, homeserver) self.store = homeserver.get_datastores().main @@ -83,10 +85,15 @@ class ReceiptTestCase(HomeserverTestCase): ) ) - def test_return_empty_with_no_data(self): + def test_return_empty_with_no_data(self) -> None: res = self.get_success( self.store.get_receipts_for_user( - OUR_USER_ID, [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE] + OUR_USER_ID, + [ + ReceiptTypes.READ, + ReceiptTypes.READ_PRIVATE, + ReceiptTypes.UNSTABLE_READ_PRIVATE, + ], ) ) self.assertEqual(res, {}) @@ -94,7 +101,11 @@ class ReceiptTestCase(HomeserverTestCase): res = self.get_success( self.store.get_receipts_for_user_with_orderings( OUR_USER_ID, - [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE], + [ + ReceiptTypes.READ, + ReceiptTypes.READ_PRIVATE, + ReceiptTypes.UNSTABLE_READ_PRIVATE, + ], ) ) self.assertEqual(res, {}) @@ -103,12 +114,19 @@ class ReceiptTestCase(HomeserverTestCase): self.store.get_last_receipt_event_id_for_user( OUR_USER_ID, self.room_id1, - [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE], + [ + ReceiptTypes.READ, + ReceiptTypes.READ_PRIVATE, + ReceiptTypes.UNSTABLE_READ_PRIVATE, + ], ) ) self.assertEqual(res, None) - def test_get_receipts_for_user(self): + @parameterized.expand( + [ReceiptTypes.READ_PRIVATE, ReceiptTypes.UNSTABLE_READ_PRIVATE] + ) + def test_get_receipts_for_user(self, receipt_type: str) -> None: # Send some events into the first room event1_1_id = self.create_and_send_event( self.room_id1, UserID.from_string(OTHER_USER_ID) @@ -126,14 +144,14 @@ class ReceiptTestCase(HomeserverTestCase): # Send private read receipt for the second event self.get_success( self.store.insert_receipt( - self.room_id1, ReceiptTypes.READ_PRIVATE, OUR_USER_ID, [event1_2_id], {} + self.room_id1, receipt_type, OUR_USER_ID, [event1_2_id], {} ) ) # Test we get the latest event when we want both private and public receipts res = self.get_success( self.store.get_receipts_for_user( - OUR_USER_ID, [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE] + OUR_USER_ID, [ReceiptTypes.READ, receipt_type] ) ) self.assertEqual(res, {self.room_id1: event1_2_id}) @@ -146,7 +164,7 @@ class ReceiptTestCase(HomeserverTestCase): # Test we get the latest event when we want only the public receipt res = self.get_success( - self.store.get_receipts_for_user(OUR_USER_ID, [ReceiptTypes.READ_PRIVATE]) + self.store.get_receipts_for_user(OUR_USER_ID, [receipt_type]) ) self.assertEqual(res, {self.room_id1: event1_2_id}) @@ -169,17 +187,20 @@ class ReceiptTestCase(HomeserverTestCase): # Test new room is reflected in what the method returns self.get_success( self.store.insert_receipt( - self.room_id2, ReceiptTypes.READ_PRIVATE, OUR_USER_ID, [event2_1_id], {} + self.room_id2, receipt_type, OUR_USER_ID, [event2_1_id], {} ) ) res = self.get_success( self.store.get_receipts_for_user( - OUR_USER_ID, [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE] + OUR_USER_ID, [ReceiptTypes.READ, receipt_type] ) ) self.assertEqual(res, {self.room_id1: event1_2_id, self.room_id2: event2_1_id}) - def test_get_last_receipt_event_id_for_user(self): + @parameterized.expand( + [ReceiptTypes.READ_PRIVATE, ReceiptTypes.UNSTABLE_READ_PRIVATE] + ) + def test_get_last_receipt_event_id_for_user(self, receipt_type: str) -> None: # Send some events into the first room event1_1_id = self.create_and_send_event( self.room_id1, UserID.from_string(OTHER_USER_ID) @@ -197,7 +218,7 @@ class ReceiptTestCase(HomeserverTestCase): # Send private read receipt for the second event self.get_success( self.store.insert_receipt( - self.room_id1, ReceiptTypes.READ_PRIVATE, OUR_USER_ID, [event1_2_id], {} + self.room_id1, receipt_type, OUR_USER_ID, [event1_2_id], {} ) ) @@ -206,7 +227,7 @@ class ReceiptTestCase(HomeserverTestCase): self.store.get_last_receipt_event_id_for_user( OUR_USER_ID, self.room_id1, - [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE], + [ReceiptTypes.READ, receipt_type], ) ) self.assertEqual(res, event1_2_id) @@ -222,7 +243,7 @@ class ReceiptTestCase(HomeserverTestCase): # Test we get the latest event when we want only the private receipt res = self.get_success( self.store.get_last_receipt_event_id_for_user( - OUR_USER_ID, self.room_id1, [ReceiptTypes.READ_PRIVATE] + OUR_USER_ID, self.room_id1, [receipt_type] ) ) self.assertEqual(res, event1_2_id) @@ -248,14 +269,14 @@ class ReceiptTestCase(HomeserverTestCase): # Test new room is reflected in what the method returns self.get_success( self.store.insert_receipt( - self.room_id2, ReceiptTypes.READ_PRIVATE, OUR_USER_ID, [event2_1_id], {} + self.room_id2, receipt_type, OUR_USER_ID, [event2_1_id], {} ) ) res = self.get_success( self.store.get_last_receipt_event_id_for_user( OUR_USER_ID, self.room_id2, - [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE], + [ReceiptTypes.READ, receipt_type], ) ) self.assertEqual(res, event2_1_id) -- cgit 1.5.1 From 507c1cb3309e989d84ec3ff9557a96ae1fc7f369 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 11 Aug 2022 11:42:24 +0100 Subject: Update the rejected state of events during resync (#13459) Events can be un-rejected or newly-rejected during resync, so ensure we update the database and caches when that happens. --- changelog.d/13459.misc | 1 + synapse/storage/databases/main/events_worker.py | 60 +++++++++++++++++++++++++ synapse/storage/databases/main/state.py | 5 +++ synapse/storage/state.py | 9 ---- 4 files changed, 66 insertions(+), 9 deletions(-) create mode 100644 changelog.d/13459.misc (limited to 'synapse/storage/databases/main') diff --git a/changelog.d/13459.misc b/changelog.d/13459.misc new file mode 100644 index 0000000000..e6082210a0 --- /dev/null +++ b/changelog.d/13459.misc @@ -0,0 +1 @@ +Faster joins: update the rejected state of events during de-partial-stating. diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index e9ff6cfb34..b07d812ae2 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -2200,3 +2200,63 @@ class EventsWorkerStore(SQLBaseStore): (room_id,), ) return [row[0] for row in txn] + + def mark_event_rejected_txn( + self, + txn: LoggingTransaction, + event_id: str, + rejection_reason: Optional[str], + ) -> None: + """Mark an event that was previously accepted as rejected, or vice versa + + This can happen, for example, when resyncing state during a faster join. + + Args: + txn: + event_id: ID of event to update + rejection_reason: reason it has been rejected, or None if it is now accepted + """ + if rejection_reason is None: + logger.info( + "Marking previously-processed event %s as accepted", + event_id, + ) + self.db_pool.simple_delete_txn( + txn, + "rejections", + keyvalues={"event_id": event_id}, + ) + else: + logger.info( + "Marking previously-processed event %s as rejected(%s)", + event_id, + rejection_reason, + ) + self.db_pool.simple_upsert_txn( + txn, + table="rejections", + keyvalues={"event_id": event_id}, + values={ + "reason": rejection_reason, + "last_check": self._clock.time_msec(), + }, + ) + self.db_pool.simple_update_txn( + txn, + table="events", + keyvalues={"event_id": event_id}, + updatevalues={"rejection_reason": rejection_reason}, + ) + + self.invalidate_get_event_cache_after_txn(txn, event_id) + + # TODO(faster_joins): invalidate the cache on workers. Ideally we'd just + # call '_send_invalidation_to_replication', but we actually need the other + # end to call _invalidate_local_get_event_cache() rather than (just) + # _get_event_cache.invalidate(). + # + # One solution might be to (somehow) get the workers to call + # _invalidate_caches_for_event() (though that will invalidate more than + # strictly necessary). + # + # https://github.com/matrix-org/synapse/issues/12994 diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index f70705a0af..0b10af0e58 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -430,6 +430,11 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): updatevalues={"state_group": state_group}, ) + # the event may now be rejected where it was not before, or vice versa, + # in which case we need to update the rejected flags. + if bool(context.rejected) != (event.rejected_reason is not None): + self.mark_event_rejected_txn(txn, event.event_id, context.rejected) + self.db_pool.simple_delete_one_txn( txn, table="partial_state_events", diff --git a/synapse/storage/state.py b/synapse/storage/state.py index af3bab2c15..0004d955b4 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -539,15 +539,6 @@ class StateFilter: is_mine_id: a callable which confirms if a given state_key matches a mxid of a local user """ - - # TODO(faster_joins): it's not entirely clear that this is safe. In particular, - # there may be circumstances in which we return a piece of state that, once we - # resync the state, we discover is invalid. For example: if it turns out that - # the sender of a piece of state wasn't actually in the room, then clearly that - # state shouldn't have been returned. - # We should at least add some tests around this to see what happens. - # https://github.com/matrix-org/synapse/issues/13006 - # if we haven't requested membership events, then it depends on the value of # 'include_others' if EventTypes.Member not in self.types: -- cgit 1.5.1 From 46bd7f4ed9020bbed459c03a11c26d7f7c3093b0 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 15 Aug 2022 09:33:17 -0400 Subject: Clarifications for event push action processing. (#13485) * Clarifies comments. * Fixes an erroneous comment (about return type) added in #13455 (ec24813220f9d54108924dc04aecd24555277b99). * Clarifies the name of a variable. * Simplifies logic of pulling out the latest join for the requesting user. --- changelog.d/13485.misc | 1 + .../storage/databases/main/event_push_actions.py | 53 ++++++++++++++-------- synapse/storage/databases/main/receipts.py | 2 +- 3 files changed, 35 insertions(+), 21 deletions(-) create mode 100644 changelog.d/13485.misc (limited to 'synapse/storage/databases/main') diff --git a/changelog.d/13485.misc b/changelog.d/13485.misc new file mode 100644 index 0000000000..c75712b9ff --- /dev/null +++ b/changelog.d/13485.misc @@ -0,0 +1 @@ +Add comments about how event push actions are rotated. diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index 161aad0f89..f62aa45ca1 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -227,7 +227,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas user_id: str, ) -> NotifCounts: """Get the notification count, the highlight count and the unread message count - for a given user in a given room after the given read receipt. + for a given user in a given room after their latest read receipt. Note that this function assumes the user to be a current member of the room, since it's either called by the sync handler to handle joined room entries, or by @@ -238,9 +238,8 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas user_id: The user to retrieve the counts for. Returns - A dict containing the counts mentioned earlier in this docstring, - respectively under the keys "notify_count", "highlight_count" and - "unread_count". + A NotifCounts object containing the notification count, the highlight count + and the unread message count. """ return await self.db_pool.runInteraction( "get_unread_event_push_actions_by_room", @@ -255,6 +254,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas room_id: str, user_id: str, ) -> NotifCounts: + # Get the stream ordering of the user's latest receipt in the room. result = self.get_last_receipt_for_user_txn( txn, user_id, @@ -266,13 +266,11 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas ), ) - stream_ordering = None if result: _, stream_ordering = result - if stream_ordering is None: - # Either last_read_event_id is None, or it's an event we don't have (e.g. - # because it's been purged), in which case retrieve the stream ordering for + else: + # If the user has no receipts in the room, retrieve the stream ordering for # the latest membership event from this user in this room (which we assume is # a join). event_id = self.db_pool.simple_select_one_onecol_txn( @@ -289,10 +287,26 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas ) def _get_unread_counts_by_pos_txn( - self, txn: LoggingTransaction, room_id: str, user_id: str, stream_ordering: int + self, + txn: LoggingTransaction, + room_id: str, + user_id: str, + receipt_stream_ordering: int, ) -> NotifCounts: """Get the number of unread messages for a user/room that have happened since the given stream ordering. + + Args: + txn: The database transaction. + room_id: The room ID to get unread counts for. + user_id: The user ID to get unread counts for. + receipt_stream_ordering: The stream ordering of the user's latest + receipt in the room. If there are no receipts, the stream ordering + of the user's join event. + + Returns + A NotifCounts object containing the notification count, the highlight count + and the unread message count. """ counts = NotifCounts() @@ -320,7 +334,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas OR last_receipt_stream_ordering = ? ) """, - (room_id, user_id, stream_ordering, stream_ordering), + (room_id, user_id, receipt_stream_ordering, receipt_stream_ordering), ) row = txn.fetchone() @@ -338,17 +352,20 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas AND stream_ordering > ? AND highlight = 1 """ - txn.execute(sql, (user_id, room_id, stream_ordering)) + txn.execute(sql, (user_id, room_id, receipt_stream_ordering)) row = txn.fetchone() if row: counts.highlight_count += row[0] # Finally we need to count push actions that aren't included in the - # summary returned above, e.g. recent events that haven't been - # summarised yet, or the summary is empty due to a recent read receipt. - stream_ordering = max(stream_ordering, summary_stream_ordering) + # summary returned above. This might be due to recent events that haven't + # been summarised yet or the summary is out of date due to a recent read + # receipt. + start_unread_stream_ordering = max( + receipt_stream_ordering, summary_stream_ordering + ) notify_count, unread_count = self._get_notif_unread_count_for_user_room( - txn, room_id, user_id, stream_ordering + txn, room_id, user_id, start_unread_stream_ordering ) counts.notify_count += notify_count @@ -1151,8 +1168,6 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas txn: The database transaction. old_rotate_stream_ordering: The previous maximum event stream ordering. rotate_to_stream_ordering: The new maximum event stream ordering to summarise. - - Returns whether the archiving process has caught up or not. """ # Calculate the new counts that should be upserted into event_push_summary @@ -1238,9 +1253,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas (rotate_to_stream_ordering,), ) - async def _remove_old_push_actions_that_have_rotated( - self, - ) -> None: + async def _remove_old_push_actions_that_have_rotated(self) -> None: """Clear out old push actions that have been summarised.""" # We want to clear out anything that is older than a day that *has* already diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py index 0090c9f225..124c70ad37 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py @@ -161,7 +161,7 @@ class ReceiptsWorkerStore(SQLBaseStore): receipt_type: The receipt types to fetch. Returns: - The latest receipt, if one exists. + The event ID and stream ordering of the latest receipt, if one exists. """ clause, args = make_in_list_sql_clause( -- cgit 1.5.1 From 344a2f767c636259412f7fc2914c1554a5c4dc1d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 15 Aug 2022 13:41:23 -0500 Subject: Instrument `FederationStateIdsServlet` - `/state_ids` (#13499) Instrument FederationStateIdsServlet - `/state_ids` so it's easier to follow what's going on in Jaeger when viewing a trace. --- changelog.d/13499.misc | 1 + synapse/federation/federation_server.py | 11 ++++++++++- synapse/handlers/federation.py | 4 +++- synapse/storage/databases/main/event_federation.py | 3 +++ synapse/util/ratelimitutils.py | 4 ++++ 5 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 changelog.d/13499.misc (limited to 'synapse/storage/databases/main') diff --git a/changelog.d/13499.misc b/changelog.d/13499.misc new file mode 100644 index 0000000000..99dbcebec8 --- /dev/null +++ b/changelog.d/13499.misc @@ -0,0 +1 @@ +Instrument `FederationStateIdsServlet` (`/state_ids`) for understandable traces in Jaeger. diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index db4b83a505..75fbc6073d 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -61,7 +61,12 @@ from synapse.logging.context import ( nested_logging_context, run_in_background, ) -from synapse.logging.opentracing import log_kv, start_active_span_from_edu, trace +from synapse.logging.opentracing import ( + log_kv, + start_active_span_from_edu, + tag_args, + trace, +) from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.replication.http.federation import ( ReplicationFederationSendEduRestServlet, @@ -547,6 +552,8 @@ class FederationServer(FederationBase): return 200, resp + @trace + @tag_args async def on_state_ids_request( self, origin: str, room_id: str, event_id: str ) -> Tuple[int, JsonDict]: @@ -569,6 +576,8 @@ class FederationServer(FederationBase): return 200, resp + @trace + @tag_args async def _on_state_ids_request_compute( self, room_id: str, event_id: str ) -> JsonDict: diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 5042236742..6f5ab86ac4 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -59,7 +59,7 @@ from synapse.events.validator import EventValidator from synapse.federation.federation_client import InvalidResponseError from synapse.http.servlet import assert_params_in_dict from synapse.logging.context import nested_logging_context -from synapse.logging.opentracing import trace +from synapse.logging.opentracing import tag_args, trace from synapse.metrics.background_process_metrics import run_as_background_process from synapse.module_api import NOT_SPAM from synapse.replication.http.federation import ( @@ -1081,6 +1081,8 @@ class FederationHandler: return event + @trace + @tag_args async def get_state_ids_for_pdu(self, room_id: str, event_id: str) -> List[str]: """Returns the state at the event. i.e. not including said event.""" event = await self.store.get_event(event_id, check_room_id=room_id) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index eec55b6478..0bc8401f2b 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -33,6 +33,7 @@ from synapse.api.constants import MAX_DEPTH, EventTypes from synapse.api.errors import StoreError from synapse.api.room_versions import EventFormatVersions, RoomVersion from synapse.events import EventBase, make_event_from_dict +from synapse.logging.opentracing import tag_args, trace from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause from synapse.storage.database import ( @@ -126,6 +127,8 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas ) return await self.get_events_as_list(event_ids) + @trace + @tag_args async def get_auth_chain_ids( self, room_id: str, diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index 6394cc39ac..e1beaec5a3 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -27,6 +27,7 @@ from synapse.logging.context import ( make_deferred_yieldable, run_in_background, ) +from synapse.logging.opentracing import start_active_span from synapse.util import Clock if typing.TYPE_CHECKING: @@ -176,8 +177,11 @@ class _PerHostRatelimiter: # Ensure that we've properly cleaned up. self.sleeping_requests.discard(request_id) self.ready_request_queue.pop(request_id, None) + wait_span_scope.__exit__(None, None, None) return r + wait_span_scope = start_active_span("ratelimit wait") + wait_span_scope.__enter__() ret_defer.addCallbacks(on_start, on_err) ret_defer.addBoth(on_both) return make_deferred_yieldable(ret_defer) -- cgit 1.5.1 From 5442891cbca67d3af27c448791589e0b9abeb7f8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 16 Aug 2022 12:22:17 +0100 Subject: Make push rules use proper structures. (#13522) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This improves load times for push rules: | Version | Time per user | Time for 1k users | | -------------------- | ------------- | ----------------- | | Before | 138 µs | 138ms | | Now (with custom) | 2.11 µs | 2.11ms | | Now (without custom) | 49.7 ns | 0.05 ms | This therefore has a large impact on send times for rooms with large numbers of local users in the room. --- changelog.d/13522.misc | 1 + synapse/push/baserules.py | 518 +++++++++++++-------- synapse/push/bulk_push_rule_evaluator.py | 37 +- synapse/push/clientformat.py | 68 +-- synapse/push/push_rule_evaluator.py | 27 +- .../storage/databases/main/event_push_actions.py | 22 +- synapse/storage/databases/main/push_rule.py | 121 +++-- tests/handlers/test_deactivate_account.py | 33 +- 8 files changed, 494 insertions(+), 333 deletions(-) create mode 100644 changelog.d/13522.misc (limited to 'synapse/storage/databases/main') diff --git a/changelog.d/13522.misc b/changelog.d/13522.misc new file mode 100644 index 0000000000..0a8827205d --- /dev/null +++ b/changelog.d/13522.misc @@ -0,0 +1 @@ +Improve performance of sending messages in rooms with thousands of local users. diff --git a/synapse/push/baserules.py b/synapse/push/baserules.py index 6c0cc5a6ce..c3e072033c 100644 --- a/synapse/push/baserules.py +++ b/synapse/push/baserules.py @@ -14,128 +14,224 @@ # See the License for the specific language governing permissions and # limitations under the License. -import copy -from typing import Any, Dict, List - -from synapse.push.rulekinds import PRIORITY_CLASS_INVERSE_MAP, PRIORITY_CLASS_MAP +""" +Push rules is the system used to determine which events trigger a push (and a +bump in notification counts). + +This consists of a list of "push rules" for each user, where a push rule is a +pair of "conditions" and "actions". When a user receives an event Synapse +iterates over the list of push rules until it finds one where all the conditions +match the event, at which point "actions" describe the outcome (e.g. notify, +highlight, etc). + +Push rules are split up into 5 different "kinds" (aka "priority classes"), which +are run in order: + 1. Override — highest priority rules, e.g. always ignore notices + 2. Content — content specific rules, e.g. @ notifications + 3. Room — per room rules, e.g. enable/disable notifications for all messages + in a room + 4. Sender — per sender rules, e.g. never notify for messages from a given + user + 5. Underride — the lowest priority "default" rules, e.g. notify for every + message. + +The set of "base rules" are the list of rules that every user has by default. A +user can modify their copy of the push rules in one of three ways: + + 1. Adding a new push rule of a certain kind + 2. Changing the actions of a base rule + 3. Enabling/disabling a base rule. + +The base rules are split into whether they come before or after a particular +kind, so the order of push rule evaluation would be: base rules for before +"override" kind, user defined "override" rules, base rules after "override" +kind, etc, etc. +""" + +import itertools +from typing import Dict, Iterator, List, Mapping, Sequence, Tuple, Union + +import attr + +from synapse.config.experimental import ExperimentalConfig +from synapse.push.rulekinds import PRIORITY_CLASS_MAP + + +@attr.s(auto_attribs=True, slots=True, frozen=True) +class PushRule: + """A push rule + + Attributes: + rule_id: a unique ID for this rule + priority_class: what "kind" of push rule this is (see + `PRIORITY_CLASS_MAP` for mapping between int and kind) + conditions: the sequence of conditions that all need to match + actions: the actions to apply if all conditions are met + default: is this a base rule? + default_enabled: is this enabled by default? + """ + rule_id: str + priority_class: int + conditions: Sequence[Mapping[str, str]] + actions: Sequence[Union[str, Mapping]] + default: bool = False + default_enabled: bool = True -def list_with_base_rules(rawrules: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """Combine the list of rules set by the user with the default push rules - Args: - rawrules: The rules the user has modified or set. +@attr.s(auto_attribs=True, slots=True, frozen=True, weakref_slot=False) +class PushRules: + """A collection of push rules for an account. - Returns: - A new list with the rules set by the user combined with the defaults. + Can be iterated over, producing push rules in priority order. """ - ruleslist = [] - # Grab the base rules that the user has modified. - # The modified base rules have a priority_class of -1. - modified_base_rules = {r["rule_id"]: r for r in rawrules if r["priority_class"] < 0} + # A mapping from rule ID to push rule that overrides a base rule. These will + # be returned instead of the base rule. + overriden_base_rules: Dict[str, PushRule] = attr.Factory(dict) + + # The following stores the custom push rules at each priority class. + # + # We keep these separate (rather than combining into one big list) to avoid + # copying the base rules around all the time. + override: List[PushRule] = attr.Factory(list) + content: List[PushRule] = attr.Factory(list) + room: List[PushRule] = attr.Factory(list) + sender: List[PushRule] = attr.Factory(list) + underride: List[PushRule] = attr.Factory(list) + + def __iter__(self) -> Iterator[PushRule]: + # When iterating over the push rules we need to return the base rules + # interspersed at the correct spots. + for rule in itertools.chain( + BASE_PREPEND_OVERRIDE_RULES, + self.override, + BASE_APPEND_OVERRIDE_RULES, + self.content, + BASE_APPEND_CONTENT_RULES, + self.room, + self.sender, + self.underride, + BASE_APPEND_UNDERRIDE_RULES, + ): + # Check if a base rule has been overriden by a custom rule. If so + # return that instead. + override_rule = self.overriden_base_rules.get(rule.rule_id) + if override_rule: + yield override_rule + else: + yield rule + + def __len__(self) -> int: + # The length is mostly used by caches to get a sense of "size" / amount + # of memory this object is using, so we only count the number of custom + # rules. + return ( + len(self.overriden_base_rules) + + len(self.override) + + len(self.content) + + len(self.room) + + len(self.sender) + + len(self.underride) + ) - # Remove the modified base rules from the list, They'll be added back - # in the default positions in the list. - rawrules = [r for r in rawrules if r["priority_class"] >= 0] - # shove the server default rules for each kind onto the end of each - current_prio_class = list(PRIORITY_CLASS_INVERSE_MAP)[-1] +@attr.s(auto_attribs=True, slots=True, frozen=True, weakref_slot=False) +class FilteredPushRules: + """A wrapper around `PushRules` that filters out disabled experimental push + rules, and includes the "enabled" state for each rule when iterated over. + """ - ruleslist.extend( - make_base_prepend_rules( - PRIORITY_CLASS_INVERSE_MAP[current_prio_class], modified_base_rules - ) - ) + push_rules: PushRules + enabled_map: Dict[str, bool] + experimental_config: ExperimentalConfig - for r in rawrules: - if r["priority_class"] < current_prio_class: - while r["priority_class"] < current_prio_class: - ruleslist.extend( - make_base_append_rules( - PRIORITY_CLASS_INVERSE_MAP[current_prio_class], - modified_base_rules, - ) - ) - current_prio_class -= 1 - if current_prio_class > 0: - ruleslist.extend( - make_base_prepend_rules( - PRIORITY_CLASS_INVERSE_MAP[current_prio_class], - modified_base_rules, - ) - ) - - ruleslist.append(r) - - while current_prio_class > 0: - ruleslist.extend( - make_base_append_rules( - PRIORITY_CLASS_INVERSE_MAP[current_prio_class], modified_base_rules - ) - ) - current_prio_class -= 1 - if current_prio_class > 0: - ruleslist.extend( - make_base_prepend_rules( - PRIORITY_CLASS_INVERSE_MAP[current_prio_class], modified_base_rules - ) - ) + def __iter__(self) -> Iterator[Tuple[PushRule, bool]]: + for rule in self.push_rules: + if not _is_experimental_rule_enabled( + rule.rule_id, self.experimental_config + ): + continue - return ruleslist + enabled = self.enabled_map.get(rule.rule_id, rule.default_enabled) + yield rule, enabled -def make_base_append_rules( - kind: str, modified_base_rules: Dict[str, Dict[str, Any]] -) -> List[Dict[str, Any]]: - rules = [] + def __len__(self) -> int: + return len(self.push_rules) - if kind == "override": - rules = BASE_APPEND_OVERRIDE_RULES - elif kind == "underride": - rules = BASE_APPEND_UNDERRIDE_RULES - elif kind == "content": - rules = BASE_APPEND_CONTENT_RULES - # Copy the rules before modifying them - rules = copy.deepcopy(rules) - for r in rules: - # Only modify the actions, keep the conditions the same. - assert isinstance(r["rule_id"], str) - modified = modified_base_rules.get(r["rule_id"]) - if modified: - r["actions"] = modified["actions"] +DEFAULT_EMPTY_PUSH_RULES = PushRules() - return rules +def compile_push_rules(rawrules: List[PushRule]) -> PushRules: + """Given a set of custom push rules return a `PushRules` instance (which + includes the base rules). + """ + + if not rawrules: + # Fast path to avoid allocating empty lists when there are no custom + # rules for the user. + return DEFAULT_EMPTY_PUSH_RULES -def make_base_prepend_rules( - kind: str, - modified_base_rules: Dict[str, Dict[str, Any]], -) -> List[Dict[str, Any]]: - rules = [] + rules = PushRules() - if kind == "override": - rules = BASE_PREPEND_OVERRIDE_RULES + for rule in rawrules: + # We need to decide which bucket each custom push rule goes into. - # Copy the rules before modifying them - rules = copy.deepcopy(rules) - for r in rules: - # Only modify the actions, keep the conditions the same. - assert isinstance(r["rule_id"], str) - modified = modified_base_rules.get(r["rule_id"]) - if modified: - r["actions"] = modified["actions"] + # If it has the same ID as a base rule then it overrides that... + overriden_base_rule = BASE_RULES_BY_ID.get(rule.rule_id) + if overriden_base_rule: + rules.overriden_base_rules[rule.rule_id] = attr.evolve( + overriden_base_rule, actions=rule.actions + ) + continue + + # ... otherwise it gets added to the appropriate priority class bucket + collection: List[PushRule] + if rule.priority_class == 5: + collection = rules.override + elif rule.priority_class == 4: + collection = rules.content + elif rule.priority_class == 3: + collection = rules.room + elif rule.priority_class == 2: + collection = rules.sender + elif rule.priority_class == 1: + collection = rules.underride + else: + raise Exception(f"Unknown priority class: {rule.priority_class}") + + collection.append(rule) return rules -# We have to annotate these types, otherwise mypy infers them as -# `List[Dict[str, Sequence[Collection[str]]]]`. -BASE_APPEND_CONTENT_RULES: List[Dict[str, Any]] = [ - { - "rule_id": "global/content/.m.rule.contains_user_name", - "conditions": [ +def _is_experimental_rule_enabled( + rule_id: str, experimental_config: ExperimentalConfig +) -> bool: + """Used by `FilteredPushRules` to filter out experimental rules when they + have not been enabled. + """ + if ( + rule_id == "global/override/.org.matrix.msc3786.rule.room.server_acl" + and not experimental_config.msc3786_enabled + ): + return False + if ( + rule_id == "global/underride/.org.matrix.msc3772.thread_reply" + and not experimental_config.msc3772_enabled + ): + return False + return True + + +BASE_APPEND_CONTENT_RULES = [ + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["content"], + rule_id="global/content/.m.rule.contains_user_name", + conditions=[ { "kind": "event_match", "key": "content.body", @@ -143,29 +239,33 @@ BASE_APPEND_CONTENT_RULES: List[Dict[str, Any]] = [ "pattern_type": "user_localpart", } ], - "actions": [ + actions=[ "notify", {"set_tweak": "sound", "value": "default"}, {"set_tweak": "highlight"}, ], - } + ) ] -BASE_PREPEND_OVERRIDE_RULES: List[Dict[str, Any]] = [ - { - "rule_id": "global/override/.m.rule.master", - "enabled": False, - "conditions": [], - "actions": ["dont_notify"], - } +BASE_PREPEND_OVERRIDE_RULES = [ + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["override"], + rule_id="global/override/.m.rule.master", + default_enabled=False, + conditions=[], + actions=["dont_notify"], + ) ] -BASE_APPEND_OVERRIDE_RULES: List[Dict[str, Any]] = [ - { - "rule_id": "global/override/.m.rule.suppress_notices", - "conditions": [ +BASE_APPEND_OVERRIDE_RULES = [ + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["override"], + rule_id="global/override/.m.rule.suppress_notices", + conditions=[ { "kind": "event_match", "key": "content.msgtype", @@ -173,13 +273,15 @@ BASE_APPEND_OVERRIDE_RULES: List[Dict[str, Any]] = [ "_cache_key": "_suppress_notices", } ], - "actions": ["dont_notify"], - }, + actions=["dont_notify"], + ), # NB. .m.rule.invite_for_me must be higher prio than .m.rule.member_event # otherwise invites will be matched by .m.rule.member_event - { - "rule_id": "global/override/.m.rule.invite_for_me", - "conditions": [ + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["override"], + rule_id="global/override/.m.rule.invite_for_me", + conditions=[ { "kind": "event_match", "key": "type", @@ -195,21 +297,23 @@ BASE_APPEND_OVERRIDE_RULES: List[Dict[str, Any]] = [ # Match the requester's MXID. {"kind": "event_match", "key": "state_key", "pattern_type": "user_id"}, ], - "actions": [ + actions=[ "notify", {"set_tweak": "sound", "value": "default"}, {"set_tweak": "highlight", "value": False}, ], - }, + ), # Will we sometimes want to know about people joining and leaving? # Perhaps: if so, this could be expanded upon. Seems the most usual case # is that we don't though. We add this override rule so that even if # the room rule is set to notify, we don't get notifications about # join/leave/avatar/displayname events. # See also: https://matrix.org/jira/browse/SYN-607 - { - "rule_id": "global/override/.m.rule.member_event", - "conditions": [ + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["override"], + rule_id="global/override/.m.rule.member_event", + conditions=[ { "kind": "event_match", "key": "type", @@ -217,24 +321,28 @@ BASE_APPEND_OVERRIDE_RULES: List[Dict[str, Any]] = [ "_cache_key": "_member", } ], - "actions": ["dont_notify"], - }, + actions=["dont_notify"], + ), # This was changed from underride to override so it's closer in priority # to the content rules where the user name highlight rule lives. This # way a room rule is lower priority than both but a custom override rule # is higher priority than both. - { - "rule_id": "global/override/.m.rule.contains_display_name", - "conditions": [{"kind": "contains_display_name"}], - "actions": [ + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["override"], + rule_id="global/override/.m.rule.contains_display_name", + conditions=[{"kind": "contains_display_name"}], + actions=[ "notify", {"set_tweak": "sound", "value": "default"}, {"set_tweak": "highlight"}, ], - }, - { - "rule_id": "global/override/.m.rule.roomnotif", - "conditions": [ + ), + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["override"], + rule_id="global/override/.m.rule.roomnotif", + conditions=[ { "kind": "event_match", "key": "content.body", @@ -247,11 +355,13 @@ BASE_APPEND_OVERRIDE_RULES: List[Dict[str, Any]] = [ "_cache_key": "_roomnotif_pl", }, ], - "actions": ["notify", {"set_tweak": "highlight", "value": True}], - }, - { - "rule_id": "global/override/.m.rule.tombstone", - "conditions": [ + actions=["notify", {"set_tweak": "highlight", "value": True}], + ), + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["override"], + rule_id="global/override/.m.rule.tombstone", + conditions=[ { "kind": "event_match", "key": "type", @@ -265,11 +375,13 @@ BASE_APPEND_OVERRIDE_RULES: List[Dict[str, Any]] = [ "_cache_key": "_tombstone_statekey", }, ], - "actions": ["notify", {"set_tweak": "highlight", "value": True}], - }, - { - "rule_id": "global/override/.m.rule.reaction", - "conditions": [ + actions=["notify", {"set_tweak": "highlight", "value": True}], + ), + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["override"], + rule_id="global/override/.m.rule.reaction", + conditions=[ { "kind": "event_match", "key": "type", @@ -277,14 +389,16 @@ BASE_APPEND_OVERRIDE_RULES: List[Dict[str, Any]] = [ "_cache_key": "_reaction", } ], - "actions": ["dont_notify"], - }, + actions=["dont_notify"], + ), # XXX: This is an experimental rule that is only enabled if msc3786_enabled # is enabled, if it is not the rule gets filtered out in _load_rules() in # PushRulesWorkerStore - { - "rule_id": "global/override/.org.matrix.msc3786.rule.room.server_acl", - "conditions": [ + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["override"], + rule_id="global/override/.org.matrix.msc3786.rule.room.server_acl", + conditions=[ { "kind": "event_match", "key": "type", @@ -298,15 +412,17 @@ BASE_APPEND_OVERRIDE_RULES: List[Dict[str, Any]] = [ "_cache_key": "_room_server_acl_state_key", }, ], - "actions": [], - }, + actions=[], + ), ] -BASE_APPEND_UNDERRIDE_RULES: List[Dict[str, Any]] = [ - { - "rule_id": "global/underride/.m.rule.call", - "conditions": [ +BASE_APPEND_UNDERRIDE_RULES = [ + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["underride"], + rule_id="global/underride/.m.rule.call", + conditions=[ { "kind": "event_match", "key": "type", @@ -314,17 +430,19 @@ BASE_APPEND_UNDERRIDE_RULES: List[Dict[str, Any]] = [ "_cache_key": "_call", } ], - "actions": [ + actions=[ "notify", {"set_tweak": "sound", "value": "ring"}, {"set_tweak": "highlight", "value": False}, ], - }, + ), # XXX: once m.direct is standardised everywhere, we should use it to detect # a DM from the user's perspective rather than this heuristic. - { - "rule_id": "global/underride/.m.rule.room_one_to_one", - "conditions": [ + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["underride"], + rule_id="global/underride/.m.rule.room_one_to_one", + conditions=[ {"kind": "room_member_count", "is": "2", "_cache_key": "member_count"}, { "kind": "event_match", @@ -333,17 +451,19 @@ BASE_APPEND_UNDERRIDE_RULES: List[Dict[str, Any]] = [ "_cache_key": "_message", }, ], - "actions": [ + actions=[ "notify", {"set_tweak": "sound", "value": "default"}, {"set_tweak": "highlight", "value": False}, ], - }, + ), # XXX: this is going to fire for events which aren't m.room.messages # but are encrypted (e.g. m.call.*)... - { - "rule_id": "global/underride/.m.rule.encrypted_room_one_to_one", - "conditions": [ + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["underride"], + rule_id="global/underride/.m.rule.encrypted_room_one_to_one", + conditions=[ {"kind": "room_member_count", "is": "2", "_cache_key": "member_count"}, { "kind": "event_match", @@ -352,15 +472,17 @@ BASE_APPEND_UNDERRIDE_RULES: List[Dict[str, Any]] = [ "_cache_key": "_encrypted", }, ], - "actions": [ + actions=[ "notify", {"set_tweak": "sound", "value": "default"}, {"set_tweak": "highlight", "value": False}, ], - }, - { - "rule_id": "global/underride/.org.matrix.msc3772.thread_reply", - "conditions": [ + ), + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["underride"], + rule_id="global/underride/.org.matrix.msc3772.thread_reply", + conditions=[ { "kind": "org.matrix.msc3772.relation_match", "rel_type": "m.thread", @@ -368,11 +490,13 @@ BASE_APPEND_UNDERRIDE_RULES: List[Dict[str, Any]] = [ "sender_type": "user_id", } ], - "actions": ["notify", {"set_tweak": "highlight", "value": False}], - }, - { - "rule_id": "global/underride/.m.rule.message", - "conditions": [ + actions=["notify", {"set_tweak": "highlight", "value": False}], + ), + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["underride"], + rule_id="global/underride/.m.rule.message", + conditions=[ { "kind": "event_match", "key": "type", @@ -380,13 +504,15 @@ BASE_APPEND_UNDERRIDE_RULES: List[Dict[str, Any]] = [ "_cache_key": "_message", } ], - "actions": ["notify", {"set_tweak": "highlight", "value": False}], - }, + actions=["notify", {"set_tweak": "highlight", "value": False}], + ), # XXX: this is going to fire for events which aren't m.room.messages # but are encrypted (e.g. m.call.*)... - { - "rule_id": "global/underride/.m.rule.encrypted", - "conditions": [ + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["underride"], + rule_id="global/underride/.m.rule.encrypted", + conditions=[ { "kind": "event_match", "key": "type", @@ -394,11 +520,13 @@ BASE_APPEND_UNDERRIDE_RULES: List[Dict[str, Any]] = [ "_cache_key": "_encrypted", } ], - "actions": ["notify", {"set_tweak": "highlight", "value": False}], - }, - { - "rule_id": "global/underride/.im.vector.jitsi", - "conditions": [ + actions=["notify", {"set_tweak": "highlight", "value": False}], + ), + PushRule( + default=True, + priority_class=PRIORITY_CLASS_MAP["underride"], + rule_id="global/underride/.im.vector.jitsi", + conditions=[ { "kind": "event_match", "key": "type", @@ -418,29 +546,27 @@ BASE_APPEND_UNDERRIDE_RULES: List[Dict[str, Any]] = [ "_cache_key": "_is_state_event", }, ], - "actions": ["notify", {"set_tweak": "highlight", "value": False}], - }, + actions=["notify", {"set_tweak": "highlight", "value": False}], + ), ] BASE_RULE_IDS = set() +BASE_RULES_BY_ID: Dict[str, PushRule] = {} + for r in BASE_APPEND_CONTENT_RULES: - r["priority_class"] = PRIORITY_CLASS_MAP["content"] - r["default"] = True - BASE_RULE_IDS.add(r["rule_id"]) + BASE_RULE_IDS.add(r.rule_id) + BASE_RULES_BY_ID[r.rule_id] = r for r in BASE_PREPEND_OVERRIDE_RULES: - r["priority_class"] = PRIORITY_CLASS_MAP["override"] - r["default"] = True - BASE_RULE_IDS.add(r["rule_id"]) + BASE_RULE_IDS.add(r.rule_id) + BASE_RULES_BY_ID[r.rule_id] = r for r in BASE_APPEND_OVERRIDE_RULES: - r["priority_class"] = PRIORITY_CLASS_MAP["override"] - r["default"] = True - BASE_RULE_IDS.add(r["rule_id"]) + BASE_RULE_IDS.add(r.rule_id) + BASE_RULES_BY_ID[r.rule_id] = r for r in BASE_APPEND_UNDERRIDE_RULES: - r["priority_class"] = PRIORITY_CLASS_MAP["underride"] - r["default"] = True - BASE_RULE_IDS.add(r["rule_id"]) + BASE_RULE_IDS.add(r.rule_id) + BASE_RULES_BY_ID[r.rule_id] = r diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 713dcf6950..ccd512be54 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -15,7 +15,18 @@ import itertools import logging -from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Set, Tuple, Union +from typing import ( + TYPE_CHECKING, + Collection, + Dict, + Iterable, + List, + Mapping, + Optional, + Set, + Tuple, + Union, +) from prometheus_client import Counter @@ -30,6 +41,7 @@ from synapse.util.caches import register_cache from synapse.util.metrics import measure_func from synapse.visibility import filter_event_for_clients_with_state +from .baserules import FilteredPushRules, PushRule from .push_rule_evaluator import PushRuleEvaluatorForEvent if TYPE_CHECKING: @@ -112,7 +124,7 @@ class BulkPushRuleEvaluator: async def _get_rules_for_event( self, event: EventBase, - ) -> Dict[str, List[Dict[str, Any]]]: + ) -> Dict[str, FilteredPushRules]: """Get the push rules for all users who may need to be notified about the event. @@ -186,7 +198,7 @@ class BulkPushRuleEvaluator: return pl_event.content if pl_event else {}, sender_level async def _get_mutual_relations( - self, event: EventBase, rules: Iterable[Dict[str, Any]] + self, event: EventBase, rules: Iterable[Tuple[PushRule, bool]] ) -> Dict[str, Set[Tuple[str, str]]]: """ Fetch event metadata for events which related to the same event as the given event. @@ -216,12 +228,11 @@ class BulkPushRuleEvaluator: # Pre-filter to figure out which relation types are interesting. rel_types = set() - for rule in rules: - # Skip disabled rules. - if "enabled" in rule and not rule["enabled"]: + for rule, enabled in rules: + if not enabled: continue - for condition in rule["conditions"]: + for condition in rule.conditions: if condition["kind"] != "org.matrix.msc3772.relation_match": continue @@ -254,7 +265,7 @@ class BulkPushRuleEvaluator: count_as_unread = _should_count_as_unread(event, context) rules_by_user = await self._get_rules_for_event(event) - actions_by_user: Dict[str, List[Union[dict, str]]] = {} + actions_by_user: Dict[str, Collection[Union[Mapping, str]]] = {} room_member_count = await self.store.get_number_joined_users_in_room( event.room_id @@ -317,15 +328,13 @@ class BulkPushRuleEvaluator: # current user, it'll be added to the dict later. actions_by_user[uid] = [] - for rule in rules: - if "enabled" in rule and not rule["enabled"]: + for rule, enabled in rules: + if not enabled: continue - matches = evaluator.check_conditions( - rule["conditions"], uid, display_name - ) + matches = evaluator.check_conditions(rule.conditions, uid, display_name) if matches: - actions = [x for x in rule["actions"] if x != "dont_notify"] + actions = [x for x in rule.actions if x != "dont_notify"] if actions and "notify" in actions: # Push rules say we should notify the user of this event actions_by_user[uid] = actions diff --git a/synapse/push/clientformat.py b/synapse/push/clientformat.py index 5117ef6854..73618d9234 100644 --- a/synapse/push/clientformat.py +++ b/synapse/push/clientformat.py @@ -18,16 +18,15 @@ from typing import Any, Dict, List, Optional from synapse.push.rulekinds import PRIORITY_CLASS_INVERSE_MAP, PRIORITY_CLASS_MAP from synapse.types import UserID +from .baserules import FilteredPushRules, PushRule + def format_push_rules_for_user( - user: UserID, ruleslist: List + user: UserID, ruleslist: FilteredPushRules ) -> Dict[str, Dict[str, list]]: """Converts a list of rawrules and a enabled map into nested dictionaries to match the Matrix client-server format for push rules""" - # We're going to be mutating this a lot, so do a deep copy - ruleslist = copy.deepcopy(ruleslist) - rules: Dict[str, Dict[str, List[Dict[str, Any]]]] = { "global": {}, "device": {}, @@ -35,11 +34,30 @@ def format_push_rules_for_user( rules["global"] = _add_empty_priority_class_arrays(rules["global"]) - for r in ruleslist: - template_name = _priority_class_to_template_name(r["priority_class"]) + for r, enabled in ruleslist: + template_name = _priority_class_to_template_name(r.priority_class) + + rulearray = rules["global"][template_name] + + template_rule = _rule_to_template(r) + if not template_rule: + continue + + rulearray.append(template_rule) + + template_rule["enabled"] = enabled + + if "conditions" not in template_rule: + # Not all formatted rules have explicit conditions, e.g. "room" + # rules omit them as they can be derived from the kind and rule ID. + # + # If the formatted rule has no conditions then we can skip the + # formatting of conditions. + continue # Remove internal stuff. - for c in r["conditions"]: + template_rule["conditions"] = copy.deepcopy(template_rule["conditions"]) + for c in template_rule["conditions"]: c.pop("_cache_key", None) pattern_type = c.pop("pattern_type", None) @@ -52,16 +70,6 @@ def format_push_rules_for_user( if sender_type == "user_id": c["sender"] = user.to_string() - rulearray = rules["global"][template_name] - - template_rule = _rule_to_template(r) - if template_rule: - if "enabled" in r: - template_rule["enabled"] = r["enabled"] - else: - template_rule["enabled"] = True - rulearray.append(template_rule) - return rules @@ -71,24 +79,24 @@ def _add_empty_priority_class_arrays(d: Dict[str, list]) -> Dict[str, list]: return d -def _rule_to_template(rule: Dict[str, Any]) -> Optional[Dict[str, Any]]: - unscoped_rule_id = None - if "rule_id" in rule: - unscoped_rule_id = _rule_id_from_namespaced(rule["rule_id"]) +def _rule_to_template(rule: PushRule) -> Optional[Dict[str, Any]]: + templaterule: Dict[str, Any] + + unscoped_rule_id = _rule_id_from_namespaced(rule.rule_id) - template_name = _priority_class_to_template_name(rule["priority_class"]) + template_name = _priority_class_to_template_name(rule.priority_class) if template_name in ["override", "underride"]: - templaterule = {k: rule[k] for k in ["conditions", "actions"]} + templaterule = {"conditions": rule.conditions, "actions": rule.actions} elif template_name in ["sender", "room"]: - templaterule = {"actions": rule["actions"]} - unscoped_rule_id = rule["conditions"][0]["pattern"] + templaterule = {"actions": rule.actions} + unscoped_rule_id = rule.conditions[0]["pattern"] elif template_name == "content": - if len(rule["conditions"]) != 1: + if len(rule.conditions) != 1: return None - thecond = rule["conditions"][0] + thecond = rule.conditions[0] if "pattern" not in thecond: return None - templaterule = {"actions": rule["actions"]} + templaterule = {"actions": rule.actions} templaterule["pattern"] = thecond["pattern"] else: # This should not be reached unless this function is not kept in sync @@ -97,8 +105,8 @@ def _rule_to_template(rule: Dict[str, Any]) -> Optional[Dict[str, Any]]: if unscoped_rule_id: templaterule["rule_id"] = unscoped_rule_id - if "default" in rule: - templaterule["default"] = rule["default"] + if rule.default: + templaterule["default"] = True return templaterule diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py index 2e8a017add..3c5632cd91 100644 --- a/synapse/push/push_rule_evaluator.py +++ b/synapse/push/push_rule_evaluator.py @@ -15,7 +15,18 @@ import logging import re -from typing import Any, Dict, List, Mapping, Optional, Pattern, Set, Tuple, Union +from typing import ( + Any, + Dict, + List, + Mapping, + Optional, + Pattern, + Sequence, + Set, + Tuple, + Union, +) from matrix_common.regex import glob_to_regex, to_word_pattern @@ -32,14 +43,14 @@ INEQUALITY_EXPR = re.compile("^([=<>]*)([0-9]*)$") def _room_member_count( - ev: EventBase, condition: Dict[str, Any], room_member_count: int + ev: EventBase, condition: Mapping[str, Any], room_member_count: int ) -> bool: return _test_ineq_condition(condition, room_member_count) def _sender_notification_permission( ev: EventBase, - condition: Dict[str, Any], + condition: Mapping[str, Any], sender_power_level: int, power_levels: Dict[str, Union[int, Dict[str, int]]], ) -> bool: @@ -54,7 +65,7 @@ def _sender_notification_permission( return sender_power_level >= room_notif_level -def _test_ineq_condition(condition: Dict[str, Any], number: int) -> bool: +def _test_ineq_condition(condition: Mapping[str, Any], number: int) -> bool: if "is" not in condition: return False m = INEQUALITY_EXPR.match(condition["is"]) @@ -137,7 +148,7 @@ class PushRuleEvaluatorForEvent: self._condition_cache: Dict[str, bool] = {} def check_conditions( - self, conditions: List[dict], uid: str, display_name: Optional[str] + self, conditions: Sequence[Mapping], uid: str, display_name: Optional[str] ) -> bool: """ Returns true if a user's conditions/user ID/display name match the event. @@ -169,7 +180,7 @@ class PushRuleEvaluatorForEvent: return True def matches( - self, condition: Dict[str, Any], user_id: str, display_name: Optional[str] + self, condition: Mapping[str, Any], user_id: str, display_name: Optional[str] ) -> bool: """ Returns true if a user's condition/user ID/display name match the event. @@ -204,7 +215,7 @@ class PushRuleEvaluatorForEvent: # endpoint with an unknown kind, see _rule_tuple_from_request_object. return True - def _event_match(self, condition: dict, user_id: str) -> bool: + def _event_match(self, condition: Mapping, user_id: str) -> bool: """ Check an "event_match" push rule condition. @@ -269,7 +280,7 @@ class PushRuleEvaluatorForEvent: return bool(r.search(body)) - def _relation_match(self, condition: dict, user_id: str) -> bool: + def _relation_match(self, condition: Mapping, user_id: str) -> bool: """ Check an "relation_match" push rule condition. diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index f62aa45ca1..eabf9c9739 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -74,7 +74,17 @@ receipt. """ import logging -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union, cast +from typing import ( + TYPE_CHECKING, + Collection, + Dict, + List, + Mapping, + Optional, + Tuple, + Union, + cast, +) import attr @@ -154,7 +164,9 @@ class NotifCounts: highlight_count: int = 0 -def _serialize_action(actions: List[Union[dict, str]], is_highlight: bool) -> str: +def _serialize_action( + actions: Collection[Union[Mapping, str]], is_highlight: bool +) -> str: """Custom serializer for actions. This allows us to "compress" common actions. We use the fact that most users have the same actions for notifs (and for @@ -750,7 +762,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas async def add_push_actions_to_staging( self, event_id: str, - user_id_actions: Dict[str, List[Union[dict, str]]], + user_id_actions: Dict[str, Collection[Union[Mapping, str]]], count_as_unread: bool, ) -> None: """Add the push actions for the event to the push action staging area. @@ -767,7 +779,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas # This is a helper function for generating the necessary tuple that # can be used to insert into the `event_push_actions_staging` table. def _gen_entry( - user_id: str, actions: List[Union[dict, str]] + user_id: str, actions: Collection[Union[Mapping, str]] ) -> Tuple[str, str, str, int, int, int]: is_highlight = 1 if _action_has_highlight(actions) else 0 notif = 1 if "notify" in actions else 0 @@ -1410,7 +1422,7 @@ class EventPushActionsStore(EventPushActionsWorkerStore): ] -def _action_has_highlight(actions: List[Union[dict, str]]) -> bool: +def _action_has_highlight(actions: Collection[Union[Mapping, str]]) -> bool: for action in actions: if not isinstance(action, dict): continue diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py index 768f95d16c..255620f996 100644 --- a/synapse/storage/databases/main/push_rule.py +++ b/synapse/storage/databases/main/push_rule.py @@ -14,11 +14,23 @@ # limitations under the License. import abc import logging -from typing import TYPE_CHECKING, Collection, Dict, List, Optional, Tuple, Union, cast +from typing import ( + TYPE_CHECKING, + Any, + Collection, + Dict, + List, + Mapping, + Optional, + Sequence, + Tuple, + Union, + cast, +) from synapse.api.errors import StoreError from synapse.config.homeserver import ExperimentalConfig -from synapse.push.baserules import list_with_base_rules +from synapse.push.baserules import FilteredPushRules, PushRule, compile_push_rules from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker from synapse.storage._base import SQLBaseStore, db_to_json from synapse.storage.database import ( @@ -50,60 +62,30 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -def _is_experimental_rule_enabled( - rule_id: str, experimental_config: ExperimentalConfig -) -> bool: - """Used by `_load_rules` to filter out experimental rules when they - have not been enabled. - """ - if ( - rule_id == "global/override/.org.matrix.msc3786.rule.room.server_acl" - and not experimental_config.msc3786_enabled - ): - return False - if ( - rule_id == "global/underride/.org.matrix.msc3772.thread_reply" - and not experimental_config.msc3772_enabled - ): - return False - return True - - def _load_rules( rawrules: List[JsonDict], enabled_map: Dict[str, bool], experimental_config: ExperimentalConfig, -) -> List[JsonDict]: - ruleslist = [] - for rawrule in rawrules: - rule = dict(rawrule) - rule["conditions"] = db_to_json(rawrule["conditions"]) - rule["actions"] = db_to_json(rawrule["actions"]) - rule["default"] = False - ruleslist.append(rule) - - # We're going to be mutating this a lot, so copy it. We also filter out - # any experimental default push rules that aren't enabled. - rules = [ - rule - for rule in list_with_base_rules(ruleslist) - if _is_experimental_rule_enabled(rule["rule_id"], experimental_config) - ] +) -> FilteredPushRules: + """Take the DB rows returned from the DB and convert them into a full + `FilteredPushRules` object. + """ - for i, rule in enumerate(rules): - rule_id = rule["rule_id"] + ruleslist = [ + PushRule( + rule_id=rawrule["rule_id"], + priority_class=rawrule["priority_class"], + conditions=db_to_json(rawrule["conditions"]), + actions=db_to_json(rawrule["actions"]), + ) + for rawrule in rawrules + ] - if rule_id not in enabled_map: - continue - if rule.get("enabled", True) == bool(enabled_map[rule_id]): - continue + push_rules = compile_push_rules(ruleslist) - # Rules are cached across users. - rule = dict(rule) - rule["enabled"] = bool(enabled_map[rule_id]) - rules[i] = rule + filtered_rules = FilteredPushRules(push_rules, enabled_map, experimental_config) - return rules + return filtered_rules # The ABCMeta metaclass ensures that it cannot be instantiated without @@ -162,7 +144,7 @@ class PushRulesWorkerStore( raise NotImplementedError() @cached(max_entries=5000) - async def get_push_rules_for_user(self, user_id: str) -> List[JsonDict]: + async def get_push_rules_for_user(self, user_id: str) -> FilteredPushRules: rows = await self.db_pool.simple_select_list( table="push_rules", keyvalues={"user_name": user_id}, @@ -216,11 +198,11 @@ class PushRulesWorkerStore( @cachedList(cached_method_name="get_push_rules_for_user", list_name="user_ids") async def bulk_get_push_rules( self, user_ids: Collection[str] - ) -> Dict[str, List[JsonDict]]: + ) -> Dict[str, FilteredPushRules]: if not user_ids: return {} - results: Dict[str, List[JsonDict]] = {user_id: [] for user_id in user_ids} + raw_rules: Dict[str, List[JsonDict]] = {user_id: [] for user_id in user_ids} rows = await self.db_pool.simple_select_many_batch( table="push_rules", @@ -234,11 +216,13 @@ class PushRulesWorkerStore( rows.sort(key=lambda row: (-int(row["priority_class"]), -int(row["priority"]))) for row in rows: - results.setdefault(row["user_name"], []).append(row) + raw_rules.setdefault(row["user_name"], []).append(row) enabled_map_by_user = await self.bulk_get_push_rules_enabled(user_ids) - for user_id, rules in results.items(): + results: Dict[str, FilteredPushRules] = {} + + for user_id, rules in raw_rules.items(): results[user_id] = _load_rules( rules, enabled_map_by_user.get(user_id, {}), self.hs.config.experimental ) @@ -345,8 +329,8 @@ class PushRuleStore(PushRulesWorkerStore): user_id: str, rule_id: str, priority_class: int, - conditions: List[Dict[str, str]], - actions: List[Union[JsonDict, str]], + conditions: Sequence[Mapping[str, str]], + actions: Sequence[Union[Mapping[str, Any], str]], before: Optional[str] = None, after: Optional[str] = None, ) -> None: @@ -817,7 +801,7 @@ class PushRuleStore(PushRulesWorkerStore): return self._push_rules_stream_id_gen.get_current_token() async def copy_push_rule_from_room_to_room( - self, new_room_id: str, user_id: str, rule: dict + self, new_room_id: str, user_id: str, rule: PushRule ) -> None: """Copy a single push rule from one room to another for a specific user. @@ -827,21 +811,27 @@ class PushRuleStore(PushRulesWorkerStore): rule: A push rule. """ # Create new rule id - rule_id_scope = "/".join(rule["rule_id"].split("/")[:-1]) + rule_id_scope = "/".join(rule.rule_id.split("/")[:-1]) new_rule_id = rule_id_scope + "/" + new_room_id + new_conditions = [] + # Change room id in each condition - for condition in rule.get("conditions", []): + for condition in rule.conditions: + new_condition = condition if condition.get("key") == "room_id": - condition["pattern"] = new_room_id + new_condition = dict(condition) + new_condition["pattern"] = new_room_id + + new_conditions.append(new_condition) # Add the rule for the new room await self.add_push_rule( user_id=user_id, rule_id=new_rule_id, - priority_class=rule["priority_class"], - conditions=rule["conditions"], - actions=rule["actions"], + priority_class=rule.priority_class, + conditions=new_conditions, + actions=rule.actions, ) async def copy_push_rules_from_room_to_room_for_user( @@ -859,8 +849,11 @@ class PushRuleStore(PushRulesWorkerStore): user_push_rules = await self.get_push_rules_for_user(user_id) # Get rules relating to the old room and copy them to the new room - for rule in user_push_rules: - conditions = rule.get("conditions", []) + for rule, enabled in user_push_rules: + if not enabled: + continue + + conditions = rule.conditions if any( (c.get("key") == "room_id" and c.get("pattern") == old_room_id) for c in conditions diff --git a/tests/handlers/test_deactivate_account.py b/tests/handlers/test_deactivate_account.py index ff9f2e8edb..82baa8f154 100644 --- a/tests/handlers/test_deactivate_account.py +++ b/tests/handlers/test_deactivate_account.py @@ -11,11 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Dict from twisted.test.proto_helpers import MemoryReactor from synapse.api.constants import AccountDataTypes +from synapse.push.baserules import PushRule from synapse.push.rulekinds import PRIORITY_CLASS_MAP from synapse.rest import admin from synapse.rest.client import account, login @@ -130,12 +130,12 @@ class DeactivateAccountTestCase(HomeserverTestCase): ), ) - def _is_custom_rule(self, push_rule: Dict[str, Any]) -> bool: + def _is_custom_rule(self, push_rule: PushRule) -> bool: """ Default rules start with a dot: such as .m.rule and .im.vector. This function returns true iff a rule is custom (not default). """ - return "/." not in push_rule["rule_id"] + return "/." not in push_rule.rule_id def test_push_rules_deleted_upon_account_deactivation(self) -> None: """ @@ -157,22 +157,21 @@ class DeactivateAccountTestCase(HomeserverTestCase): ) # Test the rule exists - push_rules = self.get_success(self._store.get_push_rules_for_user(self.user)) + filtered_push_rules = self.get_success( + self._store.get_push_rules_for_user(self.user) + ) # Filter out default rules; we don't care - push_rules = list(filter(self._is_custom_rule, push_rules)) + push_rules = [r for r, _ in filtered_push_rules if self._is_custom_rule(r)] # Check our rule made it self.assertEqual( push_rules, [ - { - "user_name": "@user:test", - "rule_id": "personal.override.rule1", - "priority_class": 5, - "priority": 0, - "conditions": [], - "actions": [], - "default": False, - } + PushRule( + rule_id="personal.override.rule1", + priority_class=5, + conditions=[], + actions=[], + ) ], push_rules, ) @@ -180,9 +179,11 @@ class DeactivateAccountTestCase(HomeserverTestCase): # Request the deactivation of our account self._deactivate_my_account() - push_rules = self.get_success(self._store.get_push_rules_for_user(self.user)) + filtered_push_rules = self.get_success( + self._store.get_push_rules_for_user(self.user) + ) # Filter out default rules; we don't care - push_rules = list(filter(self._is_custom_rule, push_rules)) + push_rules = [r for r, _ in filtered_push_rules if self._is_custom_rule(r)] # Check our rule no longer exists self.assertEqual(push_rules, [], push_rules) -- cgit 1.5.1 From c3516e9decc355b75a297d72a13b98a43d312e66 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Tue, 16 Aug 2022 12:16:56 +0000 Subject: Faster room joins: make `/joined_members` block whilst the room is partial stated. (#13514) --- changelog.d/13514.bugfix | 1 + synapse/handlers/message.py | 6 +++++- synapse/storage/controllers/state.py | 13 +++++++++++++ synapse/storage/databases/main/roommember.py | 3 +++ 4 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13514.bugfix (limited to 'synapse/storage/databases/main') diff --git a/changelog.d/13514.bugfix b/changelog.d/13514.bugfix new file mode 100644 index 0000000000..7498af0e47 --- /dev/null +++ b/changelog.d/13514.bugfix @@ -0,0 +1 @@ +Faster room joins: make `/joined_members` block whilst the room is partial stated. \ No newline at end of file diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 6b03603598..8f29ee9a87 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -331,7 +331,11 @@ class MessageHandler: msg="Getting joined members while not being a current member of the room is forbidden.", ) - users_with_profile = await self.store.get_users_in_room_with_profiles(room_id) + users_with_profile = ( + await self._state_storage_controller.get_users_in_room_with_profiles( + room_id + ) + ) # If this is an AS, double check that they are allowed to see the members. # This can either be because the AS user is in the room or because there diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py index 0d480f1014..0c78eb735e 100644 --- a/synapse/storage/controllers/state.py +++ b/synapse/storage/controllers/state.py @@ -30,6 +30,7 @@ from typing import ( from synapse.api.constants import EventTypes from synapse.events import EventBase from synapse.logging.opentracing import trace +from synapse.storage.roommember import ProfileInfo from synapse.storage.state import StateFilter from synapse.storage.util.partial_state_events_tracker import ( PartialCurrentStateTracker, @@ -506,3 +507,15 @@ class StateStorageController: await self._partial_state_room_tracker.await_full_state(room_id) return await self.stores.main.get_current_hosts_in_room(room_id) + + async def get_users_in_room_with_profiles( + self, room_id: str + ) -> Dict[str, ProfileInfo]: + """ + Get the current users in the room with their profiles. + If the room is currently partial-stated, this will block until the room has + full state. + """ + await self._partial_state_room_tracker.await_full_state(room_id) + + return await self.stores.main.get_users_in_room_with_profiles(room_id) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 93ff4816c8..5e5f607a14 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -283,6 +283,9 @@ class RoomMemberWorkerStore(EventsWorkerStore): Returns: A mapping from user ID to ProfileInfo. + + Preconditions: + - There is full state available for the room (it is not partial-stated). """ def _get_users_in_room_with_profiles( -- cgit 1.5.1 From 0a4efbc1ddc3a58a6d75ad5d4d960b9ed367481e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 16 Aug 2022 12:39:40 -0500 Subject: Instrument the federation/backfill part of `/messages` (#13489) Instrument the federation/backfill part of `/messages` so it's easier to follow what's going on in Jaeger when viewing a trace. Split out from https://github.com/matrix-org/synapse/pull/13440 Follow-up from https://github.com/matrix-org/synapse/pull/13368 Part of https://github.com/matrix-org/synapse/issues/13356 --- changelog.d/13489.misc | 1 + synapse/federation/federation_client.py | 27 ++++- synapse/handlers/federation.py | 10 +- synapse/handlers/federation_event.py | 112 ++++++++++++++++++--- synapse/logging/opentracing.py | 19 +++- synapse/storage/controllers/persist_events.py | 30 ++++-- synapse/storage/controllers/state.py | 5 +- synapse/storage/databases/main/event_federation.py | 6 ++ synapse/storage/databases/main/events.py | 2 + synapse/storage/databases/main/events_worker.py | 38 +++++-- .../storage/util/partial_state_events_tracker.py | 3 + 11 files changed, 220 insertions(+), 33 deletions(-) create mode 100644 changelog.d/13489.misc (limited to 'synapse/storage/databases/main') diff --git a/changelog.d/13489.misc b/changelog.d/13489.misc new file mode 100644 index 0000000000..5e4853860e --- /dev/null +++ b/changelog.d/13489.misc @@ -0,0 +1 @@ +Instrument the federation/backfill part of `/messages` for understandable traces in Jaeger. diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 54ffbd8170..987f6dad46 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -61,7 +61,7 @@ from synapse.federation.federation_base import ( ) from synapse.federation.transport.client import SendJoinResponse from synapse.http.types import QueryParams -from synapse.logging.opentracing import trace +from synapse.logging.opentracing import SynapseTags, set_tag, tag_args, trace from synapse.types import JsonDict, UserID, get_domain_from_id from synapse.util.async_helpers import concurrently_execute from synapse.util.caches.expiringcache import ExpiringCache @@ -235,6 +235,7 @@ class FederationClient(FederationBase): ) @trace + @tag_args async def backfill( self, dest: str, room_id: str, limit: int, extremities: Collection[str] ) -> Optional[List[EventBase]]: @@ -337,6 +338,8 @@ class FederationClient(FederationBase): return None + @trace + @tag_args async def get_pdu( self, destinations: Iterable[str], @@ -448,6 +451,8 @@ class FederationClient(FederationBase): return event_copy + @trace + @tag_args async def get_room_state_ids( self, destination: str, room_id: str, event_id: str ) -> Tuple[List[str], List[str]]: @@ -467,6 +472,23 @@ class FederationClient(FederationBase): state_event_ids = result["pdu_ids"] auth_event_ids = result.get("auth_chain_ids", []) + set_tag( + SynapseTags.RESULT_PREFIX + "state_event_ids", + str(state_event_ids), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "state_event_ids.length", + str(len(state_event_ids)), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "auth_event_ids", + str(auth_event_ids), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "auth_event_ids.length", + str(len(auth_event_ids)), + ) + if not isinstance(state_event_ids, list) or not isinstance( auth_event_ids, list ): @@ -474,6 +496,8 @@ class FederationClient(FederationBase): return state_event_ids, auth_event_ids + @trace + @tag_args async def get_room_state( self, destination: str, @@ -533,6 +557,7 @@ class FederationClient(FederationBase): return valid_state_events, valid_auth_events + @trace async def _check_sigs_and_hash_and_fetch( self, origin: str, diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 6f5ab86ac4..d13011d138 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -59,7 +59,7 @@ from synapse.events.validator import EventValidator from synapse.federation.federation_client import InvalidResponseError from synapse.http.servlet import assert_params_in_dict from synapse.logging.context import nested_logging_context -from synapse.logging.opentracing import tag_args, trace +from synapse.logging.opentracing import SynapseTags, set_tag, tag_args, trace from synapse.metrics.background_process_metrics import run_as_background_process from synapse.module_api import NOT_SPAM from synapse.replication.http.federation import ( @@ -370,6 +370,14 @@ class FederationHandler: logger.debug( "_maybe_backfill_inner: extremities_to_request %s", extremities_to_request ) + set_tag( + SynapseTags.RESULT_PREFIX + "extremities_to_request", + str(extremities_to_request), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "extremities_to_request.length", + str(len(extremities_to_request)), + ) # Now we need to decide which hosts to hit first. diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 8968b705d4..dd0d610fe9 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -59,7 +59,13 @@ from synapse.events import EventBase from synapse.events.snapshot import EventContext from synapse.federation.federation_client import InvalidResponseError from synapse.logging.context import nested_logging_context -from synapse.logging.opentracing import trace +from synapse.logging.opentracing import ( + SynapseTags, + set_tag, + start_active_span, + tag_args, + trace, +) from synapse.metrics.background_process_metrics import run_as_background_process from synapse.replication.http.devices import ReplicationUserDevicesResyncRestServlet from synapse.replication.http.federation import ( @@ -410,6 +416,7 @@ class FederationEventHandler: prev_member_event, ) + @trace async def process_remote_join( self, origin: str, @@ -715,7 +722,7 @@ class FederationEventHandler: @trace async def _process_pulled_events( - self, origin: str, events: Iterable[EventBase], backfilled: bool + self, origin: str, events: Collection[EventBase], backfilled: bool ) -> None: """Process a batch of events we have pulled from a remote server @@ -730,6 +737,15 @@ class FederationEventHandler: backfilled: True if this is part of a historical batch of events (inhibits notification to clients, and validation of device keys.) """ + set_tag( + SynapseTags.FUNC_ARG_PREFIX + "event_ids", + str([event.event_id for event in events]), + ) + set_tag( + SynapseTags.FUNC_ARG_PREFIX + "event_ids.length", + str(len(events)), + ) + set_tag(SynapseTags.FUNC_ARG_PREFIX + "backfilled", str(backfilled)) logger.debug( "processing pulled backfilled=%s events=%s", backfilled, @@ -753,6 +769,7 @@ class FederationEventHandler: await self._process_pulled_event(origin, ev, backfilled=backfilled) @trace + @tag_args async def _process_pulled_event( self, origin: str, event: EventBase, backfilled: bool ) -> None: @@ -854,6 +871,7 @@ class FederationEventHandler: else: raise + @trace async def _compute_event_context_with_maybe_missing_prevs( self, dest: str, event: EventBase ) -> EventContext: @@ -970,6 +988,8 @@ class FederationEventHandler: event, state_ids_before_event=state_map, partial_state=partial_state ) + @trace + @tag_args async def _get_state_ids_after_missing_prev_event( self, destination: str, @@ -1009,10 +1029,10 @@ class FederationEventHandler: logger.debug("Fetching %i events from cache/store", len(desired_events)) have_events = await self._store.have_seen_events(room_id, desired_events) - missing_desired_events = desired_events - have_events + missing_desired_event_ids = desired_events - have_events logger.debug( "We are missing %i events (got %i)", - len(missing_desired_events), + len(missing_desired_event_ids), len(have_events), ) @@ -1024,13 +1044,30 @@ class FederationEventHandler: # already have a bunch of the state events. It would be nice if the # federation api gave us a way of finding out which we actually need. - missing_auth_events = set(auth_event_ids) - have_events - missing_auth_events.difference_update( - await self._store.have_seen_events(room_id, missing_auth_events) + missing_auth_event_ids = set(auth_event_ids) - have_events + missing_auth_event_ids.difference_update( + await self._store.have_seen_events(room_id, missing_auth_event_ids) ) - logger.debug("We are also missing %i auth events", len(missing_auth_events)) + logger.debug("We are also missing %i auth events", len(missing_auth_event_ids)) - missing_events = missing_desired_events | missing_auth_events + missing_event_ids = missing_desired_event_ids | missing_auth_event_ids + + set_tag( + SynapseTags.RESULT_PREFIX + "missing_auth_event_ids", + str(missing_auth_event_ids), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "missing_auth_event_ids.length", + str(len(missing_auth_event_ids)), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "missing_desired_event_ids", + str(missing_desired_event_ids), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "missing_desired_event_ids.length", + str(len(missing_desired_event_ids)), + ) # Making an individual request for each of 1000s of events has a lot of # overhead. On the other hand, we don't really want to fetch all of the events @@ -1041,13 +1078,13 @@ class FederationEventHandler: # # TODO: might it be better to have an API which lets us do an aggregate event # request - if (len(missing_events) * 10) >= len(auth_event_ids) + len(state_event_ids): + if (len(missing_event_ids) * 10) >= len(auth_event_ids) + len(state_event_ids): logger.debug("Requesting complete state from remote") await self._get_state_and_persist(destination, room_id, event_id) else: - logger.debug("Fetching %i events from remote", len(missing_events)) + logger.debug("Fetching %i events from remote", len(missing_event_ids)) await self._get_events_and_persist( - destination=destination, room_id=room_id, event_ids=missing_events + destination=destination, room_id=room_id, event_ids=missing_event_ids ) # We now need to fill out the state map, which involves fetching the @@ -1104,6 +1141,14 @@ class FederationEventHandler: event_id, failed_to_fetch, ) + set_tag( + SynapseTags.RESULT_PREFIX + "failed_to_fetch", + str(failed_to_fetch), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "failed_to_fetch.length", + str(len(failed_to_fetch)), + ) if remote_event.is_state() and remote_event.rejected_reason is None: state_map[ @@ -1112,6 +1157,8 @@ class FederationEventHandler: return state_map + @trace + @tag_args async def _get_state_and_persist( self, destination: str, room_id: str, event_id: str ) -> None: @@ -1133,6 +1180,7 @@ class FederationEventHandler: destination=destination, room_id=room_id, event_ids=(event_id,) ) + @trace async def _process_received_pdu( self, origin: str, @@ -1283,6 +1331,7 @@ class FederationEventHandler: except Exception: logger.exception("Failed to resync device for %s", sender) + @trace async def _handle_marker_event(self, origin: str, marker_event: EventBase) -> None: """Handles backfilling the insertion event when we receive a marker event that points to one. @@ -1414,6 +1463,8 @@ class FederationEventHandler: return event_from_response + @trace + @tag_args async def _get_events_and_persist( self, destination: str, room_id: str, event_ids: Collection[str] ) -> None: @@ -1459,6 +1510,7 @@ class FederationEventHandler: logger.info("Fetched %i events of %i requested", len(events), len(event_ids)) await self._auth_and_persist_outliers(room_id, events) + @trace async def _auth_and_persist_outliers( self, room_id: str, events: Iterable[EventBase] ) -> None: @@ -1477,6 +1529,16 @@ class FederationEventHandler: """ event_map = {event.event_id: event for event in events} + event_ids = event_map.keys() + set_tag( + SynapseTags.FUNC_ARG_PREFIX + "event_ids", + str(event_ids), + ) + set_tag( + SynapseTags.FUNC_ARG_PREFIX + "event_ids.length", + str(len(event_ids)), + ) + # filter out any events we have already seen. This might happen because # the events were eagerly pushed to us (eg, during a room join), or because # another thread has raced against us since we decided to request the event. @@ -1593,6 +1655,7 @@ class FederationEventHandler: backfilled=True, ) + @trace async def _check_event_auth( self, origin: Optional[str], event: EventBase, context: EventContext ) -> None: @@ -1631,6 +1694,14 @@ class FederationEventHandler: claimed_auth_events = await self._load_or_fetch_auth_events_for_event( origin, event ) + set_tag( + SynapseTags.RESULT_PREFIX + "claimed_auth_events", + str([ev.event_id for ev in claimed_auth_events]), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "claimed_auth_events.length", + str(len(claimed_auth_events)), + ) # ... and check that the event passes auth at those auth events. # https://spec.matrix.org/v1.3/server-server-api/#checks-performed-on-receipt-of-a-pdu: @@ -1728,6 +1799,7 @@ class FederationEventHandler: ) context.rejected = RejectedReason.AUTH_ERROR + @trace async def _maybe_kick_guest_users(self, event: EventBase) -> None: if event.type != EventTypes.GuestAccess: return @@ -1935,6 +2007,8 @@ class FederationEventHandler: # instead we raise an AuthError, which will make the caller ignore it. raise AuthError(code=HTTPStatus.FORBIDDEN, msg="Auth events could not be found") + @trace + @tag_args async def _get_remote_auth_chain_for_event( self, destination: str, room_id: str, event_id: str ) -> None: @@ -1963,6 +2037,7 @@ class FederationEventHandler: await self._auth_and_persist_outliers(room_id, remote_auth_events) + @trace async def _run_push_actions_and_persist_event( self, event: EventBase, context: EventContext, backfilled: bool = False ) -> None: @@ -2071,8 +2146,17 @@ class FederationEventHandler: self._message_handler.maybe_schedule_expiry(event) if not backfilled: # Never notify for backfilled events - for event in events: - await self._notify_persisted_event(event, max_stream_token) + with start_active_span("notify_persisted_events"): + set_tag( + SynapseTags.RESULT_PREFIX + "event_ids", + str([ev.event_id for ev in events]), + ) + set_tag( + SynapseTags.RESULT_PREFIX + "event_ids.length", + str(len(events)), + ) + for event in events: + await self._notify_persisted_event(event, max_stream_token) return max_stream_token.stream diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index d1fa2cf8ae..482316a1ff 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -310,6 +310,19 @@ class SynapseTags: # The name of the external cache CACHE_NAME = "cache.name" + # Used to tag function arguments + # + # Tag a named arg. The name of the argument should be appended to this prefix. + FUNC_ARG_PREFIX = "ARG." + # Tag extra variadic number of positional arguments (`def foo(first, second, *extras)`) + FUNC_ARGS = "args" + # Tag keyword args + FUNC_KWARGS = "kwargs" + + # Some intermediate result that's interesting to the function. The label for + # the result should be appended to this prefix. + RESULT_PREFIX = "RESULT." + class SynapseBaggage: FORCE_TRACING = "synapse-force-tracing" @@ -967,9 +980,9 @@ def tag_args(func: Callable[P, R]) -> Callable[P, R]: # first argument only if it's named `self` or `cls`. This isn't fool-proof # but handles the idiomatic cases. for i, arg in enumerate(args[1:], start=1): # type: ignore[index] - set_tag("ARG_" + argspec.args[i], str(arg)) - set_tag("args", str(args[len(argspec.args) :])) # type: ignore[index] - set_tag("kwargs", str(kwargs)) + set_tag(SynapseTags.FUNC_ARG_PREFIX + argspec.args[i], str(arg)) + set_tag(SynapseTags.FUNC_ARGS, str(args[len(argspec.args) :])) # type: ignore[index] + set_tag(SynapseTags.FUNC_KWARGS, str(kwargs)) yield return _custom_sync_async_decorator(func, _wrapping_logic) diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index cf98b0ab48..dad3731b9b 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -45,8 +45,14 @@ from twisted.internet import defer from synapse.api.constants import EventTypes, Membership from synapse.events import EventBase from synapse.events.snapshot import EventContext -from synapse.logging import opentracing from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable +from synapse.logging.opentracing import ( + SynapseTags, + active_span, + set_tag, + start_active_span_follows_from, + trace, +) from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.controllers.state import StateStorageController from synapse.storage.databases import Databases @@ -223,7 +229,7 @@ class _EventPeristenceQueue(Generic[_PersistResult]): queue.append(end_item) # also add our active opentracing span to the item so that we get a link back - span = opentracing.active_span() + span = active_span() if span: end_item.parent_opentracing_span_contexts.append(span.context) @@ -234,7 +240,7 @@ class _EventPeristenceQueue(Generic[_PersistResult]): res = await make_deferred_yieldable(end_item.deferred.observe()) # add another opentracing span which links to the persist trace. - with opentracing.start_active_span_follows_from( + with start_active_span_follows_from( f"{task.name}_complete", (end_item.opentracing_span_context,) ): pass @@ -266,7 +272,7 @@ class _EventPeristenceQueue(Generic[_PersistResult]): queue = self._get_drainining_queue(room_id) for item in queue: try: - with opentracing.start_active_span_follows_from( + with start_active_span_follows_from( item.task.name, item.parent_opentracing_span_contexts, inherit_force_tracing=True, @@ -355,7 +361,7 @@ class EventsPersistenceStorageController: f"Found an unexpected task type in event persistence queue: {task}" ) - @opentracing.trace + @trace async def persist_events( self, events_and_contexts: Iterable[Tuple[EventBase, EventContext]], @@ -380,9 +386,21 @@ class EventsPersistenceStorageController: PartialStateConflictError: if attempting to persist a partial state event in a room that has been un-partial stated. """ + event_ids: List[str] = [] partitioned: Dict[str, List[Tuple[EventBase, EventContext]]] = {} for event, ctx in events_and_contexts: partitioned.setdefault(event.room_id, []).append((event, ctx)) + event_ids.append(event.event_id) + + set_tag( + SynapseTags.FUNC_ARG_PREFIX + "event_ids", + str(event_ids), + ) + set_tag( + SynapseTags.FUNC_ARG_PREFIX + "event_ids.length", + str(len(event_ids)), + ) + set_tag(SynapseTags.FUNC_ARG_PREFIX + "backfilled", str(backfilled)) async def enqueue( item: Tuple[str, List[Tuple[EventBase, EventContext]]] @@ -418,7 +436,7 @@ class EventsPersistenceStorageController: self.main_store.get_room_max_token(), ) - @opentracing.trace + @trace async def persist_event( self, event: EventBase, context: EventContext, backfilled: bool = False ) -> Tuple[EventBase, PersistedEventPosition, RoomStreamToken]: diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py index 0c78eb735e..1ad002f57b 100644 --- a/synapse/storage/controllers/state.py +++ b/synapse/storage/controllers/state.py @@ -29,7 +29,7 @@ from typing import ( from synapse.api.constants import EventTypes from synapse.events import EventBase -from synapse.logging.opentracing import trace +from synapse.logging.opentracing import tag_args, trace from synapse.storage.roommember import ProfileInfo from synapse.storage.state import StateFilter from synapse.storage.util.partial_state_events_tracker import ( @@ -229,6 +229,7 @@ class StateStorageController: return {event: event_to_state[event] for event in event_ids} @trace + @tag_args async def get_state_ids_for_events( self, event_ids: Collection[str], @@ -333,6 +334,7 @@ class StateStorageController: ) @trace + @tag_args async def get_state_group_for_events( self, event_ids: Collection[str], @@ -474,6 +476,7 @@ class StateStorageController: prev_stream_id, max_stream_id ) + @trace async def get_current_state( self, room_id: str, state_filter: Optional[StateFilter] = None ) -> StateMap[EventBase]: diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 0bc8401f2b..c836078da6 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -712,6 +712,8 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas # Return all events where not all sets can reach them. return {eid for eid, n in event_to_missing_sets.items() if n} + @trace + @tag_args async def get_oldest_event_ids_with_depth_in_room( self, room_id: str ) -> List[Tuple[str, int]]: @@ -770,6 +772,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas room_id, ) + @trace async def get_insertion_event_backward_extremities_in_room( self, room_id: str ) -> List[Tuple[str, int]]: @@ -1342,6 +1345,8 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas event_results.reverse() return event_results + @trace + @tag_args async def get_successor_events(self, event_id: str) -> List[str]: """Fetch all events that have the given event as a prev event @@ -1378,6 +1383,7 @@ class EventFederationWorkerStore(SignatureWorkerStore, EventsWorkerStore, SQLBas _delete_old_forward_extrem_cache_txn, ) + @trace async def insert_insertion_extremity(self, event_id: str, room_id: str) -> None: await self.db_pool.simple_upsert( table="insertion_event_extremities", diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 5560b38a48..a4010ee28d 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -40,6 +40,7 @@ from synapse.api.errors import Codes, SynapseError from synapse.api.room_versions import RoomVersions from synapse.events import EventBase, relation_from_event from synapse.events.snapshot import EventContext +from synapse.logging.opentracing import trace from synapse.storage._base import db_to_json, make_in_list_sql_clause from synapse.storage.database import ( DatabasePool, @@ -145,6 +146,7 @@ class PersistEventsStore: self._backfill_id_gen: AbstractStreamIdGenerator = self.store._backfill_id_gen self._stream_id_gen: AbstractStreamIdGenerator = self.store._stream_id_gen + @trace async def _persist_events_and_state_updates( self, events_and_contexts: List[Tuple[EventBase, EventContext]], diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index b07d812ae2..8a7cdb024d 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -54,6 +54,7 @@ from synapse.logging.context import ( current_context, make_deferred_yieldable, ) +from synapse.logging.opentracing import start_active_span, tag_args, trace from synapse.metrics.background_process_metrics import ( run_as_background_process, wrap_as_background_process, @@ -430,6 +431,8 @@ class EventsWorkerStore(SQLBaseStore): return {e.event_id: e for e in events} + @trace + @tag_args async def get_events_as_list( self, event_ids: Collection[str], @@ -1090,23 +1093,42 @@ class EventsWorkerStore(SQLBaseStore): """ fetched_event_ids: Set[str] = set() fetched_events: Dict[str, _EventRow] = {} - events_to_fetch = event_ids - while events_to_fetch: - row_map = await self._enqueue_events(events_to_fetch) + async def _fetch_event_ids_and_get_outstanding_redactions( + event_ids_to_fetch: Collection[str], + ) -> Collection[str]: + """ + Fetch all of the given event_ids and return any associated redaction event_ids + that we still need to fetch in the next iteration. + """ + row_map = await self._enqueue_events(event_ids_to_fetch) # we need to recursively fetch any redactions of those events redaction_ids: Set[str] = set() - for event_id in events_to_fetch: + for event_id in event_ids_to_fetch: row = row_map.get(event_id) fetched_event_ids.add(event_id) if row: fetched_events[event_id] = row redaction_ids.update(row.redactions) - events_to_fetch = redaction_ids.difference(fetched_event_ids) - if events_to_fetch: - logger.debug("Also fetching redaction events %s", events_to_fetch) + event_ids_to_fetch = redaction_ids.difference(fetched_event_ids) + return event_ids_to_fetch + + # Grab the initial list of events requested + event_ids_to_fetch = await _fetch_event_ids_and_get_outstanding_redactions( + event_ids + ) + # Then go and recursively find all of the associated redactions + with start_active_span("recursively fetching redactions"): + while event_ids_to_fetch: + logger.debug("Also fetching redaction events %s", event_ids_to_fetch) + + event_ids_to_fetch = ( + await _fetch_event_ids_and_get_outstanding_redactions( + event_ids_to_fetch + ) + ) # build a map from event_id to EventBase event_map: Dict[str, EventBase] = {} @@ -1424,6 +1446,8 @@ class EventsWorkerStore(SQLBaseStore): return {r["event_id"] for r in rows} + @trace + @tag_args async def have_seen_events( self, room_id: str, event_ids: Iterable[str] ) -> Set[str]: diff --git a/synapse/storage/util/partial_state_events_tracker.py b/synapse/storage/util/partial_state_events_tracker.py index 466e5137f2..b4bf49dace 100644 --- a/synapse/storage/util/partial_state_events_tracker.py +++ b/synapse/storage/util/partial_state_events_tracker.py @@ -20,6 +20,7 @@ from twisted.internet import defer from twisted.internet.defer import Deferred from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable +from synapse.logging.opentracing import trace_with_opname from synapse.storage.databases.main.events_worker import EventsWorkerStore from synapse.storage.databases.main.room import RoomWorkerStore from synapse.util import unwrapFirstError @@ -58,6 +59,7 @@ class PartialStateEventsTracker: for o in observers: o.callback(None) + @trace_with_opname("PartialStateEventsTracker.await_full_state") async def await_full_state(self, event_ids: Collection[str]) -> None: """Wait for all the given events to have full state. @@ -151,6 +153,7 @@ class PartialCurrentStateTracker: for o in observers: o.callback(None) + @trace_with_opname("PartialCurrentStateTracker.await_full_state") async def await_full_state(self, room_id: str) -> None: # We add the deferred immediately so that the DB call to check for # partial state doesn't race when we unpartial the room. -- cgit 1.5.1 From d75512d19ebea6c0f9e38e9f55474fdb6da02b46 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Wed, 17 Aug 2022 11:42:01 +0200 Subject: Add forgotten status to Room Details API (#13503) --- changelog.d/13503.feature | 1 + docs/admin_api/rooms.md | 5 +- synapse/rest/admin/rooms.py | 1 + synapse/storage/databases/main/roommember.py | 24 ++++++++++ tests/rest/admin/test_room.py | 1 + tests/storage/test_roommember.py | 70 ++++++++++++++++++++++++++++ 6 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 changelog.d/13503.feature (limited to 'synapse/storage/databases/main') diff --git a/changelog.d/13503.feature b/changelog.d/13503.feature new file mode 100644 index 0000000000..4baabd1e32 --- /dev/null +++ b/changelog.d/13503.feature @@ -0,0 +1 @@ +Add forgotten status to Room Details API. \ No newline at end of file diff --git a/docs/admin_api/rooms.md b/docs/admin_api/rooms.md index 9aa489e4a3..ac7c54c20e 100644 --- a/docs/admin_api/rooms.md +++ b/docs/admin_api/rooms.md @@ -302,6 +302,8 @@ The following fields are possible in the JSON response body: * `state_events` - Total number of state_events of a room. Complexity of the room. * `room_type` - The type of the room taken from the room's creation event; for example "m.space" if the room is a space. If the room does not define a type, the value will be `null`. +* `forgotten` - Whether all local users have + [forgotten](https://spec.matrix.org/latest/client-server-api/#leaving-rooms) the room. The API is: @@ -330,7 +332,8 @@ A response body like the following is returned: "guest_access": null, "history_visibility": "shared", "state_events": 93534, - "room_type": "m.space" + "room_type": "m.space", + "forgotten": false } ``` diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py index 9d953d58de..68054ffc28 100644 --- a/synapse/rest/admin/rooms.py +++ b/synapse/rest/admin/rooms.py @@ -303,6 +303,7 @@ class RoomRestServlet(RestServlet): members = await self.store.get_users_in_room(room_id) ret["joined_local_devices"] = await self.store.count_devices_by_users(members) + ret["forgotten"] = await self.store.is_locally_forgotten_room(room_id) return HTTPStatus.OK, ret diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 5e5f607a14..827c1f1efd 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -1215,6 +1215,30 @@ class RoomMemberWorkerStore(EventsWorkerStore): "get_forgotten_rooms_for_user", _get_forgotten_rooms_for_user_txn ) + async def is_locally_forgotten_room(self, room_id: str) -> bool: + """Returns whether all local users have forgotten this room_id. + + Args: + room_id: The room ID to query. + + Returns: + Whether the room is forgotten. + """ + + sql = """ + SELECT count(*) > 0 FROM local_current_membership + INNER JOIN room_memberships USING (room_id, event_id) + WHERE + room_id = ? + AND forgotten = 0; + """ + + rows = await self.db_pool.execute("is_forgotten_room", None, sql, room_id) + + # `count(*)` returns always an integer + # If any rows still exist it means someone has not forgotten this room yet + return not rows[0][0] + async def get_rooms_user_has_been_in(self, user_id: str) -> Set[str]: """Get all rooms that the user has ever been in. diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py index dd5000679a..fd6da557c1 100644 --- a/tests/rest/admin/test_room.py +++ b/tests/rest/admin/test_room.py @@ -1633,6 +1633,7 @@ class RoomTestCase(unittest.HomeserverTestCase): self.assertIn("history_visibility", channel.json_body) self.assertIn("state_events", channel.json_body) self.assertIn("room_type", channel.json_body) + self.assertIn("forgotten", channel.json_body) self.assertEqual(room_id_1, channel.json_body["room_id"]) def test_single_room_devices(self) -> None: diff --git a/tests/storage/test_roommember.py b/tests/storage/test_roommember.py index 240b02cb9f..ceec690285 100644 --- a/tests/storage/test_roommember.py +++ b/tests/storage/test_roommember.py @@ -23,6 +23,7 @@ from synapse.util import Clock from tests import unittest from tests.server import TestHomeServer +from tests.test_utils import event_injection class RoomMemberStoreTestCase(unittest.HomeserverTestCase): @@ -157,6 +158,75 @@ class RoomMemberStoreTestCase(unittest.HomeserverTestCase): # Check that alice's display name is now None self.assertEqual(row[0]["display_name"], None) + def test_room_is_locally_forgotten(self): + """Test that when the last local user has forgotten a room it is known as forgotten.""" + # join two local and one remote user + self.room = self.helper.create_room_as(self.u_alice, tok=self.t_alice) + self.get_success( + event_injection.inject_member_event(self.hs, self.room, self.u_bob, "join") + ) + self.get_success( + event_injection.inject_member_event( + self.hs, self.room, self.u_charlie.to_string(), "join" + ) + ) + self.assertFalse( + self.get_success(self.store.is_locally_forgotten_room(self.room)) + ) + + # local users leave the room and the room is not forgotten + self.get_success( + event_injection.inject_member_event( + self.hs, self.room, self.u_alice, "leave" + ) + ) + self.get_success( + event_injection.inject_member_event(self.hs, self.room, self.u_bob, "leave") + ) + self.assertFalse( + self.get_success(self.store.is_locally_forgotten_room(self.room)) + ) + + # first user forgets the room, room is not forgotten + self.get_success(self.store.forget(self.u_alice, self.room)) + self.assertFalse( + self.get_success(self.store.is_locally_forgotten_room(self.room)) + ) + + # second (last local) user forgets the room and the room is forgotten + self.get_success(self.store.forget(self.u_bob, self.room)) + self.assertTrue( + self.get_success(self.store.is_locally_forgotten_room(self.room)) + ) + + def test_join_locally_forgotten_room(self): + """Tests if a user joins a forgotten room the room is not forgotten anymore.""" + self.room = self.helper.create_room_as(self.u_alice, tok=self.t_alice) + self.assertFalse( + self.get_success(self.store.is_locally_forgotten_room(self.room)) + ) + + # after leaving and forget the room, it is forgotten + self.get_success( + event_injection.inject_member_event( + self.hs, self.room, self.u_alice, "leave" + ) + ) + self.get_success(self.store.forget(self.u_alice, self.room)) + self.assertTrue( + self.get_success(self.store.is_locally_forgotten_room(self.room)) + ) + + # after rejoin the room is not forgotten anymore + self.get_success( + event_injection.inject_member_event( + self.hs, self.room, self.u_alice, "join" + ) + ) + self.assertFalse( + self.get_success(self.store.is_locally_forgotten_room(self.room)) + ) + class CurrentStateMembershipUpdateTestCase(unittest.HomeserverTestCase): def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: -- cgit 1.5.1 From 8bdf2bd31ef003f0e89a588d8977d4f689ef6856 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Wed, 17 Aug 2022 18:08:23 +0000 Subject: Fix a bug in the `/event_reports` Admin API which meant that the total count could be larger than the number of results you can actually query for. (#13525) Co-authored-by: Brendan Abolivier --- changelog.d/13525.bugfix | 1 + synapse/storage/databases/main/room.py | 6 ++++++ tests/rest/admin/test_event_reports.py | 27 +++++++++++++++++++++++++++ 3 files changed, 34 insertions(+) create mode 100644 changelog.d/13525.bugfix (limited to 'synapse/storage/databases/main') diff --git a/changelog.d/13525.bugfix b/changelog.d/13525.bugfix new file mode 100644 index 0000000000..dbd1adbc88 --- /dev/null +++ b/changelog.d/13525.bugfix @@ -0,0 +1 @@ +Fix a bug in the `/event_reports` Admin API which meant that the total count could be larger than the number of results you can actually query for. \ No newline at end of file diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 0f1f0d11ea..b7d4baa6bb 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -2001,9 +2001,15 @@ class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore): where_clause = "WHERE " + " AND ".join(filters) if len(filters) > 0 else "" + # We join on room_stats_state despite not using any columns from it + # because the join can influence the number of rows returned; + # e.g. a room that doesn't have state, maybe because it was deleted. + # The query returning the total count should be consistent with + # the query returning the results. sql = """ SELECT COUNT(*) as total_event_reports FROM event_reports AS er + JOIN room_stats_state ON room_stats_state.room_id = er.room_id {} """.format( where_clause diff --git a/tests/rest/admin/test_event_reports.py b/tests/rest/admin/test_event_reports.py index fbc490f46d..8a4e5c3f77 100644 --- a/tests/rest/admin/test_event_reports.py +++ b/tests/rest/admin/test_event_reports.py @@ -410,6 +410,33 @@ class EventReportsTestCase(unittest.HomeserverTestCase): self.assertIn("score", c) self.assertIn("reason", c) + def test_count_correct_despite_table_deletions(self) -> None: + """ + Tests that the count matches the number of rows, even if rows in joined tables + are missing. + """ + + # Delete rows from room_stats_state for one of our rooms. + self.get_success( + self.hs.get_datastores().main.db_pool.simple_delete( + "room_stats_state", {"room_id": self.room_id1}, desc="_" + ) + ) + + channel = self.make_request( + "GET", + self.url, + access_token=self.admin_user_tok, + ) + + self.assertEqual(200, channel.code, msg=channel.json_body) + # The 'total' field is 10 because only 10 reports will actually + # be retrievable since we deleted the rows in the room_stats_state + # table. + self.assertEqual(channel.json_body["total"], 10) + # This is consistent with the number of rows actually returned. + self.assertEqual(len(channel.json_body["event_reports"]), 10) + class EventReportDetailTestCase(unittest.HomeserverTestCase): servlets = [ -- cgit 1.5.1 From 3dd175b628bab5638165f20de9eade36a4e88147 Mon Sep 17 00:00:00 2001 From: Quentin Gliech Date: Mon, 22 Aug 2022 15:17:59 +0200 Subject: `synapse.api.auth.Auth` cleanup: make permission-related methods use `Requester` instead of the `UserID` (#13024) Part of #13019 This changes all the permission-related methods to rely on the Requester instead of the UserID. This is a first step towards enabling scoped access tokens at some point, since I expect the Requester to have scope-related informations in it. It also changes methods which figure out the user/device/appservice out of the access token to return a Requester instead of something else. This avoids having store-related objects in the methods signatures. --- changelog.d/13024.misc | 1 + synapse/api/auth.py | 202 +++++++++++------------ synapse/handlers/auth.py | 17 +- synapse/handlers/directory.py | 24 ++- synapse/handlers/initial_sync.py | 6 +- synapse/handlers/message.py | 23 +-- synapse/handlers/pagination.py | 2 +- synapse/handlers/register.py | 15 +- synapse/handlers/relations.py | 2 +- synapse/handlers/room.py | 4 +- synapse/handlers/room_member.py | 10 +- synapse/handlers/typing.py | 10 +- synapse/http/site.py | 2 +- synapse/rest/admin/_base.py | 10 +- synapse/rest/admin/media.py | 6 +- synapse/rest/admin/rooms.py | 12 +- synapse/rest/admin/users.py | 15 +- synapse/rest/client/profile.py | 4 +- synapse/rest/client/register.py | 3 - synapse/rest/client/room.py | 13 +- synapse/server_notices/server_notices_manager.py | 2 +- synapse/storage/databases/main/registration.py | 2 +- tests/api/test_auth.py | 8 +- tests/handlers/test_typing.py | 8 +- tests/rest/client/test_retention.py | 4 +- tests/rest/client/test_shadow_banned.py | 6 +- 26 files changed, 203 insertions(+), 208 deletions(-) create mode 100644 changelog.d/13024.misc (limited to 'synapse/storage/databases/main') diff --git a/changelog.d/13024.misc b/changelog.d/13024.misc new file mode 100644 index 0000000000..aa43c82429 --- /dev/null +++ b/changelog.d/13024.misc @@ -0,0 +1 @@ +Refactor methods in `synapse.api.auth.Auth` to use `Requester` objects everywhere instead of user IDs. diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 523bad0c55..9a1aea083f 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -37,8 +37,7 @@ from synapse.logging.opentracing import ( start_active_span, trace, ) -from synapse.storage.databases.main.registration import TokenLookupResult -from synapse.types import Requester, UserID, create_requester +from synapse.types import Requester, create_requester if TYPE_CHECKING: from synapse.server import HomeServer @@ -70,14 +69,14 @@ class Auth: async def check_user_in_room( self, room_id: str, - user_id: str, + requester: Requester, allow_departed_users: bool = False, ) -> Tuple[str, Optional[str]]: """Check if the user is in the room, or was at some point. Args: room_id: The room to check. - user_id: The user to check. + requester: The user making the request, according to the access token. current_state: Optional map of the current state of the room. If provided then that map is used to check whether they are a @@ -94,6 +93,7 @@ class Auth: membership event ID of the user. """ + user_id = requester.user.to_string() ( membership, member_event_id, @@ -182,96 +182,69 @@ class Auth: access_token = self.get_access_token_from_request(request) - ( - user_id, - device_id, - app_service, - ) = await self._get_appservice_user_id_and_device_id(request) - if user_id and app_service: - if ip_addr and self._track_appservice_user_ips: - await self.store.insert_client_ip( - user_id=user_id, - access_token=access_token, - ip=ip_addr, - user_agent=user_agent, - device_id="dummy-device" - if device_id is None - else device_id, # stubbed - ) - - requester = create_requester( - user_id, app_service=app_service, device_id=device_id + # First check if it could be a request from an appservice + requester = await self._get_appservice_user(request) + if not requester: + # If not, it should be from a regular user + requester = await self.get_user_by_access_token( + access_token, allow_expired=allow_expired ) - request.requester = user_id - return requester - - user_info = await self.get_user_by_access_token( - access_token, allow_expired=allow_expired - ) - token_id = user_info.token_id - is_guest = user_info.is_guest - shadow_banned = user_info.shadow_banned - - # Deny the request if the user account has expired. - if not allow_expired: - if await self._account_validity_handler.is_user_expired( - user_info.user_id - ): - # Raise the error if either an account validity module has determined - # the account has expired, or the legacy account validity - # implementation is enabled and determined the account has expired - raise AuthError( - 403, - "User account has expired", - errcode=Codes.EXPIRED_ACCOUNT, - ) - - device_id = user_info.device_id - - if access_token and ip_addr: + # Deny the request if the user account has expired. + # This check is only done for regular users, not appservice ones. + if not allow_expired: + if await self._account_validity_handler.is_user_expired( + requester.user.to_string() + ): + # Raise the error if either an account validity module has determined + # the account has expired, or the legacy account validity + # implementation is enabled and determined the account has expired + raise AuthError( + 403, + "User account has expired", + errcode=Codes.EXPIRED_ACCOUNT, + ) + + if ip_addr and ( + not requester.app_service or self._track_appservice_user_ips + ): + # XXX(quenting): I'm 95% confident that we could skip setting the + # device_id to "dummy-device" for appservices, and that the only impact + # would be some rows which whould not deduplicate in the 'user_ips' + # table during the transition + recorded_device_id = ( + "dummy-device" + if requester.device_id is None and requester.app_service is not None + else requester.device_id + ) await self.store.insert_client_ip( - user_id=user_info.token_owner, + user_id=requester.authenticated_entity, access_token=access_token, ip=ip_addr, user_agent=user_agent, - device_id=device_id, + device_id=recorded_device_id, ) + # Track also the puppeted user client IP if enabled and the user is puppeting if ( - user_info.user_id != user_info.token_owner + requester.user.to_string() != requester.authenticated_entity and self._track_puppeted_user_ips ): await self.store.insert_client_ip( - user_id=user_info.user_id, + user_id=requester.user.to_string(), access_token=access_token, ip=ip_addr, user_agent=user_agent, - device_id=device_id, + device_id=requester.device_id, ) - if is_guest and not allow_guest: + if requester.is_guest and not allow_guest: raise AuthError( 403, "Guest access not allowed", errcode=Codes.GUEST_ACCESS_FORBIDDEN, ) - # Mark the token as used. This is used to invalidate old refresh - # tokens after some time. - if not user_info.token_used and token_id is not None: - await self.store.mark_access_token_as_used(token_id) - - requester = create_requester( - user_info.user_id, - token_id, - is_guest, - shadow_banned, - device_id, - app_service=app_service, - authenticated_entity=user_info.token_owner, - ) - request.requester = requester return requester except KeyError: @@ -308,9 +281,7 @@ class Auth: 403, "Application service has not registered this user (%s)" % user_id ) - async def _get_appservice_user_id_and_device_id( - self, request: Request - ) -> Tuple[Optional[str], Optional[str], Optional[ApplicationService]]: + async def _get_appservice_user(self, request: Request) -> Optional[Requester]: """ Given a request, reads the request parameters to determine: - whether it's an application service that's making this request @@ -325,15 +296,13 @@ class Auth: Must use `org.matrix.msc3202.device_id` in place of `device_id` for now. Returns: - 3-tuple of - (user ID?, device ID?, application service?) + the application service `Requester` of that request Postconditions: - - If an application service is returned, so is a user ID - - A user ID is never returned without an application service - - A device ID is never returned without a user ID or an application service - - The returned application service, if present, is permitted to control the - returned user ID. + - The `app_service` field in the returned `Requester` is set + - The `user_id` field in the returned `Requester` is either the application + service sender or the controlled user set by the `user_id` URI parameter + - The returned application service is permitted to control the returned user ID. - The returned device ID, if present, has been checked to be a valid device ID for the returned user ID. """ @@ -343,12 +312,12 @@ class Auth: self.get_access_token_from_request(request) ) if app_service is None: - return None, None, None + return None if app_service.ip_range_whitelist: ip_address = IPAddress(request.getClientAddress().host) if ip_address not in app_service.ip_range_whitelist: - return None, None, None + return None # This will always be set by the time Twisted calls us. assert request.args is not None @@ -382,13 +351,15 @@ class Auth: Codes.EXCLUSIVE, ) - return effective_user_id, effective_device_id, app_service + return create_requester( + effective_user_id, app_service=app_service, device_id=effective_device_id + ) async def get_user_by_access_token( self, token: str, allow_expired: bool = False, - ) -> TokenLookupResult: + ) -> Requester: """Validate access token and get user_id from it Args: @@ -405,9 +376,9 @@ class Auth: # First look in the database to see if the access token is present # as an opaque token. - r = await self.store.get_user_by_access_token(token) - if r: - valid_until_ms = r.valid_until_ms + user_info = await self.store.get_user_by_access_token(token) + if user_info: + valid_until_ms = user_info.valid_until_ms if ( not allow_expired and valid_until_ms is not None @@ -419,7 +390,20 @@ class Auth: msg="Access token has expired", soft_logout=True ) - return r + # Mark the token as used. This is used to invalidate old refresh + # tokens after some time. + await self.store.mark_access_token_as_used(user_info.token_id) + + requester = create_requester( + user_id=user_info.user_id, + access_token_id=user_info.token_id, + is_guest=user_info.is_guest, + shadow_banned=user_info.shadow_banned, + device_id=user_info.device_id, + authenticated_entity=user_info.token_owner, + ) + + return requester # If the token isn't found in the database, then it could still be a # macaroon for a guest, so we check that here. @@ -445,11 +429,12 @@ class Auth: "Guest access token used for regular user" ) - return TokenLookupResult( + return create_requester( user_id=user_id, is_guest=True, # all guests get the same device id device_id=GUEST_DEVICE_ID, + authenticated_entity=user_id, ) except ( pymacaroons.exceptions.MacaroonException, @@ -472,32 +457,33 @@ class Auth: request.requester = create_requester(service.sender, app_service=service) return service - async def is_server_admin(self, user: UserID) -> bool: + async def is_server_admin(self, requester: Requester) -> bool: """Check if the given user is a local server admin. Args: - user: user to check + requester: The user making the request, according to the access token. Returns: True if the user is an admin """ - return await self.store.is_server_admin(user) + return await self.store.is_server_admin(requester.user) - async def check_can_change_room_list(self, room_id: str, user: UserID) -> bool: + async def check_can_change_room_list( + self, room_id: str, requester: Requester + ) -> bool: """Determine whether the user is allowed to edit the room's entry in the published room list. Args: - room_id - user + room_id: The room to check. + requester: The user making the request, according to the access token. """ - is_admin = await self.is_server_admin(user) + is_admin = await self.is_server_admin(requester) if is_admin: return True - user_id = user.to_string() - await self.check_user_in_room(room_id, user_id) + await self.check_user_in_room(room_id, requester) # We currently require the user is a "moderator" in the room. We do this # by checking if they would (theoretically) be able to change the @@ -516,7 +502,9 @@ class Auth: send_level = event_auth.get_send_level( EventTypes.CanonicalAlias, "", power_level_event ) - user_level = event_auth.get_user_power_level(user_id, auth_events) + user_level = event_auth.get_user_power_level( + requester.user.to_string(), auth_events + ) return user_level >= send_level @@ -574,16 +562,16 @@ class Auth: @trace async def check_user_in_room_or_world_readable( - self, room_id: str, user_id: str, allow_departed_users: bool = False + self, room_id: str, requester: Requester, allow_departed_users: bool = False ) -> Tuple[str, Optional[str]]: """Checks that the user is or was in the room or the room is world readable. If it isn't then an exception is raised. Args: - room_id: room to check - user_id: user to check - allow_departed_users: if True, accept users that were previously - members but have now departed + room_id: The room to check. + requester: The user making the request, according to the access token. + allow_departed_users: If True, accept users that were previously + members but have now departed. Returns: Resolves to the current membership of the user in the room and the @@ -598,7 +586,7 @@ class Auth: # * The user is a guest user, and has joined the room # else it will throw. return await self.check_user_in_room( - room_id, user_id, allow_departed_users=allow_departed_users + room_id, requester, allow_departed_users=allow_departed_users ) except AuthError: visibility = await self._storage_controllers.state.get_current_state_event( @@ -613,6 +601,6 @@ class Auth: raise UnstableSpecAuthError( 403, "User %s not in room %s, and room previews are disabled" - % (user_id, room_id), + % (requester.user, room_id), errcode=Codes.NOT_JOINED, ) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index bfa5535044..0327fc57a4 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -280,7 +280,7 @@ class AuthHandler: that it isn't stolen by re-authenticating them. Args: - requester: The user, as given by the access token + requester: The user making the request, according to the access token. request: The request sent by the client. @@ -1435,20 +1435,25 @@ class AuthHandler: access_token: access token to be deleted """ - user_info = await self.auth.get_user_by_access_token(access_token) + token = await self.store.get_user_by_access_token(access_token) + if not token: + # At this point, the token should already have been fetched once by + # the caller, so this should not happen, unless of a race condition + # between two delete requests + raise SynapseError(HTTPStatus.UNAUTHORIZED, "Unrecognised access token") await self.store.delete_access_token(access_token) # see if any modules want to know about this await self.password_auth_provider.on_logged_out( - user_id=user_info.user_id, - device_id=user_info.device_id, + user_id=token.user_id, + device_id=token.device_id, access_token=access_token, ) # delete pushers associated with this access token - if user_info.token_id is not None: + if token.token_id is not None: await self.hs.get_pusherpool().remove_pushers_by_access_token( - user_info.user_id, (user_info.token_id,) + token.user_id, (token.token_id,) ) async def delete_access_tokens_for_user( diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index 09a7a4b238..948f66a94d 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -30,7 +30,7 @@ from synapse.api.errors import ( from synapse.appservice import ApplicationService from synapse.module_api import NOT_SPAM from synapse.storage.databases.main.directory import RoomAliasMapping -from synapse.types import JsonDict, Requester, RoomAlias, UserID, get_domain_from_id +from synapse.types import JsonDict, Requester, RoomAlias, get_domain_from_id if TYPE_CHECKING: from synapse.server import HomeServer @@ -133,7 +133,7 @@ class DirectoryHandler: else: # Server admins are not subject to the same constraints as normal # users when creating an alias (e.g. being in the room). - is_admin = await self.auth.is_server_admin(requester.user) + is_admin = await self.auth.is_server_admin(requester) if (self.require_membership and check_membership) and not is_admin: rooms_for_user = await self.store.get_rooms_for_user(user_id) @@ -197,7 +197,7 @@ class DirectoryHandler: user_id = requester.user.to_string() try: - can_delete = await self._user_can_delete_alias(room_alias, user_id) + can_delete = await self._user_can_delete_alias(room_alias, requester) except StoreError as e: if e.code == 404: raise NotFoundError("Unknown room alias") @@ -400,7 +400,9 @@ class DirectoryHandler: # either no interested services, or no service with an exclusive lock return True - async def _user_can_delete_alias(self, alias: RoomAlias, user_id: str) -> bool: + async def _user_can_delete_alias( + self, alias: RoomAlias, requester: Requester + ) -> bool: """Determine whether a user can delete an alias. One of the following must be true: @@ -413,7 +415,7 @@ class DirectoryHandler: """ creator = await self.store.get_room_alias_creator(alias.to_string()) - if creator == user_id: + if creator == requester.user.to_string(): return True # Resolve the alias to the corresponding room. @@ -422,9 +424,7 @@ class DirectoryHandler: if not room_id: return False - return await self.auth.check_can_change_room_list( - room_id, UserID.from_string(user_id) - ) + return await self.auth.check_can_change_room_list(room_id, requester) async def edit_published_room_list( self, requester: Requester, room_id: str, visibility: str @@ -463,7 +463,7 @@ class DirectoryHandler: raise SynapseError(400, "Unknown room") can_change_room_list = await self.auth.check_can_change_room_list( - room_id, requester.user + room_id, requester ) if not can_change_room_list: raise AuthError( @@ -528,10 +528,8 @@ class DirectoryHandler: Get a list of the aliases that currently point to this room on this server """ # allow access to server admins and current members of the room - is_admin = await self.auth.is_server_admin(requester.user) + is_admin = await self.auth.is_server_admin(requester) if not is_admin: - await self.auth.check_user_in_room_or_world_readable( - room_id, requester.user.to_string() - ) + await self.auth.check_user_in_room_or_world_readable(room_id, requester) return await self.store.get_aliases_for_room(room_id) diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py index 6484e47e5f..860c82c110 100644 --- a/synapse/handlers/initial_sync.py +++ b/synapse/handlers/initial_sync.py @@ -309,18 +309,18 @@ class InitialSyncHandler: if blocked: raise SynapseError(403, "This room has been blocked on this server") - user_id = requester.user.to_string() - ( membership, member_event_id, ) = await self.auth.check_user_in_room_or_world_readable( room_id, - user_id, + requester, allow_departed_users=True, ) is_peeking = member_event_id is None + user_id = requester.user.to_string() + if membership == Membership.JOIN: result = await self._room_initial_sync_joined( user_id, room_id, pagin_config, membership, is_peeking diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 8f29ee9a87..acd3de06f6 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -104,7 +104,7 @@ class MessageHandler: async def get_room_data( self, - user_id: str, + requester: Requester, room_id: str, event_type: str, state_key: str, @@ -112,7 +112,7 @@ class MessageHandler: """Get data from a room. Args: - user_id + requester: The user who did the request. room_id event_type state_key @@ -125,7 +125,7 @@ class MessageHandler: membership, membership_event_id, ) = await self.auth.check_user_in_room_or_world_readable( - room_id, user_id, allow_departed_users=True + room_id, requester, allow_departed_users=True ) if membership == Membership.JOIN: @@ -161,11 +161,10 @@ class MessageHandler: async def get_state_events( self, - user_id: str, + requester: Requester, room_id: str, state_filter: Optional[StateFilter] = None, at_token: Optional[StreamToken] = None, - is_guest: bool = False, ) -> List[dict]: """Retrieve all state events for a given room. If the user is joined to the room then return the current state. If the user has @@ -174,14 +173,13 @@ class MessageHandler: visible. Args: - user_id: The user requesting state events. + requester: The user requesting state events. room_id: The room ID to get all state events from. state_filter: The state filter used to fetch state from the database. at_token: the stream token of the at which we are requesting the stats. If the user is not allowed to view the state as of that stream token, we raise a 403 SynapseError. If None, returns the current state based on the current_state_events table. - is_guest: whether this user is a guest Returns: A list of dicts representing state events. [{}, {}, {}] Raises: @@ -191,6 +189,7 @@ class MessageHandler: members of this room. """ state_filter = state_filter or StateFilter.all() + user_id = requester.user.to_string() if at_token: last_event_id = ( @@ -223,7 +222,7 @@ class MessageHandler: membership, membership_event_id, ) = await self.auth.check_user_in_room_or_world_readable( - room_id, user_id, allow_departed_users=True + room_id, requester, allow_departed_users=True ) if membership == Membership.JOIN: @@ -317,12 +316,11 @@ class MessageHandler: Returns: A dict of user_id to profile info """ - user_id = requester.user.to_string() if not requester.app_service: # We check AS auth after fetching the room membership, as it # requires us to pull out all joined members anyway. membership, _ = await self.auth.check_user_in_room_or_world_readable( - room_id, user_id, allow_departed_users=True + room_id, requester, allow_departed_users=True ) if membership != Membership.JOIN: raise SynapseError( @@ -340,7 +338,10 @@ class MessageHandler: # If this is an AS, double check that they are allowed to see the members. # This can either be because the AS user is in the room or because there # is a user in the room that the AS is "interested in" - if requester.app_service and user_id not in users_with_profile: + if ( + requester.app_service + and requester.user.to_string() not in users_with_profile + ): for uid in users_with_profile: if requester.app_service.is_interested_in_user(uid): break diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index e1e34e3b16..74e944bce7 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -464,7 +464,7 @@ class PaginationHandler: membership, member_event_id, ) = await self.auth.check_user_in_room_or_world_readable( - room_id, user_id, allow_departed_users=True + room_id, requester, allow_departed_users=True ) if pagin_config.direction == "b": diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index c77d181722..20ec22105a 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -29,7 +29,13 @@ from synapse.api.constants import ( JoinRules, LoginType, ) -from synapse.api.errors import AuthError, Codes, ConsentNotGivenError, SynapseError +from synapse.api.errors import ( + AuthError, + Codes, + ConsentNotGivenError, + InvalidClientTokenError, + SynapseError, +) from synapse.appservice import ApplicationService from synapse.config.server import is_threepid_reserved from synapse.http.servlet import assert_params_in_dict @@ -180,10 +186,7 @@ class RegistrationHandler: ) if guest_access_token: user_data = await self.auth.get_user_by_access_token(guest_access_token) - if ( - not user_data.is_guest - or UserID.from_string(user_data.user_id).localpart != localpart - ): + if not user_data.is_guest or user_data.user.localpart != localpart: raise AuthError( 403, "Cannot register taken user ID without valid guest " @@ -618,7 +621,7 @@ class RegistrationHandler: user_id = user.to_string() service = self.store.get_app_service_by_token(as_token) if not service: - raise AuthError(403, "Invalid application service token.") + raise InvalidClientTokenError() if not service.is_interested_in_user(user_id): raise SynapseError( 400, diff --git a/synapse/handlers/relations.py b/synapse/handlers/relations.py index 72d25df8c8..28d7093f08 100644 --- a/synapse/handlers/relations.py +++ b/synapse/handlers/relations.py @@ -103,7 +103,7 @@ class RelationsHandler: # TODO Properly handle a user leaving a room. (_, member_event_id) = await self._auth.check_user_in_room_or_world_readable( - room_id, user_id, allow_departed_users=True + room_id, requester, allow_departed_users=True ) # This gets the original event and checks that a) the event exists and diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 55395457c3..2bf0ebd025 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -721,7 +721,7 @@ class RoomCreationHandler: # allow the server notices mxid to create rooms is_requester_admin = True else: - is_requester_admin = await self.auth.is_server_admin(requester.user) + is_requester_admin = await self.auth.is_server_admin(requester) # Let the third party rules modify the room creation config if needed, or abort # the room creation entirely with an exception. @@ -1279,7 +1279,7 @@ class RoomContextHandler: """ user = requester.user if use_admin_priviledge: - await assert_user_is_admin(self.auth, requester.user) + await assert_user_is_admin(self.auth, requester) before_limit = math.floor(limit / 2.0) after_limit = limit - before_limit diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 70dc69c809..d1909665d6 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -179,7 +179,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): """Try and join a room that this server is not in Args: - requester + requester: The user making the request, according to the access token. remote_room_hosts: List of servers that can be used to join via. room_id: Room that we are trying to join user: User who is trying to join @@ -744,7 +744,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): is_requester_admin = True else: - is_requester_admin = await self.auth.is_server_admin(requester.user) + is_requester_admin = await self.auth.is_server_admin(requester) if not is_requester_admin: if self.config.server.block_non_admin_invites: @@ -868,7 +868,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): bypass_spam_checker = True else: - bypass_spam_checker = await self.auth.is_server_admin(requester.user) + bypass_spam_checker = await self.auth.is_server_admin(requester) inviter = await self._get_inviter(target.to_string(), room_id) if ( @@ -1410,7 +1410,7 @@ class RoomMemberHandler(metaclass=abc.ABCMeta): ShadowBanError if the requester has been shadow-banned. """ if self.config.server.block_non_admin_invites: - is_requester_admin = await self.auth.is_server_admin(requester.user) + is_requester_admin = await self.auth.is_server_admin(requester) if not is_requester_admin: raise SynapseError( 403, "Invites have been disabled on this server", Codes.FORBIDDEN @@ -1693,7 +1693,7 @@ class RoomMemberMasterHandler(RoomMemberHandler): check_complexity and self.hs.config.server.limit_remote_rooms.admins_can_join ): - check_complexity = not await self.auth.is_server_admin(user) + check_complexity = not await self.store.is_server_admin(user) if check_complexity: # Fetch the room complexity diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index 27aa0d3126..bcac3372a2 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -253,12 +253,11 @@ class TypingWriterHandler(FollowerTypingHandler): self, target_user: UserID, requester: Requester, room_id: str, timeout: int ) -> None: target_user_id = target_user.to_string() - auth_user_id = requester.user.to_string() if not self.is_mine_id(target_user_id): raise SynapseError(400, "User is not hosted on this homeserver") - if target_user_id != auth_user_id: + if target_user != requester.user: raise AuthError(400, "Cannot set another user's typing state") if requester.shadow_banned: @@ -266,7 +265,7 @@ class TypingWriterHandler(FollowerTypingHandler): await self.clock.sleep(random.randint(1, 10)) raise ShadowBanError() - await self.auth.check_user_in_room(room_id, target_user_id) + await self.auth.check_user_in_room(room_id, requester) logger.debug("%s has started typing in %s", target_user_id, room_id) @@ -289,12 +288,11 @@ class TypingWriterHandler(FollowerTypingHandler): self, target_user: UserID, requester: Requester, room_id: str ) -> None: target_user_id = target_user.to_string() - auth_user_id = requester.user.to_string() if not self.is_mine_id(target_user_id): raise SynapseError(400, "User is not hosted on this homeserver") - if target_user_id != auth_user_id: + if target_user != requester.user: raise AuthError(400, "Cannot set another user's typing state") if requester.shadow_banned: @@ -302,7 +300,7 @@ class TypingWriterHandler(FollowerTypingHandler): await self.clock.sleep(random.randint(1, 10)) raise ShadowBanError() - await self.auth.check_user_in_room(room_id, target_user_id) + await self.auth.check_user_in_room(room_id, requester) logger.debug("%s has stopped typing in %s", target_user_id, room_id) diff --git a/synapse/http/site.py b/synapse/http/site.py index eeec74b78a..1155f3f610 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -226,7 +226,7 @@ class SynapseRequest(Request): # If this is a request where the target user doesn't match the user who # authenticated (e.g. and admin is puppetting a user) then we return both. - if self._requester.user.to_string() != authenticated_entity: + if requester != authenticated_entity: return requester, authenticated_entity return requester, None diff --git a/synapse/rest/admin/_base.py b/synapse/rest/admin/_base.py index 399b205aaf..b467a61dfb 100644 --- a/synapse/rest/admin/_base.py +++ b/synapse/rest/admin/_base.py @@ -19,7 +19,7 @@ from typing import Iterable, Pattern from synapse.api.auth import Auth from synapse.api.errors import AuthError from synapse.http.site import SynapseRequest -from synapse.types import UserID +from synapse.types import Requester def admin_patterns(path_regex: str, version: str = "v1") -> Iterable[Pattern]: @@ -48,19 +48,19 @@ async def assert_requester_is_admin(auth: Auth, request: SynapseRequest) -> None AuthError if the requester is not a server admin """ requester = await auth.get_user_by_req(request) - await assert_user_is_admin(auth, requester.user) + await assert_user_is_admin(auth, requester) -async def assert_user_is_admin(auth: Auth, user_id: UserID) -> None: +async def assert_user_is_admin(auth: Auth, requester: Requester) -> None: """Verify that the given user is an admin user Args: auth: Auth singleton - user_id: user to check + requester: The user making the request, according to the access token. Raises: AuthError if the user is not a server admin """ - is_admin = await auth.is_server_admin(user_id) + is_admin = await auth.is_server_admin(requester) if not is_admin: raise AuthError(HTTPStatus.FORBIDDEN, "You are not a server admin") diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py index 19d4a008e8..73470f09ae 100644 --- a/synapse/rest/admin/media.py +++ b/synapse/rest/admin/media.py @@ -54,7 +54,7 @@ class QuarantineMediaInRoom(RestServlet): self, request: SynapseRequest, room_id: str ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) + await assert_user_is_admin(self.auth, requester) logging.info("Quarantining room: %s", room_id) @@ -81,7 +81,7 @@ class QuarantineMediaByUser(RestServlet): self, request: SynapseRequest, user_id: str ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) + await assert_user_is_admin(self.auth, requester) logging.info("Quarantining media by user: %s", user_id) @@ -110,7 +110,7 @@ class QuarantineMediaByID(RestServlet): self, request: SynapseRequest, server_name: str, media_id: str ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) + await assert_user_is_admin(self.auth, requester) logging.info("Quarantining media by ID: %s/%s", server_name, media_id) diff --git a/synapse/rest/admin/rooms.py b/synapse/rest/admin/rooms.py index 68054ffc28..3d870629c4 100644 --- a/synapse/rest/admin/rooms.py +++ b/synapse/rest/admin/rooms.py @@ -75,7 +75,7 @@ class RoomRestV2Servlet(RestServlet): ) -> Tuple[int, JsonDict]: requester = await self._auth.get_user_by_req(request) - await assert_user_is_admin(self._auth, requester.user) + await assert_user_is_admin(self._auth, requester) content = parse_json_object_from_request(request) @@ -327,7 +327,7 @@ class RoomRestServlet(RestServlet): pagination_handler: "PaginationHandler", ) -> Tuple[int, JsonDict]: requester = await auth.get_user_by_req(request) - await assert_user_is_admin(auth, requester.user) + await assert_user_is_admin(auth, requester) content = parse_json_object_from_request(request) @@ -461,7 +461,7 @@ class JoinRoomAliasServlet(ResolveRoomIdMixin, RestServlet): assert request.args is not None requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) + await assert_user_is_admin(self.auth, requester) content = parse_json_object_from_request(request) @@ -551,7 +551,7 @@ class MakeRoomAdminRestServlet(ResolveRoomIdMixin, RestServlet): self, request: SynapseRequest, room_identifier: str ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) + await assert_user_is_admin(self.auth, requester) content = parse_json_object_from_request(request, allow_empty_body=True) room_id, _ = await self.resolve_room_id(room_identifier) @@ -742,7 +742,7 @@ class RoomEventContextServlet(RestServlet): self, request: SynapseRequest, room_id: str, event_id: str ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=False) - await assert_user_is_admin(self.auth, requester.user) + await assert_user_is_admin(self.auth, requester) limit = parse_integer(request, "limit", default=10) @@ -834,7 +834,7 @@ class BlockRoomRestServlet(RestServlet): self, request: SynapseRequest, room_id: str ) -> Tuple[int, JsonDict]: requester = await self._auth.get_user_by_req(request) - await assert_user_is_admin(self._auth, requester.user) + await assert_user_is_admin(self._auth, requester) content = parse_json_object_from_request(request) diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index ba2f7fa6d8..78ee9b6532 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -183,7 +183,7 @@ class UserRestServletV2(RestServlet): self, request: SynapseRequest, user_id: str ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) + await assert_user_is_admin(self.auth, requester) target_user = UserID.from_string(user_id) body = parse_json_object_from_request(request) @@ -575,10 +575,9 @@ class WhoisRestServlet(RestServlet): ) -> Tuple[int, JsonDict]: target_user = UserID.from_string(user_id) requester = await self.auth.get_user_by_req(request) - auth_user = requester.user - if target_user != auth_user: - await assert_user_is_admin(self.auth, auth_user) + if target_user != requester.user: + await assert_user_is_admin(self.auth, requester) if not self.is_mine(target_user): raise SynapseError(HTTPStatus.BAD_REQUEST, "Can only whois a local user") @@ -601,7 +600,7 @@ class DeactivateAccountRestServlet(RestServlet): self, request: SynapseRequest, target_user_id: str ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) + await assert_user_is_admin(self.auth, requester) if not self.is_mine(UserID.from_string(target_user_id)): raise SynapseError( @@ -693,7 +692,7 @@ class ResetPasswordRestServlet(RestServlet): This needs user to have administrator access in Synapse. """ requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) + await assert_user_is_admin(self.auth, requester) UserID.from_string(target_user_id) @@ -807,7 +806,7 @@ class UserAdminServlet(RestServlet): self, request: SynapseRequest, user_id: str ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) + await assert_user_is_admin(self.auth, requester) auth_user = requester.user target_user = UserID.from_string(user_id) @@ -921,7 +920,7 @@ class UserTokenRestServlet(RestServlet): self, request: SynapseRequest, user_id: str ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) + await assert_user_is_admin(self.auth, requester) auth_user = requester.user if not self.is_mine_id(user_id): diff --git a/synapse/rest/client/profile.py b/synapse/rest/client/profile.py index c16d707909..e69fa0829d 100644 --- a/synapse/rest/client/profile.py +++ b/synapse/rest/client/profile.py @@ -66,7 +66,7 @@ class ProfileDisplaynameRestServlet(RestServlet): ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request, allow_guest=True) user = UserID.from_string(user_id) - is_admin = await self.auth.is_server_admin(requester.user) + is_admin = await self.auth.is_server_admin(requester) content = parse_json_object_from_request(request) @@ -123,7 +123,7 @@ class ProfileAvatarURLRestServlet(RestServlet): ) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) user = UserID.from_string(user_id) - is_admin = await self.auth.is_server_admin(requester.user) + is_admin = await self.auth.is_server_admin(requester) content = parse_json_object_from_request(request) try: diff --git a/synapse/rest/client/register.py b/synapse/rest/client/register.py index 956c45e60a..1b953d3fa0 100644 --- a/synapse/rest/client/register.py +++ b/synapse/rest/client/register.py @@ -484,9 +484,6 @@ class RegisterRestServlet(RestServlet): "Appservice token must be provided when using a type of m.login.application_service", ) - # Verify the AS - self.auth.get_appservice_by_req(request) - # Set the desired user according to the AS API (which uses the # 'user' key not 'username'). Since this is a new addition, we'll # fallback to 'username' if they gave one. diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 13bc9482c5..0eafbae457 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -229,7 +229,7 @@ class RoomStateEventRestServlet(TransactionRestServlet): msg_handler = self.message_handler data = await msg_handler.get_room_data( - user_id=requester.user.to_string(), + requester=requester, room_id=room_id, event_type=event_type, state_key=state_key, @@ -574,7 +574,7 @@ class RoomMemberListRestServlet(RestServlet): events = await handler.get_state_events( room_id=room_id, - user_id=requester.user.to_string(), + requester=requester, at_token=at_token, state_filter=StateFilter.from_types([(EventTypes.Member, None)]), ) @@ -696,8 +696,7 @@ class RoomStateRestServlet(RestServlet): # Get all the current state for this room events = await self.message_handler.get_state_events( room_id=room_id, - user_id=requester.user.to_string(), - is_guest=requester.is_guest, + requester=requester, ) return 200, events @@ -755,7 +754,7 @@ class RoomEventServlet(RestServlet): == "true" ) if include_unredacted_content and not await self.auth.is_server_admin( - requester.user + requester ): power_level_event = ( await self._storage_controllers.state.get_current_state_event( @@ -1260,9 +1259,7 @@ class TimestampLookupRestServlet(RestServlet): self, request: SynapseRequest, room_id: str ) -> Tuple[int, JsonDict]: requester = await self._auth.get_user_by_req(request) - await self._auth.check_user_in_room_or_world_readable( - room_id, requester.user.to_string() - ) + await self._auth.check_user_in_room_or_world_readable(room_id, requester) timestamp = parse_integer(request, "ts", required=True) direction = parse_string(request, "dir", default="f", allowed_values=["f", "b"]) diff --git a/synapse/server_notices/server_notices_manager.py b/synapse/server_notices/server_notices_manager.py index 8ecab86ec7..70d054a8f4 100644 --- a/synapse/server_notices/server_notices_manager.py +++ b/synapse/server_notices/server_notices_manager.py @@ -244,7 +244,7 @@ class ServerNoticesManager: assert self.server_notices_mxid is not None notice_user_data_in_room = await self._message_handler.get_room_data( - self.server_notices_mxid, + create_requester(self.server_notices_mxid), room_id, EventTypes.Member, self.server_notices_mxid, diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index cb63cd9b7d..7fb9c801da 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -69,9 +69,9 @@ class TokenLookupResult: """ user_id: str + token_id: int is_guest: bool = False shadow_banned: bool = False - token_id: Optional[int] = None device_id: Optional[str] = None valid_until_ms: Optional[int] = None token_owner: str = attr.ib() diff --git a/tests/api/test_auth.py b/tests/api/test_auth.py index dfcfaf79b6..e0f363555b 100644 --- a/tests/api/test_auth.py +++ b/tests/api/test_auth.py @@ -284,10 +284,13 @@ class AuthTestCase(unittest.HomeserverTestCase): TokenLookupResult( user_id="@baldrick:matrix.org", device_id="device", + token_id=5, token_owner="@admin:matrix.org", + token_used=True, ) ) self.store.insert_client_ip = simple_async_mock(None) + self.store.mark_access_token_as_used = simple_async_mock(None) request = Mock(args={}) request.getClientAddress.return_value.host = "127.0.0.1" request.args[b"access_token"] = [self.test_token] @@ -301,10 +304,13 @@ class AuthTestCase(unittest.HomeserverTestCase): TokenLookupResult( user_id="@baldrick:matrix.org", device_id="device", + token_id=5, token_owner="@admin:matrix.org", + token_used=True, ) ) self.store.insert_client_ip = simple_async_mock(None) + self.store.mark_access_token_as_used = simple_async_mock(None) request = Mock(args={}) request.getClientAddress.return_value.host = "127.0.0.1" request.args[b"access_token"] = [self.test_token] @@ -347,7 +353,7 @@ class AuthTestCase(unittest.HomeserverTestCase): serialized = macaroon.serialize() user_info = self.get_success(self.auth.get_user_by_access_token(serialized)) - self.assertEqual(user_id, user_info.user_id) + self.assertEqual(user_id, user_info.user.to_string()) self.assertTrue(user_info.is_guest) self.store.get_user_by_id.assert_called_with(user_id) diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index 7af1333126..8adba29d7f 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -25,7 +25,7 @@ from synapse.api.constants import EduTypes from synapse.api.errors import AuthError from synapse.federation.transport.server import TransportLayerServer from synapse.server import HomeServer -from synapse.types import JsonDict, UserID, create_requester +from synapse.types import JsonDict, Requester, UserID, create_requester from synapse.util import Clock from tests import unittest @@ -117,8 +117,10 @@ class TypingNotificationsTestCase(unittest.HomeserverTestCase): self.room_members = [] - async def check_user_in_room(room_id: str, user_id: str) -> None: - if user_id not in [u.to_string() for u in self.room_members]: + async def check_user_in_room(room_id: str, requester: Requester) -> None: + if requester.user.to_string() not in [ + u.to_string() for u in self.room_members + ]: raise AuthError(401, "User is not in the room") return None diff --git a/tests/rest/client/test_retention.py b/tests/rest/client/test_retention.py index ac9c113354..9c8c1889d3 100644 --- a/tests/rest/client/test_retention.py +++ b/tests/rest/client/test_retention.py @@ -20,7 +20,7 @@ from synapse.api.constants import EventTypes from synapse.rest import admin from synapse.rest.client import login, room from synapse.server import HomeServer -from synapse.types import JsonDict +from synapse.types import JsonDict, create_requester from synapse.util import Clock from synapse.visibility import filter_events_for_client @@ -188,7 +188,7 @@ class RetentionTestCase(unittest.HomeserverTestCase): message_handler = self.hs.get_message_handler() create_event = self.get_success( message_handler.get_room_data( - self.user_id, room_id, EventTypes.Create, state_key="" + create_requester(self.user_id), room_id, EventTypes.Create, state_key="" ) ) diff --git a/tests/rest/client/test_shadow_banned.py b/tests/rest/client/test_shadow_banned.py index d9bd8c4a28..c50f034b34 100644 --- a/tests/rest/client/test_shadow_banned.py +++ b/tests/rest/client/test_shadow_banned.py @@ -26,7 +26,7 @@ from synapse.rest.client import ( room_upgrade_rest_servlet, ) from synapse.server import HomeServer -from synapse.types import UserID +from synapse.types import UserID, create_requester from synapse.util import Clock from tests import unittest @@ -275,7 +275,7 @@ class ProfileTestCase(_ShadowBannedBase): message_handler = self.hs.get_message_handler() event = self.get_success( message_handler.get_room_data( - self.banned_user_id, + create_requester(self.banned_user_id), room_id, "m.room.member", self.banned_user_id, @@ -310,7 +310,7 @@ class ProfileTestCase(_ShadowBannedBase): message_handler = self.hs.get_message_handler() event = self.get_success( message_handler.get_room_data( - self.banned_user_id, + create_requester(self.banned_user_id), room_id, "m.room.member", self.banned_user_id, -- cgit 1.5.1 From 5e7847dc923142bc68834f9b9538ada3fdd887d5 Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Tue, 23 Aug 2022 10:49:59 +0100 Subject: Cache user IDs instead of profile objects (#13573) The profile objects are never used and increase cache size significantly. --- changelog.d/13573.misc | 1 + synapse/handlers/sync.py | 4 +- synapse/state/__init__.py | 13 +++--- synapse/storage/databases/main/roommember.py | 67 ++++++++++++---------------- synapse/util/caches/descriptors.py | 26 ++++++++--- 5 files changed, 57 insertions(+), 54 deletions(-) create mode 100644 changelog.d/13573.misc (limited to 'synapse/storage/databases/main') diff --git a/changelog.d/13573.misc b/changelog.d/13573.misc new file mode 100644 index 0000000000..1ce9c0c081 --- /dev/null +++ b/changelog.d/13573.misc @@ -0,0 +1 @@ +Cache user IDs instead of profiles to reduce cache memory usage. Contributed by Nick @ Beeper (@fizzadar). diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index b4d3f3958c..2d95b1fa24 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -2421,10 +2421,10 @@ class SyncHandler: joined_room.room_id, joined_room.event_pos.stream ) ) - users_in_room = await self.state.get_current_users_in_room( + user_ids_in_room = await self.state.get_current_user_ids_in_room( joined_room.room_id, extrems ) - if user_id in users_in_room: + if user_id in user_ids_in_room: joined_room_ids.add(joined_room.room_id) return frozenset(joined_room_ids) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index c355e4f98a..3047e1b1ad 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -44,7 +44,6 @@ from synapse.logging.context import ContextResourceUsage from synapse.replication.http.state import ReplicationUpdateCurrentStateRestServlet from synapse.state import v1, v2 from synapse.storage.databases.main.events_worker import EventRedactBehaviour -from synapse.storage.roommember import ProfileInfo from synapse.storage.state import StateFilter from synapse.types import StateMap from synapse.util.async_helpers import Linearizer @@ -210,11 +209,11 @@ class StateHandler: ret = await self.resolve_state_groups_for_events(room_id, event_ids) return await ret.get_state(self._state_storage_controller, state_filter) - async def get_current_users_in_room( + async def get_current_user_ids_in_room( self, room_id: str, latest_event_ids: List[str] - ) -> Dict[str, ProfileInfo]: + ) -> Set[str]: """ - Get the users who are currently in a room. + Get the users IDs who are currently in a room. Note: This is much slower than using the equivalent method `DataStore.get_users_in_room` or `DataStore.get_users_in_room_with_profiles`, @@ -225,15 +224,15 @@ class StateHandler: room_id: The ID of the room. latest_event_ids: Precomputed list of latest event IDs. Will be computed if None. Returns: - Dictionary of user IDs to their profileinfo. + Set of user IDs in the room. """ assert latest_event_ids is not None - logger.debug("calling resolve_state_groups from get_current_users_in_room") + logger.debug("calling resolve_state_groups from get_current_user_ids_in_room") entry = await self.resolve_state_groups_for_events(room_id, latest_event_ids) state = await entry.get_state(self._state_storage_controller, StateFilter.all()) - return await self.store.get_joined_users_from_state(room_id, state, entry) + return await self.store.get_joined_user_ids_from_state(room_id, state, entry) async def get_hosts_in_room_at_events( self, room_id: str, event_ids: Collection[str] diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 827c1f1efd..0eb024a809 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -835,9 +835,9 @@ class RoomMemberWorkerStore(EventsWorkerStore): return shared_room_ids or frozenset() - async def get_joined_users_from_state( + async def get_joined_user_ids_from_state( self, room_id: str, state: StateMap[str], state_entry: "_StateCacheEntry" - ) -> Dict[str, ProfileInfo]: + ) -> Set[str]: state_group: Union[object, int] = state_entry.state_group if not state_group: # If state_group is None it means it has yet to be assigned a @@ -848,25 +848,25 @@ class RoomMemberWorkerStore(EventsWorkerStore): assert state_group is not None with Measure(self._clock, "get_joined_users_from_state"): - return await self._get_joined_users_from_context( + return await self._get_joined_user_ids_from_context( room_id, state_group, state, context=state_entry ) @cached(num_args=2, iterable=True, max_entries=100000) - async def _get_joined_users_from_context( + async def _get_joined_user_ids_from_context( self, room_id: str, state_group: Union[object, int], current_state_ids: StateMap[str], event: Optional[EventBase] = None, context: Optional["_StateCacheEntry"] = None, - ) -> Dict[str, ProfileInfo]: + ) -> Set[str]: # We don't use `state_group`, it's there so that we can cache based # on it. However, it's important that it's never None, since two current_states # with a state_group of None are likely to be different. assert state_group is not None - users_in_room = {} + users_in_room = set() member_event_ids = [ e_id for key, e_id in current_state_ids.items() @@ -879,11 +879,11 @@ class RoomMemberWorkerStore(EventsWorkerStore): # If we do then we can reuse that result and simply update it with # any membership changes in `delta_ids` if context.prev_group and context.delta_ids: - prev_res = self._get_joined_users_from_context.cache.get_immediate( + prev_res = self._get_joined_user_ids_from_context.cache.get_immediate( (room_id, context.prev_group), None ) - if prev_res and isinstance(prev_res, dict): - users_in_room = dict(prev_res) + if prev_res and isinstance(prev_res, set): + users_in_room = prev_res member_event_ids = [ e_id for key, e_id in context.delta_ids.items() @@ -891,7 +891,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): ] for etype, state_key in context.delta_ids: if etype == EventTypes.Member: - users_in_room.pop(state_key, None) + users_in_room.discard(state_key) # We check if we have any of the member event ids in the event cache # before we ask the DB @@ -908,42 +908,41 @@ class RoomMemberWorkerStore(EventsWorkerStore): ev_entry = event_map.get(event_id) if ev_entry and not ev_entry.event.rejected_reason: if ev_entry.event.membership == Membership.JOIN: - users_in_room[ev_entry.event.state_key] = ProfileInfo( - display_name=ev_entry.event.content.get("displayname", None), - avatar_url=ev_entry.event.content.get("avatar_url", None), - ) + users_in_room.add(ev_entry.event.state_key) else: missing_member_event_ids.append(event_id) if missing_member_event_ids: - event_to_memberships = await self._get_joined_profiles_from_event_ids( + event_to_memberships = await self._get_user_ids_from_membership_event_ids( missing_member_event_ids ) - users_in_room.update(row for row in event_to_memberships.values() if row) + users_in_room.update(event_to_memberships.values()) if event is not None and event.type == EventTypes.Member: if event.membership == Membership.JOIN: if event.event_id in member_event_ids: - users_in_room[event.state_key] = ProfileInfo( - display_name=event.content.get("displayname", None), - avatar_url=event.content.get("avatar_url", None), - ) + users_in_room.add(event.state_key) return users_in_room - @cached(max_entries=10000) - def _get_joined_profile_from_event_id( + @cached( + max_entries=10000, + # This name matches the old function that has been replaced - the cache name + # is kept here to maintain backwards compatibility. + name="_get_joined_profile_from_event_id", + ) + def _get_user_id_from_membership_event_id( self, event_id: str ) -> Optional[Tuple[str, ProfileInfo]]: raise NotImplementedError() @cachedList( - cached_method_name="_get_joined_profile_from_event_id", + cached_method_name="_get_user_id_from_membership_event_id", list_name="event_ids", ) - async def _get_joined_profiles_from_event_ids( + async def _get_user_ids_from_membership_event_ids( self, event_ids: Iterable[str] - ) -> Dict[str, Optional[Tuple[str, ProfileInfo]]]: + ) -> Dict[str, str]: """For given set of member event_ids check if they point to a join event and if so return the associated user and profile info. @@ -958,21 +957,13 @@ class RoomMemberWorkerStore(EventsWorkerStore): table="room_memberships", column="event_id", iterable=event_ids, - retcols=("user_id", "display_name", "avatar_url", "event_id"), + retcols=("user_id", "event_id"), keyvalues={"membership": Membership.JOIN}, batch_size=1000, - desc="_get_joined_profiles_from_event_ids", + desc="_get_user_ids_from_membership_event_ids", ) - return { - row["event_id"]: ( - row["user_id"], - ProfileInfo( - avatar_url=row["avatar_url"], display_name=row["display_name"] - ), - ) - for row in rows - } + return {row["event_id"]: row["user_id"] for row in rows} @cached(max_entries=10000) async def is_host_joined(self, room_id: str, host: str) -> bool: @@ -1131,12 +1122,12 @@ class RoomMemberWorkerStore(EventsWorkerStore): else: # The cache doesn't match the state group or prev state group, # so we calculate the result from first principles. - joined_users = await self.get_joined_users_from_state( + joined_user_ids = await self.get_joined_user_ids_from_state( room_id, state, state_entry ) cache.hosts_to_joined_users = {} - for user_id in joined_users: + for user_id in joined_user_ids: host = intern_string(get_domain_from_id(user_id)) cache.hosts_to_joined_users.setdefault(host, set()).add(user_id) diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index 867f315b2a..9d4bc89edb 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -73,8 +73,10 @@ class _CacheDescriptorBase: num_args: Optional[int], uncached_args: Optional[Collection[str]] = None, cache_context: bool = False, + name: Optional[str] = None, ): self.orig = orig + self.name = name or orig.__name__ arg_spec = inspect.getfullargspec(orig) all_args = arg_spec.args @@ -211,7 +213,7 @@ class LruCacheDescriptor(_CacheDescriptorBase): def __get__(self, obj: Optional[Any], owner: Optional[Type]) -> Callable[..., Any]: cache: LruCache[CacheKey, Any] = LruCache( - cache_name=self.orig.__name__, + cache_name=self.name, max_size=self.max_entries, ) @@ -241,7 +243,7 @@ class LruCacheDescriptor(_CacheDescriptorBase): wrapped = cast(_CachedFunction, _wrapped) wrapped.cache = cache - obj.__dict__[self.orig.__name__] = wrapped + obj.__dict__[self.name] = wrapped return wrapped @@ -301,12 +303,14 @@ class DeferredCacheDescriptor(_CacheDescriptorBase): cache_context: bool = False, iterable: bool = False, prune_unread_entries: bool = True, + name: Optional[str] = None, ): super().__init__( orig, num_args=num_args, uncached_args=uncached_args, cache_context=cache_context, + name=name, ) if tree and self.num_args < 2: @@ -321,7 +325,7 @@ class DeferredCacheDescriptor(_CacheDescriptorBase): def __get__(self, obj: Optional[Any], owner: Optional[Type]) -> Callable[..., Any]: cache: DeferredCache[CacheKey, Any] = DeferredCache( - name=self.orig.__name__, + name=self.name, max_entries=self.max_entries, tree=self.tree, iterable=self.iterable, @@ -372,7 +376,7 @@ class DeferredCacheDescriptor(_CacheDescriptorBase): wrapped.cache = cache wrapped.num_args = self.num_args - obj.__dict__[self.orig.__name__] = wrapped + obj.__dict__[self.name] = wrapped return wrapped @@ -393,6 +397,7 @@ class DeferredCacheListDescriptor(_CacheDescriptorBase): cached_method_name: str, list_name: str, num_args: Optional[int] = None, + name: Optional[str] = None, ): """ Args: @@ -403,7 +408,7 @@ class DeferredCacheListDescriptor(_CacheDescriptorBase): but including list_name) to use as cache keys. Defaults to all named args of the function. """ - super().__init__(orig, num_args=num_args, uncached_args=None) + super().__init__(orig, num_args=num_args, uncached_args=None, name=name) self.list_name = list_name @@ -525,7 +530,7 @@ class DeferredCacheListDescriptor(_CacheDescriptorBase): else: return defer.succeed(results) - obj.__dict__[self.orig.__name__] = wrapped + obj.__dict__[self.name] = wrapped return wrapped @@ -577,6 +582,7 @@ def cached( cache_context: bool = False, iterable: bool = False, prune_unread_entries: bool = True, + name: Optional[str] = None, ) -> Callable[[F], _CachedFunction[F]]: func = lambda orig: DeferredCacheDescriptor( orig, @@ -587,13 +593,18 @@ def cached( cache_context=cache_context, iterable=iterable, prune_unread_entries=prune_unread_entries, + name=name, ) return cast(Callable[[F], _CachedFunction[F]], func) def cachedList( - *, cached_method_name: str, list_name: str, num_args: Optional[int] = None + *, + cached_method_name: str, + list_name: str, + num_args: Optional[int] = None, + name: Optional[str] = None, ) -> Callable[[F], _CachedFunction[F]]: """Creates a descriptor that wraps a function in a `DeferredCacheListDescriptor`. @@ -628,6 +639,7 @@ def cachedList( cached_method_name=cached_method_name, list_name=list_name, num_args=num_args, + name=name, ) return cast(Callable[[F], _CachedFunction[F]], func) -- cgit 1.5.1 From aec87a0f9369a3015b2a53469f88d1de274e8b71 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 23 Aug 2022 13:15:43 +0100 Subject: Speed up fetching large numbers of push rules (#13592) --- changelog.d/13592.misc | 1 + synapse/replication/slave/storage/push_rule.py | 1 - synapse/storage/databases/main/account_data.py | 3 --- synapse/storage/databases/main/push_rule.py | 6 +----- 4 files changed, 2 insertions(+), 9 deletions(-) create mode 100644 changelog.d/13592.misc (limited to 'synapse/storage/databases/main') diff --git a/changelog.d/13592.misc b/changelog.d/13592.misc new file mode 100644 index 0000000000..8f48d557e5 --- /dev/null +++ b/changelog.d/13592.misc @@ -0,0 +1 @@ +Minor speed up of fetching large numbers of push rules. diff --git a/synapse/replication/slave/storage/push_rule.py b/synapse/replication/slave/storage/push_rule.py index 52ee3f7e58..5e65eaf1e0 100644 --- a/synapse/replication/slave/storage/push_rule.py +++ b/synapse/replication/slave/storage/push_rule.py @@ -31,6 +31,5 @@ class SlavedPushRuleStore(SlavedEventStore, PushRulesWorkerStore): self._push_rules_stream_id_gen.advance(instance_name, token) for row in rows: self.get_push_rules_for_user.invalidate((row.user_id,)) - self.get_push_rules_enabled_for_user.invalidate((row.user_id,)) self.push_rules_stream_cache.entity_has_changed(row.user_id, token) return super().process_replication_rows(stream_name, instance_name, token, rows) diff --git a/synapse/storage/databases/main/account_data.py b/synapse/storage/databases/main/account_data.py index 9af9f4f18e..c38b8a9e5a 100644 --- a/synapse/storage/databases/main/account_data.py +++ b/synapse/storage/databases/main/account_data.py @@ -650,9 +650,6 @@ class AccountDataWorkerStore(PushRulesWorkerStore, CacheInvalidationWorkerStore) txn, self.get_account_data_for_room, (user_id,) ) self._invalidate_cache_and_stream(txn, self.get_push_rules_for_user, (user_id,)) - self._invalidate_cache_and_stream( - txn, self.get_push_rules_enabled_for_user, (user_id,) - ) # This user might be contained in the ignored_by cache for other users, # so we have to invalidate it all. self._invalidate_all_cache_and_stream(txn, self.ignored_by) diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py index 255620f996..5079edd1e0 100644 --- a/synapse/storage/databases/main/push_rule.py +++ b/synapse/storage/databases/main/push_rule.py @@ -165,7 +165,6 @@ class PushRulesWorkerStore( return _load_rules(rows, enabled_map, self.hs.config.experimental) - @cached(max_entries=5000) async def get_push_rules_enabled_for_user(self, user_id: str) -> Dict[str, bool]: results = await self.db_pool.simple_select_list( table="push_rules_enable", @@ -229,9 +228,6 @@ class PushRulesWorkerStore( return results - @cachedList( - cached_method_name="get_push_rules_enabled_for_user", list_name="user_ids" - ) async def bulk_get_push_rules_enabled( self, user_ids: Collection[str] ) -> Dict[str, Dict[str, bool]]: @@ -246,6 +242,7 @@ class PushRulesWorkerStore( iterable=user_ids, retcols=("user_name", "rule_id", "enabled"), desc="bulk_get_push_rules_enabled", + batch_size=1000, ) for row in rows: enabled = bool(row["enabled"]) @@ -792,7 +789,6 @@ class PushRuleStore(PushRulesWorkerStore): self.db_pool.simple_insert_txn(txn, "push_rules_stream", values=values) txn.call_after(self.get_push_rules_for_user.invalidate, (user_id,)) - txn.call_after(self.get_push_rules_enabled_for_user.invalidate, (user_id,)) txn.call_after( self.push_rules_stream_cache.entity_has_changed, user_id, stream_id ) -- cgit 1.5.1 From 05c9c7363b09d2517a79915b831ce423c7defc7e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 23 Aug 2022 15:14:05 +0100 Subject: Fix regression caused by #13573 (#13600) Broke in #13573. --- changelog.d/13600.misc | 1 + synapse/storage/databases/main/roommember.py | 10 ++++++---- 2 files changed, 7 insertions(+), 4 deletions(-) create mode 100644 changelog.d/13600.misc (limited to 'synapse/storage/databases/main') diff --git a/changelog.d/13600.misc b/changelog.d/13600.misc new file mode 100644 index 0000000000..1ce9c0c081 --- /dev/null +++ b/changelog.d/13600.misc @@ -0,0 +1 @@ +Cache user IDs instead of profiles to reduce cache memory usage. Contributed by Nick @ Beeper (@fizzadar). diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 0eb024a809..046ad3a11c 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -916,7 +916,9 @@ class RoomMemberWorkerStore(EventsWorkerStore): event_to_memberships = await self._get_user_ids_from_membership_event_ids( missing_member_event_ids ) - users_in_room.update(event_to_memberships.values()) + users_in_room.update( + user_id for user_id in event_to_memberships.values() if user_id + ) if event is not None and event.type == EventTypes.Member: if event.membership == Membership.JOIN: @@ -942,15 +944,15 @@ class RoomMemberWorkerStore(EventsWorkerStore): ) async def _get_user_ids_from_membership_event_ids( self, event_ids: Iterable[str] - ) -> Dict[str, str]: + ) -> Dict[str, Optional[str]]: """For given set of member event_ids check if they point to a join - event and if so return the associated user and profile info. + event. Args: event_ids: The member event IDs to lookup Returns: - Map from event ID to `user_id` and ProfileInfo (or None if not join event). + Map from event ID to `user_id`, or None if event is not a join. """ rows = await self.db_pool.simple_select_many_batch( -- cgit 1.5.1