diff --git a/changelog.d/14911.misc b/changelog.d/14911.misc
new file mode 100644
index 0000000000..93ceaeafc9
--- /dev/null
+++ b/changelog.d/14911.misc
@@ -0,0 +1 @@
+Improve type hints.
diff --git a/scripts-dev/mypy_synapse_plugin.py b/scripts-dev/mypy_synapse_plugin.py
index a0b3854f1b..6592a4a6b7 100644
--- a/scripts-dev/mypy_synapse_plugin.py
+++ b/scripts-dev/mypy_synapse_plugin.py
@@ -16,13 +16,24 @@
can crop up, e.g the cache descriptors.
"""
-from typing import Callable, Optional, Type
+from typing import Callable, Optional, Tuple, Type, Union
+import mypy.types
from mypy.erasetype import remove_instance_last_known_values
-from mypy.nodes import ARG_NAMED_OPT
-from mypy.plugin import MethodSigContext, Plugin
+from mypy.errorcodes import ErrorCode
+from mypy.nodes import ARG_NAMED_OPT, TempNode, Var
+from mypy.plugin import FunctionSigContext, MethodSigContext, Plugin
from mypy.typeops import bind_self
-from mypy.types import CallableType, Instance, NoneType, UnionType
+from mypy.types import (
+ AnyType,
+ CallableType,
+ Instance,
+ NoneType,
+ TupleType,
+ TypeAliasType,
+ UninhabitedType,
+ UnionType,
+)
class SynapsePlugin(Plugin):
@@ -36,9 +47,37 @@ class SynapsePlugin(Plugin):
)
):
return cached_function_method_signature
+
+ if fullname in (
+ "synapse.util.caches.descriptors._CachedFunctionDescriptor.__call__",
+ "synapse.util.caches.descriptors._CachedListFunctionDescriptor.__call__",
+ ):
+ return check_is_cacheable_wrapper
+
return None
+def _get_true_return_type(signature: CallableType) -> mypy.types.Type:
+ """
+ Get the "final" return type of a callable which might return an Awaitable/Deferred.
+ """
+ if isinstance(signature.ret_type, Instance):
+ # If a coroutine, unwrap the coroutine's return type.
+ if signature.ret_type.type.fullname == "typing.Coroutine":
+ return signature.ret_type.args[2]
+
+ # If an awaitable, unwrap the awaitable's final value.
+ elif signature.ret_type.type.fullname == "typing.Awaitable":
+ return signature.ret_type.args[0]
+
+ # If a Deferred, unwrap the Deferred's final value.
+ elif signature.ret_type.type.fullname == "twisted.internet.defer.Deferred":
+ return signature.ret_type.args[0]
+
+ # Otherwise, return the raw value of the function.
+ return signature.ret_type
+
+
def cached_function_method_signature(ctx: MethodSigContext) -> CallableType:
"""Fixes the `CachedFunction.__call__` signature to be correct.
@@ -47,16 +86,17 @@ def cached_function_method_signature(ctx: MethodSigContext) -> CallableType:
1. the `self` argument needs to be marked as "bound";
2. any `cache_context` argument should be removed;
3. an optional keyword argument `on_invalidated` should be added.
+ 4. Wrap the return type to always be a Deferred.
"""
- # First we mark this as a bound function signature.
- signature = bind_self(ctx.default_signature)
+ # 1. Mark this as a bound function signature.
+ signature: CallableType = bind_self(ctx.default_signature)
- # Secondly, we remove any "cache_context" args.
+ # 2. Remove any "cache_context" args.
#
# Note: We should be only doing this if `cache_context=True` is set, but if
# it isn't then the code will raise an exception when its called anyway, so
- # its not the end of the world.
+ # it's not the end of the world.
context_arg_index = None
for idx, name in enumerate(signature.arg_names):
if name == "cache_context":
@@ -72,7 +112,7 @@ def cached_function_method_signature(ctx: MethodSigContext) -> CallableType:
arg_names.pop(context_arg_index)
arg_kinds.pop(context_arg_index)
- # Third, we add an optional "on_invalidate" argument.
+ # 3. Add an optional "on_invalidate" argument.
#
# This is a either
# - a callable which accepts no input and returns nothing, or
@@ -94,35 +134,16 @@ def cached_function_method_signature(ctx: MethodSigContext) -> CallableType:
arg_names.append("on_invalidate")
arg_kinds.append(ARG_NAMED_OPT) # Arg is an optional kwarg.
- # Finally we ensure the return type is a Deferred.
- if (
- isinstance(signature.ret_type, Instance)
- and signature.ret_type.type.fullname == "twisted.internet.defer.Deferred"
- ):
- # If it is already a Deferred, nothing to do.
- ret_type = signature.ret_type
- else:
- ret_arg = None
- if isinstance(signature.ret_type, Instance):
- # If a coroutine, wrap the coroutine's return type in a Deferred.
- if signature.ret_type.type.fullname == "typing.Coroutine":
- ret_arg = signature.ret_type.args[2]
-
- # If an awaitable, wrap the awaitable's final value in a Deferred.
- elif signature.ret_type.type.fullname == "typing.Awaitable":
- ret_arg = signature.ret_type.args[0]
-
- # Otherwise, wrap the return value in a Deferred.
- if ret_arg is None:
- ret_arg = signature.ret_type
-
- # This should be able to use ctx.api.named_generic_type, but that doesn't seem
- # to find the correct symbol for anything more than 1 module deep.
- #
- # modules is not part of CheckerPluginInterface. The following is a combination
- # of TypeChecker.named_generic_type and TypeChecker.lookup_typeinfo.
- sym = ctx.api.modules["twisted.internet.defer"].names.get("Deferred") # type: ignore[attr-defined]
- ret_type = Instance(sym.node, [remove_instance_last_known_values(ret_arg)])
+ # 4. Ensure the return type is a Deferred.
+ ret_arg = _get_true_return_type(signature)
+
+ # This should be able to use ctx.api.named_generic_type, but that doesn't seem
+ # to find the correct symbol for anything more than 1 module deep.
+ #
+ # modules is not part of CheckerPluginInterface. The following is a combination
+ # of TypeChecker.named_generic_type and TypeChecker.lookup_typeinfo.
+ sym = ctx.api.modules["twisted.internet.defer"].names.get("Deferred") # type: ignore[attr-defined]
+ ret_type = Instance(sym.node, [remove_instance_last_known_values(ret_arg)])
signature = signature.copy_modified(
arg_types=arg_types,
@@ -134,6 +155,198 @@ def cached_function_method_signature(ctx: MethodSigContext) -> CallableType:
return signature
+def check_is_cacheable_wrapper(ctx: MethodSigContext) -> CallableType:
+ """Asserts that the signature of a method returns a value which can be cached.
+
+ Makes no changes to the provided method signature.
+ """
+ # The true signature, this isn't being modified so this is what will be returned.
+ signature: CallableType = ctx.default_signature
+
+ if not isinstance(ctx.args[0][0], TempNode):
+ ctx.api.note("Cached function is not a TempNode?!", ctx.context) # type: ignore[attr-defined]
+ return signature
+
+ orig_sig = ctx.args[0][0].type
+ if not isinstance(orig_sig, CallableType):
+ ctx.api.fail("Cached 'function' is not a callable", ctx.context)
+ return signature
+
+ check_is_cacheable(orig_sig, ctx)
+
+ return signature
+
+
+def check_is_cacheable(
+ signature: CallableType,
+ ctx: Union[MethodSigContext, FunctionSigContext],
+) -> None:
+ """
+ Check if a callable returns a type which can be cached.
+
+ Args:
+ signature: The callable to check.
+ ctx: The signature context, used for error reporting.
+ """
+ # Unwrap the true return type from the cached function.
+ return_type = _get_true_return_type(signature)
+
+ verbose = ctx.api.options.verbosity >= 1
+ # TODO Technically a cachedList only needs immutable values, but forcing them
+ # to return Mapping instead of Dict is fine.
+ ok, note = is_cacheable(return_type, signature, verbose)
+
+ if ok:
+ message = f"function {signature.name} is @cached, returning {return_type}"
+ else:
+ message = f"function {signature.name} is @cached, but has mutable return value {return_type}"
+
+ if note:
+ message += f" ({note})"
+ message = message.replace("builtins.", "").replace("typing.", "")
+
+ if ok and note:
+ ctx.api.note(message, ctx.context) # type: ignore[attr-defined]
+ elif not ok:
+ ctx.api.fail(message, ctx.context, code=AT_CACHED_MUTABLE_RETURN)
+
+
+# Immutable simple values.
+IMMUTABLE_VALUE_TYPES = {
+ "builtins.bool",
+ "builtins.int",
+ "builtins.float",
+ "builtins.str",
+ "builtins.bytes",
+}
+
+# Types defined in Synapse which are known to be immutable.
+IMMUTABLE_CUSTOM_TYPES = {
+ "synapse.synapse_rust.acl.ServerAclEvaluator",
+ "synapse.synapse_rust.push.FilteredPushRules",
+ # This is technically not immutable, but close enough.
+ "signedjson.types.VerifyKey",
+}
+
+# Immutable containers only if the values are also immutable.
+IMMUTABLE_CONTAINER_TYPES_REQUIRING_IMMUTABLE_ELEMENTS = {
+ "builtins.frozenset",
+ "builtins.tuple",
+ "typing.AbstractSet",
+ "typing.Sequence",
+ "immutabledict.immutabledict",
+}
+
+MUTABLE_CONTAINER_TYPES = {
+ "builtins.set",
+ "builtins.list",
+ "builtins.dict",
+}
+
+AT_CACHED_MUTABLE_RETURN = ErrorCode(
+ "synapse-@cached-mutable",
+ "@cached() should have an immutable return type",
+ "General",
+)
+
+
+def is_cacheable(
+ rt: mypy.types.Type, signature: CallableType, verbose: bool
+) -> Tuple[bool, Optional[str]]:
+ """
+ Check if a particular type is cachable.
+
+ A type is cachable if it is immutable; for complex types this recurses to
+ check each type parameter.
+
+ Returns: a 2-tuple (cacheable, message).
+ - cachable: False means the type is definitely not cacheable;
+ true means anything else.
+ - Optional message.
+ """
+
+ # This should probably be done via a TypeVisitor. Apologies to the reader!
+ if isinstance(rt, AnyType):
+ return True, ("may be mutable" if verbose else None)
+
+ elif isinstance(rt, Instance):
+ if (
+ rt.type.fullname in IMMUTABLE_VALUE_TYPES
+ or rt.type.fullname in IMMUTABLE_CUSTOM_TYPES
+ ):
+ # "Simple" types are generally immutable.
+ return True, None
+
+ elif rt.type.fullname == "typing.Mapping":
+ # Generally mapping keys are immutable, but they only *have* to be
+ # hashable, which doesn't imply immutability. E.g. Mapping[K, V]
+ # is cachable iff K and V are cachable.
+ return is_cacheable(rt.args[0], signature, verbose) and is_cacheable(
+ rt.args[1], signature, verbose
+ )
+
+ elif rt.type.fullname in IMMUTABLE_CONTAINER_TYPES_REQUIRING_IMMUTABLE_ELEMENTS:
+ # E.g. Collection[T] is cachable iff T is cachable.
+ return is_cacheable(rt.args[0], signature, verbose)
+
+ elif rt.type.fullname in MUTABLE_CONTAINER_TYPES:
+ # Mutable containers are mutable regardless of their underlying type.
+ return False, None
+
+ elif "attrs" in rt.type.metadata:
+ # attrs classes are only cachable iff it is frozen (immutable itself)
+ # and all attributes are cachable.
+ frozen = rt.type.metadata["attrs"]["frozen"]
+ if frozen:
+ for attribute in rt.type.metadata["attrs"]["attributes"]:
+ attribute_name = attribute["name"]
+ symbol_node = rt.type.names[attribute_name].node
+ assert isinstance(symbol_node, Var)
+ assert symbol_node.type is not None
+ ok, note = is_cacheable(symbol_node.type, signature, verbose)
+ if not ok:
+ return False, f"non-frozen attrs property: {attribute_name}"
+ # All attributes were frozen.
+ return True, None
+ else:
+ return False, "non-frozen attrs class"
+
+ else:
+ # Ensure we fail for unknown types, these generally means that the
+ # above code is not complete.
+ return (
+ False,
+ f"Don't know how to handle {rt.type.fullname} return type instance",
+ )
+
+ elif isinstance(rt, NoneType):
+ # None is cachable.
+ return True, None
+
+ elif isinstance(rt, (TupleType, UnionType)):
+ # Tuples and unions are cachable iff all their items are cachable.
+ for item in rt.items:
+ ok, note = is_cacheable(item, signature, verbose)
+ if not ok:
+ return False, note
+ # This discards notes but that's probably fine
+ return True, None
+
+ elif isinstance(rt, TypeAliasType):
+ # For a type alias, check if the underlying real type is cachable.
+ return is_cacheable(mypy.types.get_proper_type(rt), signature, verbose)
+
+ elif isinstance(rt, UninhabitedType) and rt.is_noreturn:
+ # There is no return value, just consider it cachable. This is only used
+ # in tests.
+ return True, None
+
+ else:
+ # Ensure we fail for unknown types, these generally means that the
+ # above code is not complete.
+ return False, f"Don't know how to handle {type(rt).__qualname__} return type"
+
+
def plugin(version: str) -> Type[SynapsePlugin]:
# This is the entry point of the plugin, and lets us deal with the fact
# that the mypy plugin interface is *not* stable by looking at the version
diff --git a/synapse/handlers/room_list.py b/synapse/handlers/room_list.py
index bb0bdb8e6f..36e2db8975 100644
--- a/synapse/handlers/room_list.py
+++ b/synapse/handlers/room_list.py
@@ -33,7 +33,7 @@ from synapse.api.errors import (
RequestSendFailed,
SynapseError,
)
-from synapse.types import JsonDict, ThirdPartyInstanceID
+from synapse.types import JsonDict, JsonMapping, ThirdPartyInstanceID
from synapse.util.caches.descriptors import _CacheContext, cached
from synapse.util.caches.response_cache import ResponseCache
@@ -256,7 +256,7 @@ class RoomListHandler:
cache_context: _CacheContext,
with_alias: bool = True,
allow_private: bool = False,
- ) -> Optional[JsonDict]:
+ ) -> Optional[JsonMapping]:
"""Returns the entry for a room
Args:
diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py
index ba99e63d26..39556481ff 100644
--- a/synapse/storage/databases/main/event_push_actions.py
+++ b/synapse/storage/databases/main/event_push_actions.py
@@ -182,6 +182,7 @@ class UserPushAction(EmailPushAction):
profile_tag: str
+# TODO This is used as a cached value and is mutable.
@attr.s(slots=True, auto_attribs=True)
class NotifCounts:
"""
@@ -193,7 +194,7 @@ class NotifCounts:
highlight_count: int = 0
-@attr.s(slots=True, auto_attribs=True)
+@attr.s(slots=True, frozen=True, auto_attribs=True)
class RoomNotifCounts:
"""
The per-user, per-room count of notifications. Used by sync and push.
@@ -201,7 +202,7 @@ class RoomNotifCounts:
main_timeline: NotifCounts
# Map of thread ID to the notification counts.
- threads: Dict[str, NotifCounts]
+ threads: Mapping[str, NotifCounts]
@staticmethod
def empty() -> "RoomNotifCounts":
@@ -483,7 +484,7 @@ class EventPushActionsWorkerStore(ReceiptsWorkerStore, StreamWorkerStore, SQLBas
return room_to_count
- @cached(tree=True, max_entries=5000, iterable=True)
+ @cached(tree=True, max_entries=5000, iterable=True) # type: ignore[synapse-@cached-mutable]
async def get_unread_event_push_actions_by_room_for_user(
self,
room_id: str,
diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py
index b67f780c10..9246b418f5 100644
--- a/synapse/storage/databases/main/relations.py
+++ b/synapse/storage/databases/main/relations.py
@@ -458,7 +458,7 @@ class RelationsWorkerStore(SQLBaseStore):
)
return result is not None
- @cached()
+ @cached() # type: ignore[synapse-@cached-mutable]
async def get_references_for_event(self, event_id: str) -> List[JsonDict]:
raise NotImplementedError()
@@ -512,11 +512,12 @@ class RelationsWorkerStore(SQLBaseStore):
"_get_references_for_events_txn", _get_references_for_events_txn
)
- @cached()
+ @cached() # type: ignore[synapse-@cached-mutable]
def get_applicable_edit(self, event_id: str) -> Optional[EventBase]:
raise NotImplementedError()
- @cachedList(cached_method_name="get_applicable_edit", list_name="event_ids")
+ # TODO: This returns a mutable object, which is generally bad.
+ @cachedList(cached_method_name="get_applicable_edit", list_name="event_ids") # type: ignore[synapse-@cached-mutable]
async def get_applicable_edits(
self, event_ids: Collection[str]
) -> Mapping[str, Optional[EventBase]]:
@@ -598,11 +599,12 @@ class RelationsWorkerStore(SQLBaseStore):
for original_event_id in event_ids
}
- @cached()
+ @cached() # type: ignore[synapse-@cached-mutable]
def get_thread_summary(self, event_id: str) -> Optional[Tuple[int, EventBase]]:
raise NotImplementedError()
- @cachedList(cached_method_name="get_thread_summary", list_name="event_ids")
+ # TODO: This returns a mutable object, which is generally bad.
+ @cachedList(cached_method_name="get_thread_summary", list_name="event_ids") # type: ignore[synapse-@cached-mutable]
async def get_thread_summaries(
self, event_ids: Collection[str]
) -> Mapping[str, Optional[Tuple[int, EventBase]]]:
diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py
index 3755773faa..e93573f315 100644
--- a/synapse/storage/databases/main/roommember.py
+++ b/synapse/storage/databases/main/roommember.py
@@ -275,7 +275,7 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore):
_get_users_in_room_with_profiles,
)
- @cached(max_entries=100000)
+ @cached(max_entries=100000) # type: ignore[synapse-@cached-mutable]
async def get_room_summary(self, room_id: str) -> Mapping[str, MemberSummary]:
"""Get the details of a room roughly suitable for use by the room
summary extension to /sync. Useful when lazy loading room members.
@@ -1071,7 +1071,8 @@ class RoomMemberWorkerStore(EventsWorkerStore, CacheInvalidationWorkerStore):
)
return {row["event_id"]: row["membership"] for row in rows}
- @cached(max_entries=10000)
+ # TODO This returns a mutable object, which is generally confusing when using a cache.
+ @cached(max_entries=10000) # type: ignore[synapse-@cached-mutable]
def _get_joined_hosts_cache(self, room_id: str) -> "_JoinedHostsCache":
return _JoinedHostsCache()
diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py
index 2500381b7b..cbfb32014c 100644
--- a/synapse/storage/roommember.py
+++ b/synapse/storage/roommember.py
@@ -45,6 +45,7 @@ class ProfileInfo:
display_name: Optional[str]
+# TODO This is used as a cached value and is mutable.
@attr.s(slots=True, frozen=True, weakref_slot=False, auto_attribs=True)
class MemberSummary:
# A truncated list of (user_id, event_id) tuples for users of a given
diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py
index 8514a75a1c..ce736fdf75 100644
--- a/synapse/util/caches/descriptors.py
+++ b/synapse/util/caches/descriptors.py
@@ -36,6 +36,8 @@ from typing import (
)
from weakref import WeakValueDictionary
+import attr
+
from twisted.internet import defer
from twisted.python.failure import Failure
@@ -466,6 +468,35 @@ class _CacheContext:
)
+@attr.s(auto_attribs=True, slots=True, frozen=True)
+class _CachedFunctionDescriptor:
+ """Helper for `@cached`, we name it so that we can hook into it with mypy
+ plugin."""
+
+ max_entries: int
+ num_args: Optional[int]
+ uncached_args: Optional[Collection[str]]
+ tree: bool
+ cache_context: bool
+ iterable: bool
+ prune_unread_entries: bool
+ name: Optional[str]
+
+ def __call__(self, orig: F) -> CachedFunction[F]:
+ d = DeferredCacheDescriptor(
+ orig,
+ max_entries=self.max_entries,
+ num_args=self.num_args,
+ uncached_args=self.uncached_args,
+ tree=self.tree,
+ cache_context=self.cache_context,
+ iterable=self.iterable,
+ prune_unread_entries=self.prune_unread_entries,
+ name=self.name,
+ )
+ return cast(CachedFunction[F], d)
+
+
def cached(
*,
max_entries: int = 1000,
@@ -476,9 +507,8 @@ def cached(
iterable: bool = False,
prune_unread_entries: bool = True,
name: Optional[str] = None,
-) -> Callable[[F], CachedFunction[F]]:
- func = lambda orig: DeferredCacheDescriptor(
- orig,
+) -> _CachedFunctionDescriptor:
+ return _CachedFunctionDescriptor(
max_entries=max_entries,
num_args=num_args,
uncached_args=uncached_args,
@@ -489,7 +519,26 @@ def cached(
name=name,
)
- return cast(Callable[[F], CachedFunction[F]], func)
+
+@attr.s(auto_attribs=True, slots=True, frozen=True)
+class _CachedListFunctionDescriptor:
+ """Helper for `@cachedList`, we name it so that we can hook into it with mypy
+ plugin."""
+
+ cached_method_name: str
+ list_name: str
+ num_args: Optional[int] = None
+ name: Optional[str] = None
+
+ def __call__(self, orig: F) -> CachedFunction[F]:
+ d = DeferredCacheListDescriptor(
+ orig,
+ cached_method_name=self.cached_method_name,
+ list_name=self.list_name,
+ num_args=self.num_args,
+ name=self.name,
+ )
+ return cast(CachedFunction[F], d)
def cachedList(
@@ -498,7 +547,7 @@ def cachedList(
list_name: str,
num_args: Optional[int] = None,
name: Optional[str] = None,
-) -> Callable[[F], CachedFunction[F]]:
+) -> _CachedListFunctionDescriptor:
"""Creates a descriptor that wraps a function in a `DeferredCacheListDescriptor`.
Used to do batch lookups for an already created cache. One of the arguments
@@ -527,16 +576,13 @@ def cachedList(
def batch_do_something(self, first_arg, second_args):
...
"""
- func = lambda orig: DeferredCacheListDescriptor(
- orig,
+ return _CachedListFunctionDescriptor(
cached_method_name=cached_method_name,
list_name=list_name,
num_args=num_args,
name=name,
)
- return cast(Callable[[F], CachedFunction[F]], func)
-
def _get_cache_key_builder(
param_names: Sequence[str],
|