diff options
Diffstat (limited to 'synapse')
30 files changed, 959 insertions, 495 deletions
diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 4a204a5823..bb4d53d778 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -42,6 +42,7 @@ from synapse.crypto import context_factory from synapse.events.presence_router import load_legacy_presence_router from synapse.events.spamcheck import load_legacy_spam_checkers from synapse.events.third_party_rules import load_legacy_third_party_event_rules +from synapse.handlers.auth import load_legacy_password_auth_providers from synapse.logging.context import PreserveLoggingContext from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.metrics.jemalloc import setup_jemalloc_stats @@ -379,6 +380,7 @@ async def start(hs: "HomeServer"): load_legacy_spam_checkers(hs) load_legacy_third_party_event_rules(hs) load_legacy_presence_router(hs) + load_legacy_password_auth_providers(hs) # If we've configured an expiry time for caches, start the background job now. setup_expire_lru_cache_entries(hs) diff --git a/synapse/config/password_auth_providers.py b/synapse/config/password_auth_providers.py index 83994df798..f980102b45 100644 --- a/synapse/config/password_auth_providers.py +++ b/synapse/config/password_auth_providers.py @@ -25,6 +25,29 @@ class PasswordAuthProviderConfig(Config): section = "authproviders" def read_config(self, config, **kwargs): + """Parses the old password auth providers config. The config format looks like this: + + password_providers: + # Example config for an LDAP auth provider + - module: "ldap_auth_provider.LdapAuthProvider" + config: + enabled: true + uri: "ldap://ldap.example.com:389" + start_tls: true + base: "ou=users,dc=example,dc=com" + attributes: + uid: "cn" + mail: "email" + name: "givenName" + #bind_dn: + #bind_password: + #filter: "(objectClass=posixAccount)" + + We expect admins to use modules for this feature (which is why it doesn't appear + in the sample config file), but we want to keep support for it around for a bit + for backwards compatibility. + """ + self.password_providers: List[Tuple[Type, Any]] = [] providers = [] @@ -49,33 +72,3 @@ class PasswordAuthProviderConfig(Config): ) self.password_providers.append((provider_class, provider_config)) - - def generate_config_section(self, **kwargs): - return """\ - # Password providers allow homeserver administrators to integrate - # their Synapse installation with existing authentication methods - # ex. LDAP, external tokens, etc. - # - # For more information and known implementations, please see - # https://matrix-org.github.io/synapse/latest/password_auth_providers.html - # - # Note: instances wishing to use SAML or CAS authentication should - # instead use the `saml2_config` or `cas_config` options, - # respectively. - # - password_providers: - # # Example config for an LDAP auth provider - # - module: "ldap_auth_provider.LdapAuthProvider" - # config: - # enabled: true - # uri: "ldap://ldap.example.com:389" - # start_tls: true - # base: "ou=users,dc=example,dc=com" - # attributes: - # uid: "cn" - # mail: "email" - # name: "givenName" - # #bind_dn: - # #bind_password: - # #filter: "(objectClass=posixAccount)" - """ diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 50f2a4c1f4..4f409f31e1 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -90,13 +90,13 @@ class EventBuilder: ) @property - def state_key(self): + def state_key(self) -> str: if self._state_key is not None: return self._state_key raise AttributeError("state_key") - def is_state(self): + def is_state(self) -> bool: return self._state_key is not None async def build( diff --git a/synapse/events/presence_router.py b/synapse/events/presence_router.py index 68b8b19024..a58f313e8b 100644 --- a/synapse/events/presence_router.py +++ b/synapse/events/presence_router.py @@ -14,6 +14,7 @@ import logging from typing import ( TYPE_CHECKING, + Any, Awaitable, Callable, Dict, @@ -33,14 +34,13 @@ if TYPE_CHECKING: GET_USERS_FOR_STATES_CALLBACK = Callable[ [Iterable[UserPresenceState]], Awaitable[Dict[str, Set[UserPresenceState]]] ] -GET_INTERESTED_USERS_CALLBACK = Callable[ - [str], Awaitable[Union[Set[str], "PresenceRouter.ALL_USERS"]] -] +# This must either return a set of strings or the constant PresenceRouter.ALL_USERS. +GET_INTERESTED_USERS_CALLBACK = Callable[[str], Awaitable[Union[Set[str], str]]] logger = logging.getLogger(__name__) -def load_legacy_presence_router(hs: "HomeServer"): +def load_legacy_presence_router(hs: "HomeServer") -> None: """Wrapper that loads a presence router module configured using the old configuration, and registers the hooks they implement. """ @@ -69,9 +69,10 @@ def load_legacy_presence_router(hs: "HomeServer"): if f is None: return None - def run(*args, **kwargs): - # mypy doesn't do well across function boundaries so we need to tell it - # f is definitely not None. + def run(*args: Any, **kwargs: Any) -> Awaitable: + # Assertion required because mypy can't prove we won't change `f` + # back to `None`. See + # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions assert f is not None return maybe_awaitable(f(*args, **kwargs)) @@ -104,7 +105,7 @@ class PresenceRouter: self, get_users_for_states: Optional[GET_USERS_FOR_STATES_CALLBACK] = None, get_interested_users: Optional[GET_INTERESTED_USERS_CALLBACK] = None, - ): + ) -> None: # PresenceRouter modules are required to implement both of these methods # or neither of them as they are assumed to act in a complementary manner paired_methods = [get_users_for_states, get_interested_users] @@ -142,7 +143,7 @@ class PresenceRouter: # Don't include any extra destinations for presence updates return {} - users_for_states = {} + users_for_states: Dict[str, Set[UserPresenceState]] = {} # run all the callbacks for get_users_for_states and combine the results for callback in self._get_users_for_states_callbacks: try: @@ -171,7 +172,7 @@ class PresenceRouter: return users_for_states - async def get_interested_users(self, user_id: str) -> Union[Set[str], ALL_USERS]: + async def get_interested_users(self, user_id: str) -> Union[Set[str], str]: """ Retrieve a list of users that `user_id` is interested in receiving the presence of. This will be in addition to those they share a room with. diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index 5ba01eeef9..d7527008c4 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -11,17 +11,20 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import TYPE_CHECKING, Optional, Union +from typing import TYPE_CHECKING, List, Optional, Tuple, Union import attr from frozendict import frozendict +from twisted.internet.defer import Deferred + from synapse.appservice import ApplicationService from synapse.events import EventBase from synapse.logging.context import make_deferred_yieldable, run_in_background -from synapse.types import StateMap +from synapse.types import JsonDict, StateMap if TYPE_CHECKING: + from synapse.storage import Storage from synapse.storage.databases.main import DataStore @@ -112,13 +115,13 @@ class EventContext: @staticmethod def with_state( - state_group, - state_group_before_event, - current_state_ids, - prev_state_ids, - prev_group=None, - delta_ids=None, - ): + state_group: Optional[int], + state_group_before_event: Optional[int], + current_state_ids: Optional[StateMap[str]], + prev_state_ids: Optional[StateMap[str]], + prev_group: Optional[int] = None, + delta_ids: Optional[StateMap[str]] = None, + ) -> "EventContext": return EventContext( current_state_ids=current_state_ids, prev_state_ids=prev_state_ids, @@ -129,22 +132,22 @@ class EventContext: ) @staticmethod - def for_outlier(): + def for_outlier() -> "EventContext": """Return an EventContext instance suitable for persisting an outlier event""" return EventContext( current_state_ids={}, prev_state_ids={}, ) - async def serialize(self, event: EventBase, store: "DataStore") -> dict: + async def serialize(self, event: EventBase, store: "DataStore") -> JsonDict: """Converts self to a type that can be serialized as JSON, and then deserialized by `deserialize` Args: - event (FrozenEvent): The event that this context relates to + event: The event that this context relates to Returns: - dict + The serialized event. """ # We don't serialize the full state dicts, instead they get pulled out @@ -170,17 +173,16 @@ class EventContext: } @staticmethod - def deserialize(storage, input): + def deserialize(storage: "Storage", input: JsonDict) -> "EventContext": """Converts a dict that was produced by `serialize` back into a EventContext. Args: - storage (Storage): Used to convert AS ID to AS object and fetch - state. - input (dict): A dict produced by `serialize` + storage: Used to convert AS ID to AS object and fetch state. + input: A dict produced by `serialize` Returns: - EventContext + The event context. """ context = _AsyncEventContextImpl( # We use the state_group and prev_state_id stuff to pull the @@ -241,22 +243,25 @@ class EventContext: await self._ensure_fetched() return self._current_state_ids - async def get_prev_state_ids(self): + async def get_prev_state_ids(self) -> StateMap[str]: """ Gets the room state map, excluding this event. For a non-state event, this will be the same as get_current_state_ids(). Returns: - dict[(str, str), str]|None: Returns None if state_group - is None, which happens when the associated event is an outlier. - Maps a (type, state_key) to the event ID of the state event matching - this tuple. + Returns {} if state_group is None, which happens when the associated + event is an outlier. + + Maps a (type, state_key) to the event ID of the state event matching + this tuple. """ await self._ensure_fetched() + # There *should* be previous state IDs now. + assert self._prev_state_ids is not None return self._prev_state_ids - def get_cached_current_state_ids(self): + def get_cached_current_state_ids(self) -> Optional[StateMap[str]]: """Gets the current state IDs if we have them already cached. It is an error to access this for a rejected event, since rejected state should @@ -264,16 +269,17 @@ class EventContext: ``rejected`` is set. Returns: - dict[(str, str), str]|None: Returns None if we haven't cached the - state or if state_group is None, which happens when the associated - event is an outlier. + Returns None if we haven't cached the state or if state_group is None + (which happens when the associated event is an outlier). + + Otherwise, returns the the current state IDs. """ if self.rejected: raise RuntimeError("Attempt to access state_ids of rejected event") return self._current_state_ids - async def _ensure_fetched(self): + async def _ensure_fetched(self) -> None: return None @@ -285,46 +291,46 @@ class _AsyncEventContextImpl(EventContext): Attributes: - _storage (Storage) + _storage - _fetching_state_deferred (Deferred|None): Resolves when *_state_ids have - been calculated. None if we haven't started calculating yet + _fetching_state_deferred: Resolves when *_state_ids have been calculated. + None if we haven't started calculating yet - _event_type (str): The type of the event the context is associated with. + _event_type: The type of the event the context is associated with. - _event_state_key (str): The state_key of the event the context is - associated with. + _event_state_key: The state_key of the event the context is associated with. - _prev_state_id (str|None): If the event associated with the context is - a state event, then `_prev_state_id` is the event_id of the state - that was replaced. + _prev_state_id: If the event associated with the context is a state event, + then `_prev_state_id` is the event_id of the state that was replaced. """ # This needs to have a default as we're inheriting - _storage = attr.ib(default=None) - _prev_state_id = attr.ib(default=None) - _event_type = attr.ib(default=None) - _event_state_key = attr.ib(default=None) - _fetching_state_deferred = attr.ib(default=None) + _storage: "Storage" = attr.ib(default=None) + _prev_state_id: Optional[str] = attr.ib(default=None) + _event_type: str = attr.ib(default=None) + _event_state_key: Optional[str] = attr.ib(default=None) + _fetching_state_deferred: Optional["Deferred[None]"] = attr.ib(default=None) - async def _ensure_fetched(self): + async def _ensure_fetched(self) -> None: if not self._fetching_state_deferred: self._fetching_state_deferred = run_in_background(self._fill_out_state) - return await make_deferred_yieldable(self._fetching_state_deferred) + await make_deferred_yieldable(self._fetching_state_deferred) - async def _fill_out_state(self): + async def _fill_out_state(self) -> None: """Called to populate the _current_state_ids and _prev_state_ids attributes by loading from the database. """ if self.state_group is None: return - self._current_state_ids = await self._storage.state.get_state_ids_for_group( + current_state_ids = await self._storage.state.get_state_ids_for_group( self.state_group ) + # Set this separately so mypy knows current_state_ids is not None. + self._current_state_ids = current_state_ids if self._event_state_key is not None: - self._prev_state_ids = dict(self._current_state_ids) + self._prev_state_ids = dict(current_state_ids) key = (self._event_type, self._event_state_key) if self._prev_state_id: @@ -332,10 +338,12 @@ class _AsyncEventContextImpl(EventContext): else: self._prev_state_ids.pop(key, None) else: - self._prev_state_ids = self._current_state_ids + self._prev_state_ids = current_state_ids -def _encode_state_dict(state_dict): +def _encode_state_dict( + state_dict: Optional[StateMap[str]], +) -> Optional[List[Tuple[str, str, str]]]: """Since dicts of (type, state_key) -> event_id cannot be serialized in JSON we need to convert them to a form that can. """ @@ -345,7 +353,9 @@ def _encode_state_dict(state_dict): return [(etype, state_key, v) for (etype, state_key), v in state_dict.items()] -def _decode_state_dict(input): +def _decode_state_dict( + input: Optional[List[Tuple[str, str, str]]] +) -> Optional[StateMap[str]]: """Decodes a state dict encoded using `_encode_state_dict` above""" if input is None: return None diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index ae4c8ab257..3134beb8d3 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -77,7 +77,7 @@ CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK = Callable[ ] -def load_legacy_spam_checkers(hs: "synapse.server.HomeServer"): +def load_legacy_spam_checkers(hs: "synapse.server.HomeServer") -> None: """Wrapper that loads spam checkers configured using the old configuration, and registers the spam checker hooks they implement. """ @@ -129,9 +129,9 @@ def load_legacy_spam_checkers(hs: "synapse.server.HomeServer"): request_info: Collection[Tuple[str, str]], auth_provider_id: Optional[str], ) -> Union[Awaitable[RegistrationBehaviour], RegistrationBehaviour]: - # We've already made sure f is not None above, but mypy doesn't - # do well across function boundaries so we need to tell it f is - # definitely not None. + # Assertion required because mypy can't prove we won't + # change `f` back to `None`. See + # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions assert f is not None return f( @@ -146,9 +146,10 @@ def load_legacy_spam_checkers(hs: "synapse.server.HomeServer"): "Bad signature for callback check_registration_for_spam", ) - def run(*args, **kwargs): - # mypy doesn't do well across function boundaries so we need to tell it - # wrapped_func is definitely not None. + def run(*args: Any, **kwargs: Any) -> Awaitable: + # Assertion required because mypy can't prove we won't change `f` + # back to `None`. See + # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions assert wrapped_func is not None return maybe_awaitable(wrapped_func(*args, **kwargs)) @@ -165,7 +166,7 @@ def load_legacy_spam_checkers(hs: "synapse.server.HomeServer"): class SpamChecker: - def __init__(self): + def __init__(self) -> None: self._check_event_for_spam_callbacks: List[CHECK_EVENT_FOR_SPAM_CALLBACK] = [] self._user_may_join_room_callbacks: List[USER_MAY_JOIN_ROOM_CALLBACK] = [] self._user_may_invite_callbacks: List[USER_MAY_INVITE_CALLBACK] = [] @@ -209,7 +210,7 @@ class SpamChecker: CHECK_REGISTRATION_FOR_SPAM_CALLBACK ] = None, check_media_file_for_spam: Optional[CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK] = None, - ): + ) -> None: """Register callbacks from module for each hook.""" if check_event_for_spam is not None: self._check_event_for_spam_callbacks.append(check_event_for_spam) @@ -275,7 +276,9 @@ class SpamChecker: return False - async def user_may_join_room(self, user_id: str, room_id: str, is_invited: bool): + async def user_may_join_room( + self, user_id: str, room_id: str, is_invited: bool + ) -> bool: """Checks if a given users is allowed to join a room. Not called when a user creates a room. @@ -285,7 +288,7 @@ class SpamChecker: is_invited: Whether the user is invited into the room Returns: - bool: Whether the user may join the room + Whether the user may join the room """ for callback in self._user_may_join_room_callbacks: if await callback(user_id, room_id, is_invited) is False: diff --git a/synapse/events/third_party_rules.py b/synapse/events/third_party_rules.py index 976d9fa446..2a6dabdab6 100644 --- a/synapse/events/third_party_rules.py +++ b/synapse/events/third_party_rules.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import TYPE_CHECKING, Awaitable, Callable, List, Optional, Tuple +from typing import TYPE_CHECKING, Any, Awaitable, Callable, List, Optional, Tuple from synapse.api.errors import SynapseError from synapse.events import EventBase @@ -38,7 +38,7 @@ CHECK_VISIBILITY_CAN_BE_MODIFIED_CALLBACK = Callable[ ] -def load_legacy_third_party_event_rules(hs: "HomeServer"): +def load_legacy_third_party_event_rules(hs: "HomeServer") -> None: """Wrapper that loads a third party event rules module configured using the old configuration, and registers the hooks they implement. """ @@ -77,9 +77,9 @@ def load_legacy_third_party_event_rules(hs: "HomeServer"): event: EventBase, state_events: StateMap[EventBase], ) -> Tuple[bool, Optional[dict]]: - # We've already made sure f is not None above, but mypy doesn't do well - # across function boundaries so we need to tell it f is definitely not - # None. + # Assertion required because mypy can't prove we won't change + # `f` back to `None`. See + # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions assert f is not None res = await f(event, state_events) @@ -98,9 +98,9 @@ def load_legacy_third_party_event_rules(hs: "HomeServer"): async def wrap_on_create_room( requester: Requester, config: dict, is_requester_admin: bool ) -> None: - # We've already made sure f is not None above, but mypy doesn't do well - # across function boundaries so we need to tell it f is definitely not - # None. + # Assertion required because mypy can't prove we won't change + # `f` back to `None`. See + # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions assert f is not None res = await f(requester, config, is_requester_admin) @@ -112,9 +112,10 @@ def load_legacy_third_party_event_rules(hs: "HomeServer"): return wrap_on_create_room - def run(*args, **kwargs): - # mypy doesn't do well across function boundaries so we need to tell it - # f is definitely not None. + def run(*args: Any, **kwargs: Any) -> Awaitable: + # Assertion required because mypy can't prove we won't change `f` + # back to `None`. See + # https://mypy.readthedocs.io/en/latest/common_issues.html#narrowing-and-inner-functions assert f is not None return maybe_awaitable(f(*args, **kwargs)) @@ -162,7 +163,7 @@ class ThirdPartyEventRules: check_visibility_can_be_modified: Optional[ CHECK_VISIBILITY_CAN_BE_MODIFIED_CALLBACK ] = None, - ): + ) -> None: """Register callbacks from modules for each hook.""" if check_event_allowed is not None: self._check_event_allowed_callbacks.append(check_event_allowed) diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 520edbbf61..23bd24d963 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -13,18 +13,32 @@ # limitations under the License. import collections.abc import re -from typing import Any, Mapping, Union +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + Iterable, + List, + Mapping, + Optional, + Union, +) from frozendict import frozendict from synapse.api.constants import EventContentFields, EventTypes, RelationTypes from synapse.api.errors import Codes, SynapseError from synapse.api.room_versions import RoomVersion +from synapse.types import JsonDict from synapse.util.async_helpers import yieldable_gather_results from synapse.util.frozenutils import unfreeze from . import EventBase +if TYPE_CHECKING: + from synapse.server import HomeServer + # Split strings on "." but not "\." This uses a negative lookbehind assertion for '\' # (?<!stuff) matches if the current position in the string is not preceded # by a match for 'stuff'. @@ -65,7 +79,7 @@ def prune_event(event: EventBase) -> EventBase: return pruned_event -def prune_event_dict(room_version: RoomVersion, event_dict: dict) -> dict: +def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDict: """Redacts the event_dict in the same way as `prune_event`, except it operates on dicts rather than event objects @@ -97,7 +111,7 @@ def prune_event_dict(room_version: RoomVersion, event_dict: dict) -> dict: new_content = {} - def add_fields(*fields): + def add_fields(*fields: str) -> None: for field in fields: if field in event_dict["content"]: new_content[field] = event_dict["content"][field] @@ -151,7 +165,7 @@ def prune_event_dict(room_version: RoomVersion, event_dict: dict) -> dict: allowed_fields["content"] = new_content - unsigned = {} + unsigned: JsonDict = {} allowed_fields["unsigned"] = unsigned event_unsigned = event_dict.get("unsigned", {}) @@ -164,16 +178,16 @@ def prune_event_dict(room_version: RoomVersion, event_dict: dict) -> dict: return allowed_fields -def _copy_field(src, dst, field): +def _copy_field(src: JsonDict, dst: JsonDict, field: List[str]) -> None: """Copy the field in 'src' to 'dst'. For example, if src={"foo":{"bar":5}} and dst={}, and field=["foo","bar"] then dst={"foo":{"bar":5}}. Args: - src(dict): The dict to read from. - dst(dict): The dict to modify. - field(list<str>): List of keys to drill down to in 'src'. + src: The dict to read from. + dst: The dict to modify. + field: List of keys to drill down to in 'src'. """ if len(field) == 0: # this should be impossible return @@ -205,7 +219,7 @@ def _copy_field(src, dst, field): sub_out_dict[key_to_move] = sub_dict[key_to_move] -def only_fields(dictionary, fields): +def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict: """Return a new dict with only the fields in 'dictionary' which are present in 'fields'. @@ -215,11 +229,11 @@ def only_fields(dictionary, fields): A literal '.' character in a field name may be escaped using a '\'. Args: - dictionary(dict): The dictionary to read from. - fields(list<str>): A list of fields to copy over. Only shallow refs are + dictionary: The dictionary to read from. + fields: A list of fields to copy over. Only shallow refs are taken. Returns: - dict: A new dictionary with only the given fields. If fields was empty, + A new dictionary with only the given fields. If fields was empty, the same dictionary is returned. """ if len(fields) == 0: @@ -235,17 +249,17 @@ def only_fields(dictionary, fields): [f.replace(r"\.", r".") for f in field_array] for field_array in split_fields ] - output = {} + output: JsonDict = {} for field_array in split_fields: _copy_field(dictionary, output, field_array) return output -def format_event_raw(d): +def format_event_raw(d: JsonDict) -> JsonDict: return d -def format_event_for_client_v1(d): +def format_event_for_client_v1(d: JsonDict) -> JsonDict: d = format_event_for_client_v2(d) sender = d.get("sender") @@ -267,7 +281,7 @@ def format_event_for_client_v1(d): return d -def format_event_for_client_v2(d): +def format_event_for_client_v2(d: JsonDict) -> JsonDict: drop_keys = ( "auth_events", "prev_events", @@ -282,37 +296,37 @@ def format_event_for_client_v2(d): return d -def format_event_for_client_v2_without_room_id(d): +def format_event_for_client_v2_without_room_id(d: JsonDict) -> JsonDict: d = format_event_for_client_v2(d) d.pop("room_id", None) return d def serialize_event( - e, - time_now_ms, - as_client_event=True, - event_format=format_event_for_client_v1, - token_id=None, - only_event_fields=None, - include_stripped_room_state=False, -): + e: Union[JsonDict, EventBase], + time_now_ms: int, + as_client_event: bool = True, + event_format: Callable[[JsonDict], JsonDict] = format_event_for_client_v1, + token_id: Optional[str] = None, + only_event_fields: Optional[List[str]] = None, + include_stripped_room_state: bool = False, +) -> JsonDict: """Serialize event for clients Args: - e (EventBase) - time_now_ms (int) - as_client_event (bool) + e + time_now_ms + as_client_event event_format token_id only_event_fields - include_stripped_room_state (bool): Some events can have stripped room state + include_stripped_room_state: Some events can have stripped room state stored in the `unsigned` field. This is required for invite and knock functionality. If this option is False, that state will be removed from the event before it is returned. Otherwise, it will be kept. Returns: - dict + The serialized event dictionary. """ # FIXME(erikj): To handle the case of presence events and the like @@ -369,25 +383,29 @@ class EventClientSerializer: clients. """ - def __init__(self, hs): + def __init__(self, hs: "HomeServer"): self.store = hs.get_datastore() self.experimental_msc1849_support_enabled = ( hs.config.server.experimental_msc1849_support_enabled ) async def serialize_event( - self, event, time_now, bundle_aggregations=True, **kwargs - ): + self, + event: Union[JsonDict, EventBase], + time_now: int, + bundle_aggregations: bool = True, + **kwargs: Any, + ) -> JsonDict: """Serializes a single event. Args: - event (EventBase) - time_now (int): The current time in milliseconds - bundle_aggregations (bool): Whether to bundle in related events + event + time_now: The current time in milliseconds + bundle_aggregations: Whether to bundle in related events **kwargs: Arguments to pass to `serialize_event` Returns: - dict: The serialized event + The serialized event """ # To handle the case of presence events and the like if not isinstance(event, EventBase): @@ -448,25 +466,27 @@ class EventClientSerializer: return serialized_event - def serialize_events(self, events, time_now, **kwargs): + async def serialize_events( + self, events: Iterable[Union[JsonDict, EventBase]], time_now: int, **kwargs: Any + ) -> List[JsonDict]: """Serializes multiple events. Args: - event (iter[EventBase]) - time_now (int): The current time in milliseconds + event + time_now: The current time in milliseconds **kwargs: Arguments to pass to `serialize_event` Returns: - Deferred[list[dict]]: The list of serialized events + The list of serialized events """ - return yieldable_gather_results( + return await yieldable_gather_results( self.serialize_event, events, time_now=time_now, **kwargs ) def copy_power_levels_contents( old_power_levels: Mapping[str, Union[int, Mapping[str, int]]] -): +) -> Dict[str, Union[int, Dict[str, int]]]: """Copy the content of a power_levels event, unfreezing frozendicts along the way Raises: @@ -475,7 +495,7 @@ def copy_power_levels_contents( if not isinstance(old_power_levels, collections.abc.Mapping): raise TypeError("Not a valid power-levels content: %r" % (old_power_levels,)) - power_levels = {} + power_levels: Dict[str, Union[int, Dict[str, int]]] = {} for k, v in old_power_levels.items(): if isinstance(v, int): @@ -483,7 +503,8 @@ def copy_power_levels_contents( continue if isinstance(v, collections.abc.Mapping): - power_levels[k] = h = {} + h: Dict[str, int] = {} + power_levels[k] = h for k1, v1 in v.items(): # we should only have one level of nesting if not isinstance(v1, int): @@ -498,7 +519,7 @@ def copy_power_levels_contents( return power_levels -def validate_canonicaljson(value: Any): +def validate_canonicaljson(value: Any) -> None: """ Ensure that the JSON object is valid according to the rules of canonical JSON. diff --git a/synapse/events/validator.py b/synapse/events/validator.py index 6eb6544c4c..4d459c17f1 100644 --- a/synapse/events/validator.py +++ b/synapse/events/validator.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import collections.abc -from typing import Union +from typing import Iterable, Union import jsonschema @@ -28,11 +28,11 @@ from synapse.events.utils import ( validate_canonicaljson, ) from synapse.federation.federation_server import server_matches_acl_event -from synapse.types import EventID, RoomID, UserID +from synapse.types import EventID, JsonDict, RoomID, UserID class EventValidator: - def validate_new(self, event: EventBase, config: HomeServerConfig): + def validate_new(self, event: EventBase, config: HomeServerConfig) -> None: """Validates the event has roughly the right format Args: @@ -116,7 +116,7 @@ class EventValidator: errcode=Codes.BAD_JSON, ) - def _validate_retention(self, event: EventBase): + def _validate_retention(self, event: EventBase) -> None: """Checks that an event that defines the retention policy for a room respects the format enforced by the spec. @@ -156,7 +156,7 @@ class EventValidator: errcode=Codes.BAD_JSON, ) - def validate_builder(self, event: Union[EventBase, EventBuilder]): + def validate_builder(self, event: Union[EventBase, EventBuilder]) -> None: """Validates that the builder/event has roughly the right format. Only checks values that we expect a proto event to have, rather than all the fields an event would have @@ -204,14 +204,14 @@ class EventValidator: self._ensure_state_event(event) - def _ensure_strings(self, d, keys): + def _ensure_strings(self, d: JsonDict, keys: Iterable[str]) -> None: for s in keys: if s not in d: raise SynapseError(400, "'%s' not in content" % (s,)) if not isinstance(d[s], str): raise SynapseError(400, "'%s' not a string type" % (s,)) - def _ensure_state_event(self, event): + def _ensure_state_event(self, event: Union[EventBase, EventBuilder]) -> None: if not event.is_state(): raise SynapseError(400, "'%s' must be state events" % (event.type,)) @@ -244,7 +244,9 @@ POWER_LEVELS_SCHEMA = { } -def _create_power_level_validator(): +# This could return something newer than Draft 7, but that's the current "latest" +# validator. +def _create_power_level_validator() -> jsonschema.Draft7Validator: validator = jsonschema.validators.validator_for(POWER_LEVELS_SCHEMA) # by default jsonschema does not consider a frozendict to be an object so diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index f4612a5b92..ebe75a9e9b 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -200,46 +200,13 @@ class AuthHandler: self.bcrypt_rounds = hs.config.registration.bcrypt_rounds - # we can't use hs.get_module_api() here, because to do so will create an - # import loop. - # - # TODO: refactor this class to separate the lower-level stuff that - # ModuleApi can use from the higher-level stuff that uses ModuleApi, as - # better way to break the loop - account_handler = ModuleApi(hs, self) - - self.password_providers = [ - PasswordProvider.load(module, config, account_handler) - for module, config in hs.config.authproviders.password_providers - ] - - logger.info("Extra password_providers: %s", self.password_providers) + self.password_auth_provider = hs.get_password_auth_provider() self.hs = hs # FIXME better possibility to access registrationHandler later? self.macaroon_gen = hs.get_macaroon_generator() self._password_enabled = hs.config.auth.password_enabled self._password_localdb_enabled = hs.config.auth.password_localdb_enabled - # start out by assuming PASSWORD is enabled; we will remove it later if not. - login_types = set() - if self._password_localdb_enabled: - login_types.add(LoginType.PASSWORD) - - for provider in self.password_providers: - login_types.update(provider.get_supported_login_types().keys()) - - if not self._password_enabled: - login_types.discard(LoginType.PASSWORD) - - # Some clients just pick the first type in the list. In this case, we want - # them to use PASSWORD (rather than token or whatever), so we want to make sure - # that comes first, where it's present. - self._supported_login_types = [] - if LoginType.PASSWORD in login_types: - self._supported_login_types.append(LoginType.PASSWORD) - login_types.remove(LoginType.PASSWORD) - self._supported_login_types.extend(login_types) - # Ratelimiter for failed auth during UIA. Uses same ratelimit config # as per `rc_login.failed_attempts`. self._failed_uia_attempts_ratelimiter = Ratelimiter( @@ -427,11 +394,10 @@ class AuthHandler: ui_auth_types.add(LoginType.PASSWORD) # also allow auth from password providers - for provider in self.password_providers: - for t in provider.get_supported_login_types().keys(): - if t == LoginType.PASSWORD and not self._password_enabled: - continue - ui_auth_types.add(t) + for t in self.password_auth_provider.get_supported_login_types().keys(): + if t == LoginType.PASSWORD and not self._password_enabled: + continue + ui_auth_types.add(t) # if sso is enabled, allow the user to log in via SSO iff they have a mapping # from sso to mxid. @@ -1038,7 +1004,25 @@ class AuthHandler: Returns: login types """ - return self._supported_login_types + # Load any login types registered by modules + # This is stored in the password_auth_provider so this doesn't trigger + # any callbacks + types = list(self.password_auth_provider.get_supported_login_types().keys()) + + # This list should include PASSWORD if (either _password_localdb_enabled is + # true or if one of the modules registered it) AND _password_enabled is true + # Also: + # Some clients just pick the first type in the list. In this case, we want + # them to use PASSWORD (rather than token or whatever), so we want to make sure + # that comes first, where it's present. + if LoginType.PASSWORD in types: + types.remove(LoginType.PASSWORD) + if self._password_enabled: + types.insert(0, LoginType.PASSWORD) + elif self._password_localdb_enabled and self._password_enabled: + types.insert(0, LoginType.PASSWORD) + + return types async def validate_login( self, @@ -1217,15 +1201,20 @@ class AuthHandler: known_login_type = False - for provider in self.password_providers: - supported_login_types = provider.get_supported_login_types() - if login_type not in supported_login_types: - # this password provider doesn't understand this login type - continue - + # Check if login_type matches a type registered by one of the modules + # We don't need to remove LoginType.PASSWORD from the list if password login is + # disabled, since if that were the case then by this point we know that the + # login_type is not LoginType.PASSWORD + supported_login_types = self.password_auth_provider.get_supported_login_types() + # check if the login type being used is supported by a module + if login_type in supported_login_types: + # Make a note that this login type is supported by the server known_login_type = True + # Get all the fields expected for this login types login_fields = supported_login_types[login_type] + # go through the login submission and keep track of which required fields are + # provided/not provided missing_fields = [] login_dict = {} for f in login_fields: @@ -1233,6 +1222,7 @@ class AuthHandler: missing_fields.append(f) else: login_dict[f] = login_submission[f] + # raise an error if any of the expected fields for that login type weren't provided if missing_fields: raise SynapseError( 400, @@ -1240,10 +1230,15 @@ class AuthHandler: % (login_type, missing_fields), ) - result = await provider.check_auth(username, login_type, login_dict) + # call all of the check_auth hooks for that login_type + # it will return a result once the first success is found (or None otherwise) + result = await self.password_auth_provider.check_auth( + username, login_type, login_dict + ) if result: return result + # if no module managed to authenticate the user, then fallback to built in password based auth if login_type == LoginType.PASSWORD and self._password_localdb_enabled: known_login_type = True @@ -1282,11 +1277,16 @@ class AuthHandler: completed login/registration, or `None`. If authentication was unsuccessful, `user_id` and `callback` are both `None`. """ - for provider in self.password_providers: - result = await provider.check_3pid_auth(medium, address, password) - if result: - return result + # call all of the check_3pid_auth callbacks + # Result will be from the first callback that returns something other than None + # If all the callbacks return None, then result is also set to None + result = await self.password_auth_provider.check_3pid_auth( + medium, address, password + ) + if result: + return result + # if result is None then return (None, None) return None, None async def _check_local_password(self, user_id: str, password: str) -> Optional[str]: @@ -1365,13 +1365,12 @@ class AuthHandler: user_info = await self.auth.get_user_by_access_token(access_token) await self.store.delete_access_token(access_token) - # see if any of our auth providers want to know about this - for provider in self.password_providers: - await provider.on_logged_out( - user_id=user_info.user_id, - device_id=user_info.device_id, - access_token=access_token, - ) + # see if any modules want to know about this + await self.password_auth_provider.on_logged_out( + user_id=user_info.user_id, + device_id=user_info.device_id, + access_token=access_token, + ) # delete pushers associated with this access token if user_info.token_id is not None: @@ -1398,12 +1397,11 @@ class AuthHandler: user_id, except_token_id=except_token_id, device_id=device_id ) - # see if any of our auth providers want to know about this - for provider in self.password_providers: - for token, _, device_id in tokens_and_devices: - await provider.on_logged_out( - user_id=user_id, device_id=device_id, access_token=token - ) + # see if any modules want to know about this + for token, _, device_id in tokens_and_devices: + await self.password_auth_provider.on_logged_out( + user_id=user_id, device_id=device_id, access_token=token + ) # delete pushers associated with the access tokens await self.hs.get_pusherpool().remove_pushers_by_access_token( @@ -1811,40 +1809,228 @@ class MacaroonGenerator: return macaroon -class PasswordProvider: - """Wrapper for a password auth provider module +def load_legacy_password_auth_providers(hs: "HomeServer") -> None: + module_api = hs.get_module_api() + for module, config in hs.config.authproviders.password_providers: + load_single_legacy_password_auth_provider( + module=module, config=config, api=module_api + ) - This class abstracts out all of the backwards-compatibility hacks for - password providers, to provide a consistent interface. - """ - @classmethod - def load( - cls, module: Type, config: JsonDict, module_api: ModuleApi - ) -> "PasswordProvider": - try: - pp = module(config=config, account_handler=module_api) - except Exception as e: - logger.error("Error while initializing %r: %s", module, e) - raise - return cls(pp, module_api) +def load_single_legacy_password_auth_provider( + module: Type, config: JsonDict, api: ModuleApi +) -> None: + try: + provider = module(config=config, account_handler=api) + except Exception as e: + logger.error("Error while initializing %r: %s", module, e) + raise + + # The known hooks. If a module implements a method who's name appears in this set + # we'll want to register it + password_auth_provider_methods = { + "check_3pid_auth", + "on_logged_out", + } + + # All methods that the module provides should be async, but this wasn't enforced + # in the old module system, so we wrap them if needed + def async_wrapper(f: Optional[Callable]) -> Optional[Callable[..., Awaitable]]: + # f might be None if the callback isn't implemented by the module. In this + # case we don't want to register a callback at all so we return None. + if f is None: + return None + + # We need to wrap check_password because its old form would return a boolean + # but we now want it to behave just like check_auth() and return the matrix id of + # the user if authentication succeeded or None otherwise + if f.__name__ == "check_password": + + async def wrapped_check_password( + username: str, login_type: str, login_dict: JsonDict + ) -> Optional[Tuple[str, Optional[Callable]]]: + # We've already made sure f is not None above, but mypy doesn't do well + # across function boundaries so we need to tell it f is definitely not + # None. + assert f is not None + + matrix_user_id = api.get_qualified_user_id(username) + password = login_dict["password"] + + is_valid = await f(matrix_user_id, password) + + if is_valid: + return matrix_user_id, None + + return None - def __init__(self, pp: "PasswordProvider", module_api: ModuleApi): - self._pp = pp - self._module_api = module_api + return wrapped_check_password + + # We need to wrap check_auth as in the old form it could return + # just a str, but now it must return Optional[Tuple[str, Optional[Callable]] + if f.__name__ == "check_auth": + + async def wrapped_check_auth( + username: str, login_type: str, login_dict: JsonDict + ) -> Optional[Tuple[str, Optional[Callable]]]: + # We've already made sure f is not None above, but mypy doesn't do well + # across function boundaries so we need to tell it f is definitely not + # None. + assert f is not None + + result = await f(username, login_type, login_dict) + + if isinstance(result, str): + return result, None + + return result + + return wrapped_check_auth + + # We need to wrap check_3pid_auth as in the old form it could return + # just a str, but now it must return Optional[Tuple[str, Optional[Callable]] + if f.__name__ == "check_3pid_auth": + + async def wrapped_check_3pid_auth( + medium: str, address: str, password: str + ) -> Optional[Tuple[str, Optional[Callable]]]: + # We've already made sure f is not None above, but mypy doesn't do well + # across function boundaries so we need to tell it f is definitely not + # None. + assert f is not None + + result = await f(medium, address, password) + + if isinstance(result, str): + return result, None + + return result - self._supported_login_types = {} + return wrapped_check_3pid_auth - # grandfather in check_password support - if hasattr(self._pp, "check_password"): - self._supported_login_types[LoginType.PASSWORD] = ("password",) + def run(*args: Tuple, **kwargs: Dict) -> Awaitable: + # mypy doesn't do well across function boundaries so we need to tell it + # f is definitely not None. + assert f is not None - g = getattr(self._pp, "get_supported_login_types", None) - if g: - self._supported_login_types.update(g()) + return maybe_awaitable(f(*args, **kwargs)) - def __str__(self) -> str: - return str(self._pp) + return run + + # populate hooks with the implemented methods, wrapped with async_wrapper + hooks = { + hook: async_wrapper(getattr(provider, hook, None)) + for hook in password_auth_provider_methods + } + + supported_login_types = {} + # call get_supported_login_types and add that to the dict + g = getattr(provider, "get_supported_login_types", None) + if g is not None: + # Note the old module style also called get_supported_login_types at loading time + # and it is synchronous + supported_login_types.update(g()) + + auth_checkers = {} + # Legacy modules have a check_auth method which expects to be called with one of + # the keys returned by get_supported_login_types. New style modules register a + # dictionary of login_type->check_auth_method mappings + check_auth = async_wrapper(getattr(provider, "check_auth", None)) + if check_auth is not None: + for login_type, fields in supported_login_types.items(): + # need tuple(fields) since fields can be any Iterable type (so may not be hashable) + auth_checkers[(login_type, tuple(fields))] = check_auth + + # if it has a "check_password" method then it should handle all auth checks + # with login type of LoginType.PASSWORD + check_password = async_wrapper(getattr(provider, "check_password", None)) + if check_password is not None: + # need to use a tuple here for ("password",) not a list since lists aren't hashable + auth_checkers[(LoginType.PASSWORD, ("password",))] = check_password + + api.register_password_auth_provider_callbacks(hooks, auth_checkers=auth_checkers) + + +CHECK_3PID_AUTH_CALLBACK = Callable[ + [str, str, str], + Awaitable[ + Optional[Tuple[str, Optional[Callable[["LoginResponse"], Awaitable[None]]]]] + ], +] +ON_LOGGED_OUT_CALLBACK = Callable[[str, Optional[str], str], Awaitable] +CHECK_AUTH_CALLBACK = Callable[ + [str, str, JsonDict], + Awaitable[ + Optional[Tuple[str, Optional[Callable[["LoginResponse"], Awaitable[None]]]]] + ], +] + + +class PasswordAuthProvider: + """ + A class that the AuthHandler calls when authenticating users + It allows modules to provide alternative methods for authentication + """ + + def __init__(self) -> None: + # lists of callbacks + self.check_3pid_auth_callbacks: List[CHECK_3PID_AUTH_CALLBACK] = [] + self.on_logged_out_callbacks: List[ON_LOGGED_OUT_CALLBACK] = [] + + # Mapping from login type to login parameters + self._supported_login_types: Dict[str, Iterable[str]] = {} + + # Mapping from login type to auth checker callbacks + self.auth_checker_callbacks: Dict[str, List[CHECK_AUTH_CALLBACK]] = {} + + def register_password_auth_provider_callbacks( + self, + check_3pid_auth: Optional[CHECK_3PID_AUTH_CALLBACK] = None, + on_logged_out: Optional[ON_LOGGED_OUT_CALLBACK] = None, + auth_checkers: Optional[Dict[Tuple[str, Tuple], CHECK_AUTH_CALLBACK]] = None, + ) -> None: + # Register check_3pid_auth callback + if check_3pid_auth is not None: + self.check_3pid_auth_callbacks.append(check_3pid_auth) + + # register on_logged_out callback + if on_logged_out is not None: + self.on_logged_out_callbacks.append(on_logged_out) + + if auth_checkers is not None: + # register a new supported login_type + # Iterate through all of the types being registered + for (login_type, fields), callback in auth_checkers.items(): + # Note: fields may be empty here. This would allow a modules auth checker to + # be called with just 'login_type' and no password or other secrets + + # Need to check that all the field names are strings or may get nasty errors later + for f in fields: + if not isinstance(f, str): + raise RuntimeError( + "A module tried to register support for login type: %s with parameters %s" + " but all parameter names must be strings" + % (login_type, fields) + ) + + # 2 modules supporting the same login type must expect the same fields + # e.g. 1 can't expect "pass" if the other expects "password" + # so throw an exception if that happens + if login_type not in self._supported_login_types.get(login_type, []): + self._supported_login_types[login_type] = fields + else: + fields_currently_supported = self._supported_login_types.get( + login_type + ) + if fields_currently_supported != fields: + raise RuntimeError( + "A module tried to register support for login type: %s with parameters %s" + " but another module had already registered support for that type with parameters %s" + % (login_type, fields, fields_currently_supported) + ) + + # Add the new method to the list of auth_checker_callbacks for this login type + self.auth_checker_callbacks.setdefault(login_type, []).append(callback) def get_supported_login_types(self) -> Mapping[str, Iterable[str]]: """Get the login types supported by this password provider @@ -1852,20 +2038,15 @@ class PasswordProvider: Returns a map from a login type identifier (such as m.login.password) to an iterable giving the fields which must be provided by the user in the submission to the /login API. - - This wrapper adds m.login.password to the list if the underlying password - provider supports the check_password() api. """ + return self._supported_login_types async def check_auth( self, username: str, login_type: str, login_dict: JsonDict - ) -> Optional[Tuple[str, Optional[Callable]]]: + ) -> Optional[Tuple[str, Optional[Callable[["LoginResponse"], Awaitable[None]]]]]: """Check if the user has presented valid login credentials - This wrapper also calls check_password() if the underlying password provider - supports the check_password() api and the login type is m.login.password. - Args: username: user id presented by the client. Either an MXID or an unqualified username. @@ -1879,63 +2060,130 @@ class PasswordProvider: user, and `callback` is an optional callback which will be called with the result from the /login call (including access_token, device_id, etc.) """ - # first grandfather in a call to check_password - if login_type == LoginType.PASSWORD: - check_password = getattr(self._pp, "check_password", None) - if check_password: - qualified_user_id = self._module_api.get_qualified_user_id(username) - is_valid = await check_password( - qualified_user_id, login_dict["password"] - ) - if is_valid: - return qualified_user_id, None - check_auth = getattr(self._pp, "check_auth", None) - if not check_auth: - return None - result = await check_auth(username, login_type, login_dict) + # Go through all callbacks for the login type until one returns with a value + # other than None (i.e. until a callback returns a success) + for callback in self.auth_checker_callbacks[login_type]: + try: + result = await callback(username, login_type, login_dict) + except Exception as e: + logger.warning("Failed to run module API callback %s: %s", callback, e) + continue - # Check if the return value is a str or a tuple - if isinstance(result, str): - # If it's a str, set callback function to None - return result, None + if result is not None: + # Check that the callback returned a Tuple[str, Optional[Callable]] + # "type: ignore[unreachable]" is used after some isinstance checks because mypy thinks + # result is always the right type, but as it is 3rd party code it might not be + + if not isinstance(result, tuple) or len(result) != 2: + logger.warning( + "Wrong type returned by module API callback %s: %s, expected" + " Optional[Tuple[str, Optional[Callable]]]", + callback, + result, + ) + continue - return result + # pull out the two parts of the tuple so we can do type checking + str_result, callback_result = result + + # the 1st item in the tuple should be a str + if not isinstance(str_result, str): + logger.warning( # type: ignore[unreachable] + "Wrong type returned by module API callback %s: %s, expected" + " Optional[Tuple[str, Optional[Callable]]]", + callback, + result, + ) + continue + + # the second should be Optional[Callable] + if callback_result is not None: + if not callable(callback_result): + logger.warning( # type: ignore[unreachable] + "Wrong type returned by module API callback %s: %s, expected" + " Optional[Tuple[str, Optional[Callable]]]", + callback, + result, + ) + continue + + # The result is a (str, Optional[callback]) tuple so return the successful result + return result + + # If this point has been reached then none of the callbacks successfully authenticated + # the user so return None + return None async def check_3pid_auth( self, medium: str, address: str, password: str - ) -> Optional[Tuple[str, Optional[Callable]]]: - g = getattr(self._pp, "check_3pid_auth", None) - if not g: - return None - + ) -> Optional[Tuple[str, Optional[Callable[["LoginResponse"], Awaitable[None]]]]]: # This function is able to return a deferred that either # resolves None, meaning authentication failure, or upon # success, to a str (which is the user_id) or a tuple of # (user_id, callback_func), where callback_func should be run # after we've finished everything else - result = await g(medium, address, password) - # Check if the return value is a str or a tuple - if isinstance(result, str): - # If it's a str, set callback function to None - return result, None + for callback in self.check_3pid_auth_callbacks: + try: + result = await callback(medium, address, password) + except Exception as e: + logger.warning("Failed to run module API callback %s: %s", callback, e) + continue - return result + if result is not None: + # Check that the callback returned a Tuple[str, Optional[Callable]] + # "type: ignore[unreachable]" is used after some isinstance checks because mypy thinks + # result is always the right type, but as it is 3rd party code it might not be + + if not isinstance(result, tuple) or len(result) != 2: + logger.warning( + "Wrong type returned by module API callback %s: %s, expected" + " Optional[Tuple[str, Optional[Callable]]]", + callback, + result, + ) + continue + + # pull out the two parts of the tuple so we can do type checking + str_result, callback_result = result + + # the 1st item in the tuple should be a str + if not isinstance(str_result, str): + logger.warning( # type: ignore[unreachable] + "Wrong type returned by module API callback %s: %s, expected" + " Optional[Tuple[str, Optional[Callable]]]", + callback, + result, + ) + continue + + # the second should be Optional[Callable] + if callback_result is not None: + if not callable(callback_result): + logger.warning( # type: ignore[unreachable] + "Wrong type returned by module API callback %s: %s, expected" + " Optional[Tuple[str, Optional[Callable]]]", + callback, + result, + ) + continue + + # The result is a (str, Optional[callback]) tuple so return the successful result + return result + + # If this point has been reached then none of the callbacks successfully authenticated + # the user so return None + return None async def on_logged_out( self, user_id: str, device_id: Optional[str], access_token: str ) -> None: - g = getattr(self._pp, "on_logged_out", None) - if not g: - return - # This might return an awaitable, if it does block the log out - # until it completes. - await maybe_awaitable( - g( - user_id=user_id, - device_id=device_id, - access_token=access_token, - ) - ) + # call all of the on_logged_out callbacks + for callback in self.on_logged_out_callbacks: + try: + callback(user_id, device_id, access_token) + except Exception as e: + logger.warning("Failed to run module API callback %s: %s", callback, e) + continue diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 75e6019760..6eafbea25d 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -14,7 +14,18 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import TYPE_CHECKING, Collection, Dict, Iterable, List, Optional, Set, Tuple +from typing import ( + TYPE_CHECKING, + Any, + Collection, + Dict, + Iterable, + List, + Mapping, + Optional, + Set, + Tuple, +) from synapse.api import errors from synapse.api.constants import EventTypes @@ -595,7 +606,7 @@ class DeviceHandler(DeviceWorkerHandler): def _update_device_from_client_ips( - device: JsonDict, client_ips: Dict[Tuple[str, str], JsonDict] + device: JsonDict, client_ips: Mapping[Tuple[str, str], Mapping[str, Any]] ) -> None: ip = client_ips.get((device["user_id"], device["device_id"]), {}) device.update({"last_seen_ts": ip.get("last_seen"), "last_seen_ip": ip.get("ip")}) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 4de9f4b828..2e024b551f 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -607,29 +607,6 @@ class EventCreationHandler: builder.internal_metadata.historical = historical - # Strip down the auth_event_ids to only what we need to auth the event. - # For example, we don't need extra m.room.member that don't match event.sender - if auth_event_ids is not None: - # If auth events are provided, prev events must be also. - assert prev_event_ids is not None - - temp_event = await builder.build( - prev_event_ids=prev_event_ids, - auth_event_ids=auth_event_ids, - depth=depth, - ) - auth_events = await self.store.get_events_as_list(auth_event_ids) - # Create a StateMap[str] - auth_event_state_map = { - (e.type, e.state_key): e.event_id for e in auth_events - } - # Actually strip down and use the necessary auth events - auth_event_ids = self._event_auth_handler.compute_auth_events( - event=temp_event, - current_state_ids=auth_event_state_map, - for_verification=False, - ) - event, context = await self.create_new_client_event( builder=builder, requester=requester, @@ -936,6 +913,33 @@ class EventCreationHandler: Tuple of created event, context """ + # Strip down the auth_event_ids to only what we need to auth the event. + # For example, we don't need extra m.room.member that don't match event.sender + full_state_ids_at_event = None + if auth_event_ids is not None: + # If auth events are provided, prev events must be also. + assert prev_event_ids is not None + + # Copy the full auth state before it stripped down + full_state_ids_at_event = auth_event_ids.copy() + + temp_event = await builder.build( + prev_event_ids=prev_event_ids, + auth_event_ids=auth_event_ids, + depth=depth, + ) + auth_events = await self.store.get_events_as_list(auth_event_ids) + # Create a StateMap[str] + auth_event_state_map = { + (e.type, e.state_key): e.event_id for e in auth_events + } + # Actually strip down and use the necessary auth events + auth_event_ids = self._event_auth_handler.compute_auth_events( + event=temp_event, + current_state_ids=auth_event_state_map, + for_verification=False, + ) + if prev_event_ids is not None: assert ( len(prev_event_ids) <= 10 @@ -965,6 +969,13 @@ class EventCreationHandler: if builder.internal_metadata.outlier: event.internal_metadata.outlier = True context = EventContext.for_outlier() + elif ( + event.type == EventTypes.MSC2716_INSERTION + and full_state_ids_at_event + and builder.internal_metadata.is_historical() + ): + old_state = await self.store.get_events_as_list(full_state_ids_at_event) + context = await self.state.compute_event_context(event, old_state=old_state) else: context = await self.state.compute_event_context(event) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 7072bca1fc..6f39e9446f 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -465,17 +465,35 @@ class RoomCreationHandler: # the room has been created # Calculate the minimum power level needed to clone the room event_power_levels = power_levels.get("events", {}) + if not isinstance(event_power_levels, dict): + event_power_levels = {} state_default = power_levels.get("state_default", 50) + try: + state_default_int = int(state_default) # type: ignore[arg-type] + except (TypeError, ValueError): + state_default_int = 50 ban = power_levels.get("ban", 50) - needed_power_level = max(state_default, ban, max(event_power_levels.values())) + try: + ban = int(ban) # type: ignore[arg-type] + except (TypeError, ValueError): + ban = 50 + needed_power_level = max( + state_default_int, ban, max(event_power_levels.values()) + ) # Get the user's current power level, this matches the logic in get_user_power_level, # but without the entire state map. user_power_levels = power_levels.setdefault("users", {}) + if not isinstance(user_power_levels, dict): + user_power_levels = {} users_default = power_levels.get("users_default", 0) current_power_level = user_power_levels.get(user_id, users_default) + try: + current_power_level_int = int(current_power_level) # type: ignore[arg-type] + except (TypeError, ValueError): + current_power_level_int = 0 # Raise the requester's power level in the new room if necessary - if current_power_level < needed_power_level: + if current_power_level_int < needed_power_level: user_power_levels[user_id] = needed_power_level await self._send_events_for_new_room( diff --git a/synapse/handlers/room_batch.py b/synapse/handlers/room_batch.py index 51dd4e7555..2f5a3e4d19 100644 --- a/synapse/handlers/room_batch.py +++ b/synapse/handlers/room_batch.py @@ -13,6 +13,10 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) +def generate_fake_event_id() -> str: + return "$fake_" + random_string(43) + + class RoomBatchHandler: def __init__(self, hs: "HomeServer"): self.hs = hs @@ -177,6 +181,11 @@ class RoomBatchHandler: state_event_ids_at_start = [] auth_event_ids = initial_auth_event_ids.copy() + + # Make the state events float off on their own so we don't have a + # bunch of `@mxid joined the room` noise between each batch + prev_event_id_for_state_chain = generate_fake_event_id() + for state_event in state_events_at_start: assert_params_in_dict( state_event, ["type", "origin_server_ts", "content", "sender"] @@ -200,10 +209,6 @@ class RoomBatchHandler: # Mark all events as historical event_dict["content"][EventContentFields.MSC2716_HISTORICAL] = True - # Make the state events float off on their own so we don't have a - # bunch of `@mxid joined the room` noise between each batch - fake_prev_event_id = "$" + random_string(43) - # TODO: This is pretty much the same as some other code to handle inserting state in this file if event_dict["type"] == EventTypes.Member: membership = event_dict["content"].get("membership", None) @@ -216,7 +221,7 @@ class RoomBatchHandler: action=membership, content=event_dict["content"], outlier=True, - prev_event_ids=[fake_prev_event_id], + prev_event_ids=[prev_event_id_for_state_chain], # Make sure to use a copy of this list because we modify it # later in the loop here. Otherwise it will be the same # reference and also update in the event when we append later. @@ -235,7 +240,7 @@ class RoomBatchHandler: ), event_dict, outlier=True, - prev_event_ids=[fake_prev_event_id], + prev_event_ids=[prev_event_id_for_state_chain], # Make sure to use a copy of this list because we modify it # later in the loop here. Otherwise it will be the same # reference and also update in the event when we append later. @@ -245,6 +250,8 @@ class RoomBatchHandler: state_event_ids_at_start.append(event_id) auth_event_ids.append(event_id) + # Connect all the state in a floating chain + prev_event_id_for_state_chain = event_id return state_event_ids_at_start @@ -289,6 +296,10 @@ class RoomBatchHandler: for ev in events_to_create: assert_params_in_dict(ev, ["type", "origin_server_ts", "content", "sender"]) + assert self.hs.is_mine_id(ev["sender"]), "User must be our own: %s" % ( + ev["sender"], + ) + event_dict = { "type": ev["type"], "origin_server_ts": ev["origin_server_ts"], @@ -311,6 +322,19 @@ class RoomBatchHandler: historical=True, depth=inherited_depth, ) + + assert context._state_group + + # Normally this is done when persisting the event but we have to + # pre-emptively do it here because we create all the events first, + # then persist them in another pass below. And we want to share + # state_groups across the whole batch so this lookup needs to work + # for the next event in the batch in this loop. + await self.store.store_state_group_id_for_event_id( + event_id=event.event_id, + state_group_id=context._state_group, + ) + logger.debug( "RoomBatchSendEventRestServlet inserting event=%s, prev_event_ids=%s, auth_event_ids=%s", event, @@ -318,10 +342,6 @@ class RoomBatchHandler: auth_event_ids, ) - assert self.hs.is_mine_id(event.sender), "User must be our own: %s" % ( - event.sender, - ) - events_to_persist.append((event, context)) event_id = event.event_id diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 8810f048ba..52b2de388f 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -196,63 +196,12 @@ class UserDirectoryHandler(StateDeltasHandler): room_id, prev_event_id, event_id, typ ) elif typ == EventTypes.Member: - change = await self._get_key_change( + await self._handle_room_membership_event( + room_id, prev_event_id, event_id, - key_name="membership", - public_value=Membership.JOIN, + state_key, ) - - is_remote = not self.is_mine_id(state_key) - if change is MatchChange.now_false: - # Need to check if the server left the room entirely, if so - # we might need to remove all the users in that room - is_in_room = await self.store.is_host_joined( - room_id, self.server_name - ) - if not is_in_room: - logger.debug("Server left room: %r", room_id) - # Fetch all the users that we marked as being in user - # directory due to being in the room and then check if - # need to remove those users or not - user_ids = await self.store.get_users_in_dir_due_to_room( - room_id - ) - - for user_id in user_ids: - await self._handle_remove_user(room_id, user_id) - continue - else: - logger.debug("Server is still in room: %r", room_id) - - include_in_dir = ( - is_remote - or await self.store.should_include_local_user_in_dir(state_key) - ) - if include_in_dir: - if change is MatchChange.no_change: - # Handle any profile changes for remote users. - # (For local users we are not forced to scan membership - # events; instead the rest of the application calls - # `handle_local_profile_change`.) - if is_remote: - await self._handle_profile_change( - state_key, room_id, prev_event_id, event_id - ) - continue - - if change is MatchChange.now_true: # The user joined - # This may be the first time we've seen a remote user. If - # so, ensure we have a directory entry for them. (We don't - # need to do this for local users: their directory entry - # is created at the point of registration. - if is_remote: - await self._upsert_directory_entry_for_remote_user( - state_key, event_id - ) - await self._track_user_joined_room(room_id, state_key) - else: # The user left - await self._handle_remove_user(room_id, state_key) else: logger.debug("Ignoring irrelevant type: %r", typ) @@ -326,6 +275,72 @@ class UserDirectoryHandler(StateDeltasHandler): for user_id in users_in_room: await self._track_user_joined_room(room_id, user_id) + async def _handle_room_membership_event( + self, + room_id: str, + prev_event_id: str, + event_id: str, + state_key: str, + ) -> None: + """Process a single room membershp event. + + We have to do two things: + + 1. Update the room-sharing tables. + This applies to remote users and non-excluded local users. + 2. Update the user_directory and user_directory_search tables. + This applies to remote users only, because we only become aware of + the (and any profile changes) by listening to these events. + The rest of the application knows exactly when local users are + created or their profile changed---it will directly call methods + on this class. + """ + joined = await self._get_key_change( + prev_event_id, + event_id, + key_name="membership", + public_value=Membership.JOIN, + ) + + # Both cases ignore excluded local users, so start by discarding them. + is_remote = not self.is_mine_id(state_key) + if not is_remote and not await self.store.should_include_local_user_in_dir( + state_key + ): + return + + if joined is MatchChange.now_false: + # Need to check if the server left the room entirely, if so + # we might need to remove all the users in that room + is_in_room = await self.store.is_host_joined(room_id, self.server_name) + if not is_in_room: + logger.debug("Server left room: %r", room_id) + # Fetch all the users that we marked as being in user + # directory due to being in the room and then check if + # need to remove those users or not + user_ids = await self.store.get_users_in_dir_due_to_room(room_id) + + for user_id in user_ids: + await self._handle_remove_user(room_id, user_id) + else: + logger.debug("Server is still in room: %r", room_id) + await self._handle_remove_user(room_id, state_key) + elif joined is MatchChange.no_change: + # Handle any profile changes for remote users. + # (For local users the rest of the application calls + # `handle_local_profile_change`.) + if is_remote: + await self._handle_possible_remote_profile_change( + state_key, room_id, prev_event_id, event_id + ) + elif joined is MatchChange.now_true: # The user joined + # This may be the first time we've seen a remote user. If + # so, ensure we have a directory entry for them. (For local users, + # the rest of the application calls `handle_local_profile_change`.) + if is_remote: + await self._upsert_directory_entry_for_remote_user(state_key, event_id) + await self._track_user_joined_room(room_id, state_key) + async def _upsert_directory_entry_for_remote_user( self, user_id: str, event_id: str ) -> None: @@ -386,7 +401,12 @@ class UserDirectoryHandler(StateDeltasHandler): await self.store.add_users_who_share_private_room(room_id, to_insert) async def _handle_remove_user(self, room_id: str, user_id: str) -> None: - """Called when we might need to remove user from directory + """Called when when someone leaves a room. The user may be local or remote. + + (If the person who left was the last local user in this room, the server + is no longer in the room. We call this function to forget that the remaining + remote users are in the room, even though they haven't left. So the name is + a little misleading!) Args: room_id: The room ID that user left or stopped being public that @@ -403,7 +423,7 @@ class UserDirectoryHandler(StateDeltasHandler): if len(rooms_user_is_in) == 0: await self.store.remove_from_user_dir(user_id) - async def _handle_profile_change( + async def _handle_possible_remote_profile_change( self, user_id: str, room_id: str, @@ -411,7 +431,8 @@ class UserDirectoryHandler(StateDeltasHandler): event_id: Optional[str], ) -> None: """Check member event changes for any profile changes and update the - database if there are. + database if there are. This is intended for remote users only. The caller + is responsible for checking that the given user is remote. """ if not prev_event_id or not event_id: return diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index 8ae21bc43c..ab7ef8f950 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -45,6 +45,7 @@ from synapse.http.servlet import parse_json_object_from_request from synapse.http.site import SynapseRequest from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.metrics.background_process_metrics import run_as_background_process +from synapse.rest.client.login import LoginResponse from synapse.storage.database import DatabasePool, LoggingTransaction from synapse.storage.databases.main.roommember import ProfileInfo from synapse.storage.state import StateFilter @@ -83,6 +84,8 @@ __all__ = [ "DirectServeJsonResource", "ModuleApi", "PRESENCE_ALL_USERS", + "LoginResponse", + "JsonDict", ] logger = logging.getLogger(__name__) @@ -139,6 +142,7 @@ class ModuleApi: self._spam_checker = hs.get_spam_checker() self._account_validity_handler = hs.get_account_validity_handler() self._third_party_event_rules = hs.get_third_party_event_rules() + self._password_auth_provider = hs.get_password_auth_provider() self._presence_router = hs.get_presence_router() ################################################################################# @@ -164,6 +168,11 @@ class ModuleApi: """Registers callbacks for presence router capabilities.""" return self._presence_router.register_presence_router_callbacks + @property + def register_password_auth_provider_callbacks(self): + """Registers callbacks for password auth provider capabilities.""" + return self._password_auth_provider.register_password_auth_provider_callbacks + def register_web_resource(self, path: str, resource: IResource): """Registers a web resource to be served at the given path. @@ -773,9 +782,9 @@ class ModuleApi: # Sanitize some of the data. We don't want to return tokens. return [ UserIpAndAgent( - ip=str(data["ip"]), - user_agent=str(data["user_agent"]), - last_seen=int(data["last_seen"]), + ip=data["ip"], + user_agent=data["user_agent"], + last_seen=data["last_seen"], ) for data in raw_data ] diff --git a/synapse/module_api/errors.py b/synapse/module_api/errors.py index 98ea911a81..1db900e41f 100644 --- a/synapse/module_api/errors.py +++ b/synapse/module_api/errors.py @@ -14,9 +14,16 @@ """Exception types which are exposed as part of the stable module API""" -from synapse.api.errors import ( # noqa: F401 +from synapse.api.errors import ( InvalidClientCredentialsError, RedirectException, SynapseError, ) -from synapse.config._base import ConfigError # noqa: F401 +from synapse.config._base import ConfigError + +__all__ = [ + "InvalidClientCredentialsError", + "RedirectException", + "SynapseError", + "ConfigError", +] diff --git a/synapse/py.typed b/synapse/py.typed new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/synapse/py.typed diff --git a/synapse/rest/client/relations.py b/synapse/rest/client/relations.py index 0b0711c03c..d695c18be2 100644 --- a/synapse/rest/client/relations.py +++ b/synapse/rest/client/relations.py @@ -232,12 +232,12 @@ class RelationPaginationServlet(RestServlet): # Similarly, we don't allow relations to be applied to relations, so we # return the original relations without any aggregations on top of them # here. - events = await self._event_serializer.serialize_events( + serialized_events = await self._event_serializer.serialize_events( events, now, bundle_aggregations=False ) return_value = pagination_chunk.to_dict() - return_value["chunk"] = events + return_value["chunk"] = serialized_events return_value["original_event"] = original_event return 200, return_value @@ -416,10 +416,10 @@ class RelationAggregationGroupPaginationServlet(RestServlet): ) now = self.clock.time_msec() - events = await self._event_serializer.serialize_events(events, now) + serialized_events = await self._event_serializer.serialize_events(events, now) return_value = result.to_dict() - return_value["chunk"] = events + return_value["chunk"] = serialized_events return 200, return_value diff --git a/synapse/rest/client/room_batch.py b/synapse/rest/client/room_batch.py index 38ad4c2447..99f8156ad0 100644 --- a/synapse/rest/client/room_batch.py +++ b/synapse/rest/client/room_batch.py @@ -32,7 +32,6 @@ from synapse.http.servlet import ( from synapse.http.site import SynapseRequest from synapse.rest.client.transactions import HttpTransactionCache from synapse.types import JsonDict -from synapse.util.stringutils import random_string if TYPE_CHECKING: from synapse.server import HomeServer @@ -160,11 +159,6 @@ class RoomBatchSendEventRestServlet(RestServlet): base_insertion_event = None if batch_id_from_query: batch_id_to_connect_to = batch_id_from_query - # All but the first base insertion event should point at a fake - # event, which causes the HS to ask for the state at the start of - # the batch later. - fake_prev_event_id = "$" + random_string(43) - prev_event_ids = [fake_prev_event_id] # Otherwise, create an insertion event to act as a starting point. # # We don't always have an insertion event to start hanging more history @@ -173,8 +167,6 @@ class RoomBatchSendEventRestServlet(RestServlet): # an insertion event), in which case we just create a new insertion event # that can then get pointed to by a "marker" event later. else: - prev_event_ids = prev_event_ids_from_query - base_insertion_event_dict = ( self.room_batch_handler.create_insertion_event_dict( sender=requester.user.to_string(), @@ -182,7 +174,7 @@ class RoomBatchSendEventRestServlet(RestServlet): origin_server_ts=last_event_in_batch["origin_server_ts"], ) ) - base_insertion_event_dict["prev_events"] = prev_event_ids.copy() + base_insertion_event_dict["prev_events"] = prev_event_ids_from_query.copy() ( base_insertion_event, @@ -203,6 +195,11 @@ class RoomBatchSendEventRestServlet(RestServlet): EventContentFields.MSC2716_NEXT_BATCH_ID ] + # Also connect the historical event chain to the end of the floating + # state chain, which causes the HS to ask for the state at the start of + # the batch later. + prev_event_ids = [state_event_ids_at_start[-1]] + # Create and persist all of the historical events as well as insertion # and batch meta events to make the batch navigable in the DAG. event_ids, next_batch_id = await self.room_batch_handler.handle_batch_of_events( diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index 08bd85f664..bec77088ee 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -16,12 +16,15 @@ import functools import os import re -from typing import Any, Callable, List +from typing import Any, Callable, List, TypeVar, cast NEW_FORMAT_ID_RE = re.compile(r"^\d\d\d\d-\d\d-\d\d") -def _wrap_in_base_path(func: Callable[..., str]) -> Callable[..., str]: +F = TypeVar("F", bound=Callable[..., str]) + + +def _wrap_in_base_path(func: F) -> F: """Takes a function that returns a relative path and turns it into an absolute path based on the location of the primary media store """ @@ -31,7 +34,7 @@ def _wrap_in_base_path(func: Callable[..., str]) -> Callable[..., str]: path = func(self, *args, **kwargs) return os.path.join(self.base_path, path) - return _wrapped + return cast(F, _wrapped) class MediaFilePaths: @@ -45,23 +48,6 @@ class MediaFilePaths: def __init__(self, primary_base_path: str): self.base_path = primary_base_path - def default_thumbnail_rel( - self, - default_top_level: str, - default_sub_type: str, - width: int, - height: int, - content_type: str, - method: str, - ) -> str: - top_level_type, sub_type = content_type.split("/") - file_name = "%i-%i-%s-%s-%s" % (width, height, top_level_type, sub_type, method) - return os.path.join( - "default_thumbnails", default_top_level, default_sub_type, file_name - ) - - default_thumbnail = _wrap_in_base_path(default_thumbnail_rel) - def local_media_filepath_rel(self, media_id: str) -> str: return os.path.join("local_content", media_id[0:2], media_id[2:4], media_id[4:]) diff --git a/synapse/rest/media/v1/oembed.py b/synapse/rest/media/v1/oembed.py index 78b1603f19..2a59552c20 100644 --- a/synapse/rest/media/v1/oembed.py +++ b/synapse/rest/media/v1/oembed.py @@ -17,7 +17,6 @@ from typing import TYPE_CHECKING, List, Optional import attr -from synapse.http.client import SimpleHttpClient from synapse.types import JsonDict from synapse.util import json_decoder @@ -48,7 +47,7 @@ class OEmbedProvider: requesting/parsing oEmbed content. """ - def __init__(self, hs: "HomeServer", client: SimpleHttpClient): + def __init__(self, hs: "HomeServer"): self._oembed_patterns = {} for oembed_endpoint in hs.config.oembed.oembed_patterns: api_endpoint = oembed_endpoint.api_endpoint @@ -69,7 +68,6 @@ class OEmbedProvider: # Iterate through each URL pattern and point it to the endpoint. for pattern in oembed_endpoint.url_patterns: self._oembed_patterns[pattern] = api_endpoint - self._client = client def get_oembed_url(self, url: str) -> Optional[str]: """ @@ -139,10 +137,11 @@ class OEmbedProvider: # oEmbed responses *must* be UTF-8 according to the spec. oembed = json_decoder.decode(raw_body.decode("utf-8")) - # Ensure there's a version of 1.0. - oembed_version = oembed["version"] - if oembed_version != "1.0": - raise RuntimeError(f"Invalid version: {oembed_version}") + # The version is a required string field, but not always provided, + # or sometimes provided as a float. Be lenient. + oembed_version = oembed.get("version", "1.0") + if oembed_version != "1.0" and oembed_version != 1: + raise RuntimeError(f"Invalid oEmbed version: {oembed_version}") # Ensure the cache age is None or an int. cache_age = oembed.get("cache_age") diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 1fe0fc8aa9..5bddd21ef1 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -140,7 +140,7 @@ class PreviewUrlResource(DirectServeJsonResource): self.primary_base_path = media_repo.primary_base_path self.media_storage = media_storage - self._oembed = OEmbedProvider(hs, self.client) + self._oembed = OEmbedProvider(hs) # We run the background jobs if we're the instance specified (or no # instance is specified, where we assume there is only one instance diff --git a/synapse/server.py b/synapse/server.py index 5bc045d615..a64c846d1c 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -65,7 +65,7 @@ from synapse.handlers.account_data import AccountDataHandler from synapse.handlers.account_validity import AccountValidityHandler from synapse.handlers.admin import AdminHandler from synapse.handlers.appservice import ApplicationServicesHandler -from synapse.handlers.auth import AuthHandler, MacaroonGenerator +from synapse.handlers.auth import AuthHandler, MacaroonGenerator, PasswordAuthProvider from synapse.handlers.cas import CasHandler from synapse.handlers.deactivate_account import DeactivateAccountHandler from synapse.handlers.device import DeviceHandler, DeviceWorkerHandler @@ -688,6 +688,10 @@ class HomeServer(metaclass=abc.ABCMeta): return ThirdPartyEventRules(self) @cache_in_self + def get_password_auth_provider(self) -> PasswordAuthProvider: + return PasswordAuthProvider() + + @cache_in_self def get_room_member_handler(self) -> RoomMemberHandler: if self.config.worker.worker_app: return RoomMemberWorkerHandler(self) diff --git a/synapse/storage/databases/main/client_ips.py b/synapse/storage/databases/main/client_ips.py index 6c1ef09049..b81d9218ce 100644 --- a/synapse/storage/databases/main/client_ips.py +++ b/synapse/storage/databases/main/client_ips.py @@ -13,14 +13,26 @@ # limitations under the License. import logging -from typing import Dict, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Dict, List, Mapping, Optional, Tuple, Union, cast + +from typing_extensions import TypedDict from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.storage._base import SQLBaseStore -from synapse.storage.database import DatabasePool, make_tuple_comparison_clause -from synapse.types import UserID +from synapse.storage.database import ( + DatabasePool, + LoggingDatabaseConnection, + LoggingTransaction, + make_tuple_comparison_clause, +) +from synapse.storage.databases.main.monthly_active_users import MonthlyActiveUsersStore +from synapse.storage.types import Connection +from synapse.types import JsonDict, UserID from synapse.util.caches.lrucache import LruCache +if TYPE_CHECKING: + from synapse.server import HomeServer + logger = logging.getLogger(__name__) # Number of msec of granularity to store the user IP 'last seen' time. Smaller @@ -29,8 +41,31 @@ logger = logging.getLogger(__name__) LAST_SEEN_GRANULARITY = 120 * 1000 +class DeviceLastConnectionInfo(TypedDict): + """Metadata for the last connection seen for a user and device combination""" + + # These types must match the columns in the `devices` table + user_id: str + device_id: str + + ip: Optional[str] + user_agent: Optional[str] + last_seen: Optional[int] + + +class LastConnectionInfo(TypedDict): + """Metadata for the last connection seen for an access token and IP combination""" + + # These types must match the columns in the `user_ips` table + access_token: str + ip: str + + user_agent: str + last_seen: int + + class ClientIpBackgroundUpdateStore(SQLBaseStore): - def __init__(self, database: DatabasePool, db_conn, hs): + def __init__(self, database: DatabasePool, db_conn: Connection, hs: "HomeServer"): super().__init__(database, db_conn, hs) self.db_pool.updates.register_background_index_update( @@ -81,8 +116,10 @@ class ClientIpBackgroundUpdateStore(SQLBaseStore): "devices_last_seen", self._devices_last_seen_update ) - async def _remove_user_ip_nonunique(self, progress, batch_size): - def f(conn): + async def _remove_user_ip_nonunique( + self, progress: JsonDict, batch_size: int + ) -> int: + def f(conn: LoggingDatabaseConnection) -> None: txn = conn.cursor() txn.execute("DROP INDEX IF EXISTS user_ips_user_ip") txn.close() @@ -93,14 +130,14 @@ class ClientIpBackgroundUpdateStore(SQLBaseStore): ) return 1 - async def _analyze_user_ip(self, progress, batch_size): + async def _analyze_user_ip(self, progress: JsonDict, batch_size: int) -> int: # Background update to analyze user_ips table before we run the # deduplication background update. The table may not have been analyzed # for ages due to the table locks. # # This will lock out the naive upserts to user_ips while it happens, but # the analyze should be quick (28GB table takes ~10s) - def user_ips_analyze(txn): + def user_ips_analyze(txn: LoggingTransaction) -> None: txn.execute("ANALYZE user_ips") await self.db_pool.runInteraction("user_ips_analyze", user_ips_analyze) @@ -109,16 +146,16 @@ class ClientIpBackgroundUpdateStore(SQLBaseStore): return 1 - async def _remove_user_ip_dupes(self, progress, batch_size): + async def _remove_user_ip_dupes(self, progress: JsonDict, batch_size: int) -> int: # This works function works by scanning the user_ips table in batches # based on `last_seen`. For each row in a batch it searches the rest of # the table to see if there are any duplicates, if there are then they # are removed and replaced with a suitable row. # Fetch the start of the batch - begin_last_seen = progress.get("last_seen", 0) + begin_last_seen: int = progress.get("last_seen", 0) - def get_last_seen(txn): + def get_last_seen(txn: LoggingTransaction) -> Optional[int]: txn.execute( """ SELECT last_seen FROM user_ips @@ -129,7 +166,7 @@ class ClientIpBackgroundUpdateStore(SQLBaseStore): """, (begin_last_seen, batch_size), ) - row = txn.fetchone() + row = cast(Optional[Tuple[int]], txn.fetchone()) if row: return row[0] else: @@ -149,7 +186,7 @@ class ClientIpBackgroundUpdateStore(SQLBaseStore): end_last_seen, ) - def remove(txn): + def remove(txn: LoggingTransaction) -> None: # This works by looking at all entries in the given time span, and # then for each (user_id, access_token, ip) tuple in that range # checking for any duplicates in the rest of the table (via a join). @@ -161,10 +198,12 @@ class ClientIpBackgroundUpdateStore(SQLBaseStore): # Define the search space, which requires handling the last batch in # a different way + args: Tuple[int, ...] if last: clause = "? <= last_seen" args = (begin_last_seen,) else: + assert end_last_seen is not None clause = "? <= last_seen AND last_seen < ?" args = (begin_last_seen, end_last_seen) @@ -189,7 +228,9 @@ class ClientIpBackgroundUpdateStore(SQLBaseStore): ), args, ) - res = txn.fetchall() + res = cast( + List[Tuple[str, str, str, Optional[str], str, int, int]], txn.fetchall() + ) # We've got some duplicates for i in res: @@ -278,13 +319,15 @@ class ClientIpBackgroundUpdateStore(SQLBaseStore): return batch_size - async def _devices_last_seen_update(self, progress, batch_size): + async def _devices_last_seen_update( + self, progress: JsonDict, batch_size: int + ) -> int: """Background update to insert last seen info into devices table""" - last_user_id = progress.get("last_user_id", "") - last_device_id = progress.get("last_device_id", "") + last_user_id: str = progress.get("last_user_id", "") + last_device_id: str = progress.get("last_device_id", "") - def _devices_last_seen_update_txn(txn): + def _devices_last_seen_update_txn(txn: LoggingTransaction) -> int: # This consists of two queries: # # 1. The sub-query searches for the next N devices and joins @@ -296,6 +339,7 @@ class ClientIpBackgroundUpdateStore(SQLBaseStore): # we'll just end up updating the same device row multiple # times, which is fine. + where_args: List[Union[str, int]] where_clause, where_args = make_tuple_comparison_clause( [("user_id", last_user_id), ("device_id", last_device_id)], ) @@ -319,7 +363,7 @@ class ClientIpBackgroundUpdateStore(SQLBaseStore): } txn.execute(sql, where_args + [batch_size]) - rows = txn.fetchall() + rows = cast(List[Tuple[int, str, str, str, str]], txn.fetchall()) if not rows: return 0 @@ -350,7 +394,7 @@ class ClientIpBackgroundUpdateStore(SQLBaseStore): class ClientIpWorkerStore(ClientIpBackgroundUpdateStore): - def __init__(self, database: DatabasePool, db_conn, hs): + def __init__(self, database: DatabasePool, db_conn: Connection, hs: "HomeServer"): super().__init__(database, db_conn, hs) self.user_ips_max_age = hs.config.server.user_ips_max_age @@ -359,7 +403,7 @@ class ClientIpWorkerStore(ClientIpBackgroundUpdateStore): self._clock.looping_call(self._prune_old_user_ips, 5 * 1000) @wrap_as_background_process("prune_old_user_ips") - async def _prune_old_user_ips(self): + async def _prune_old_user_ips(self) -> None: """Removes entries in user IPs older than the configured period.""" if self.user_ips_max_age is None: @@ -394,9 +438,9 @@ class ClientIpWorkerStore(ClientIpBackgroundUpdateStore): ) """ - timestamp = self.clock.time_msec() - self.user_ips_max_age + timestamp = self._clock.time_msec() - self.user_ips_max_age - def _prune_old_user_ips_txn(txn): + def _prune_old_user_ips_txn(txn: LoggingTransaction) -> None: txn.execute(sql, (timestamp,)) await self.db_pool.runInteraction( @@ -405,7 +449,7 @@ class ClientIpWorkerStore(ClientIpBackgroundUpdateStore): async def get_last_client_ip_by_device( self, user_id: str, device_id: Optional[str] - ) -> Dict[Tuple[str, str], dict]: + ) -> Dict[Tuple[str, str], DeviceLastConnectionInfo]: """For each device_id listed, give the user_ip it was last seen on. The result might be slightly out of date as client IPs are inserted in batches. @@ -423,26 +467,32 @@ class ClientIpWorkerStore(ClientIpBackgroundUpdateStore): if device_id is not None: keyvalues["device_id"] = device_id - res = await self.db_pool.simple_select_list( - table="devices", - keyvalues=keyvalues, - retcols=("user_id", "ip", "user_agent", "device_id", "last_seen"), + res = cast( + List[DeviceLastConnectionInfo], + await self.db_pool.simple_select_list( + table="devices", + keyvalues=keyvalues, + retcols=("user_id", "ip", "user_agent", "device_id", "last_seen"), + ), ) return {(d["user_id"], d["device_id"]): d for d in res} -class ClientIpStore(ClientIpWorkerStore): - def __init__(self, database: DatabasePool, db_conn, hs): +class ClientIpStore(ClientIpWorkerStore, MonthlyActiveUsersStore): + def __init__(self, database: DatabasePool, db_conn: Connection, hs: "HomeServer"): - self.client_ip_last_seen = LruCache( + # (user_id, access_token, ip,) -> last_seen + self.client_ip_last_seen = LruCache[Tuple[str, str, str], int]( cache_name="client_ip_last_seen", max_size=50000 ) super().__init__(database, db_conn, hs) # (user_id, access_token, ip,) -> (user_agent, device_id, last_seen) - self._batch_row_update = {} + self._batch_row_update: Dict[ + Tuple[str, str, str], Tuple[str, Optional[str], int] + ] = {} self._client_ip_looper = self._clock.looping_call( self._update_client_ips_batch, 5 * 1000 @@ -452,8 +502,14 @@ class ClientIpStore(ClientIpWorkerStore): ) async def insert_client_ip( - self, user_id, access_token, ip, user_agent, device_id, now=None - ): + self, + user_id: str, + access_token: str, + ip: str, + user_agent: str, + device_id: Optional[str], + now: Optional[int] = None, + ) -> None: if not now: now = int(self._clock.time_msec()) key = (user_id, access_token, ip) @@ -485,7 +541,11 @@ class ClientIpStore(ClientIpWorkerStore): "_update_client_ips_batch", self._update_client_ips_batch_txn, to_update ) - def _update_client_ips_batch_txn(self, txn, to_update): + def _update_client_ips_batch_txn( + self, + txn: LoggingTransaction, + to_update: Mapping[Tuple[str, str, str], Tuple[str, Optional[str], int]], + ) -> None: if "user_ips" in self.db_pool._unsafe_to_upsert_tables or ( not self.database_engine.can_native_upsert ): @@ -525,7 +585,7 @@ class ClientIpStore(ClientIpWorkerStore): async def get_last_client_ip_by_device( self, user_id: str, device_id: Optional[str] - ) -> Dict[Tuple[str, str], dict]: + ) -> Dict[Tuple[str, str], DeviceLastConnectionInfo]: """For each device_id listed, give the user_ip it was last seen on Args: @@ -561,12 +621,12 @@ class ClientIpStore(ClientIpWorkerStore): async def get_user_ip_and_agents( self, user: UserID, since_ts: int = 0 - ) -> List[Dict[str, Union[str, int]]]: + ) -> List[LastConnectionInfo]: """ Fetch IP/User Agent connection since a given timestamp. """ user_id = user.to_string() - results = {} + results: Dict[Tuple[str, str], Tuple[str, int]] = {} for key in self._batch_row_update: ( @@ -579,7 +639,7 @@ class ClientIpStore(ClientIpWorkerStore): if last_seen >= since_ts: results[(access_token, ip)] = (user_agent, last_seen) - def get_recent(txn): + def get_recent(txn: LoggingTransaction) -> List[Tuple[str, str, str, int]]: txn.execute( """ SELECT access_token, ip, user_agent, last_seen FROM user_ips @@ -589,7 +649,7 @@ class ClientIpStore(ClientIpWorkerStore): """, (since_ts, user_id), ) - return txn.fetchall() + return cast(List[Tuple[str, str, str, int]], txn.fetchall()) rows = await self.db_pool.runInteraction( desc="get_user_ip_and_agents", func=get_recent diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 19f55c19c5..37439f8562 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -2069,12 +2069,14 @@ class PersistEventsStore: state_groups[event.event_id] = context.state_group - self.db_pool.simple_insert_many_txn( + self.db_pool.simple_upsert_many_txn( txn, table="event_to_state_groups", - values=[ - {"state_group": state_group_id, "event_id": event_id} - for event_id, state_group_id in state_groups.items() + key_names=["event_id"], + key_values=[[event_id] for event_id, _ in state_groups.items()], + value_names=["state_group"], + value_values=[ + [state_group_id] for _, state_group_id in state_groups.items() ], ) diff --git a/synapse/storage/databases/main/room_batch.py b/synapse/storage/databases/main/room_batch.py index 300a563c9e..dcbce8fdcf 100644 --- a/synapse/storage/databases/main/room_batch.py +++ b/synapse/storage/databases/main/room_batch.py @@ -36,3 +36,16 @@ class RoomBatchStore(SQLBaseStore): retcol="event_id", allow_none=True, ) + + async def store_state_group_id_for_event_id( + self, event_id: str, state_group_id: int + ) -> Optional[str]: + { + await self.db_pool.simple_upsert( + table="event_to_state_groups", + keyvalues={"event_id": event_id}, + values={"state_group": state_group_id, "event_id": event_id}, + # Unique constraint on event_id so we don't have to lock + lock=False, + ) + } diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 11ca47ea28..1629d2a53c 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -549,6 +549,8 @@ def _apply_module_schemas( database_engine: config: application config """ + # This is the old way for password_auth_provider modules to make changes + # to the database. This should instead be done using the module API for (mod, _config) in config.authproviders.password_providers: if not hasattr(mod, "get_db_schema_files"): continue diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index 1aee741a8b..a1d2332326 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -SCHEMA_VERSION = 64 # remember to update the list below when updating +SCHEMA_VERSION = 65 # remember to update the list below when updating """Represents the expectations made by the codebase about the database schema This should be incremented whenever the codebase changes its requirements on the @@ -41,6 +41,10 @@ Changes in SCHEMA_VERSION = 63: Changes in SCHEMA_VERSION = 64: - MSC2716: Rename related tables and columns from "chunks" to "batches". + +Changes in SCHEMA_VERSION = 65: + - MSC2716: Remove unique event_id constraint from insertion_event_edges + because an insertion event can have multiple edges. """ diff --git a/synapse/storage/schema/main/delta/65/01msc2716_insertion_event_edges.sql b/synapse/storage/schema/main/delta/65/01msc2716_insertion_event_edges.sql new file mode 100644 index 0000000000..98b25daf45 --- /dev/null +++ b/synapse/storage/schema/main/delta/65/01msc2716_insertion_event_edges.sql @@ -0,0 +1,19 @@ +/* Copyright 2021 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Recreate the insertion_event_edges event_id index without the unique constraint +-- because an insertion event can have multiple edges. +DROP INDEX insertion_event_edges_event_id; +CREATE INDEX IF NOT EXISTS insertion_event_edges_event_id ON insertion_event_edges(event_id); |