From 36df9c5e36cbad2a378d922085453726a21ae80c Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Fri, 5 May 2023 12:13:50 -0400 Subject: Implement MSC4009 to widen the allowed Matrix ID grammar (#15536) Behind a configuration flag this adds + to the list of allowed characters in Matrix IDs. The main feature this enables is using full E.164 phone numbers as Matrix IDs. --- synapse/config/experimental.py | 3 +++ synapse/handlers/register.py | 27 ++++++++++++++------------- synapse/handlers/sso.py | 6 ++++-- synapse/types/__init__.py | 21 +++++++++++++++++++-- 4 files changed, 40 insertions(+), 17 deletions(-) (limited to 'synapse') diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index cab7ccf4b7..514d87cb2c 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -199,3 +199,6 @@ class ExperimentalConfig(Config): # MSC3970: Scope transaction IDs to devices self.msc3970_enabled = experimental.get("msc3970_enabled", False) + + # MSC4009: E.164 Matrix IDs + self.msc4009_e164_mxids = experimental.get("msc4009_e164_mxids", False) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 61c4b833bd..c80946c2e9 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -46,7 +46,7 @@ from synapse.replication.http.register import ( ReplicationRegisterServlet, ) from synapse.spam_checker_api import RegistrationBehaviour -from synapse.types import RoomAlias, UserID, create_requester +from synapse.types import GUEST_USER_ID_PATTERN, RoomAlias, UserID, create_requester from synapse.types.state import StateFilter if TYPE_CHECKING: @@ -143,10 +143,15 @@ class RegistrationHandler: assigned_user_id: Optional[str] = None, inhibit_user_in_use_error: bool = False, ) -> None: - if types.contains_invalid_mxid_characters(localpart): + if types.contains_invalid_mxid_characters( + localpart, self.hs.config.experimental.msc4009_e164_mxids + ): + extra_chars = ( + "=_-./+" if 
self.hs.config.experimental.msc4009_e164_mxids else "=_-./" + ) raise SynapseError( 400, - "User ID can only contain characters a-z, 0-9, or '=_-./'", + f"User ID can only contain characters a-z, 0-9, or '{extra_chars}'", Codes.INVALID_USERNAME, ) @@ -195,16 +200,12 @@ class RegistrationHandler: errcode=Codes.FORBIDDEN, ) - if guest_access_token is None: - try: - int(localpart) - raise SynapseError( - 400, - "Numeric user IDs are reserved for guest users.", - errcode=Codes.INVALID_USERNAME, - ) - except ValueError: - pass + if guest_access_token is None and GUEST_USER_ID_PATTERN.fullmatch(localpart): + raise SynapseError( + 400, + "Numeric user IDs are reserved for guest users.", + errcode=Codes.INVALID_USERNAME, + ) async def register_user( self, diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py index 92c3742625..25fd2eb3a1 100644 --- a/synapse/handlers/sso.py +++ b/synapse/handlers/sso.py @@ -225,6 +225,8 @@ class SsoHandler: self._consent_at_registration = hs.config.consent.user_consent_at_registration + self._e164_mxids = hs.config.experimental.msc4009_e164_mxids + def register_identity_provider(self, p: SsoIdentityProvider) -> None: p_id = p.idp_id assert p_id not in self._identity_providers @@ -711,7 +713,7 @@ class SsoHandler: # Since the localpart is provided via a potentially untrusted module, # ensure the MXID is valid before registering. 
if not attributes.localpart or contains_invalid_mxid_characters( - attributes.localpart + attributes.localpart, self._e164_mxids ): raise MappingException("localpart is invalid: %s" % (attributes.localpart,)) @@ -944,7 +946,7 @@ class SsoHandler: localpart, ) - if contains_invalid_mxid_characters(localpart): + if contains_invalid_mxid_characters(localpart, self._e164_mxids): raise SynapseError(400, "localpart is invalid: %s" % (localpart,)) user_id = UserID(localpart, self._server_name).to_string() user_infos = await self._store.get_users_by_id_case_insensitive(user_id) diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py index 5cee9c3194..325219656a 100644 --- a/synapse/types/__init__.py +++ b/synapse/types/__init__.py @@ -335,18 +335,35 @@ class EventID(DomainSpecificString): mxid_localpart_allowed_characters = set( "_-./=" + string.ascii_lowercase + string.digits ) +# MSC4009 adds the + to the allowed characters. +# +# TODO If this is accepted, update the SSO code to support this, see the callers +# of map_username_to_mxid_localpart. +extended_mxid_localpart_allowed_characters = mxid_localpart_allowed_characters | {"+"} + +# Guest user IDs are purely numeric. +GUEST_USER_ID_PATTERN = re.compile(r"^\d+$") -def contains_invalid_mxid_characters(localpart: str) -> bool: +def contains_invalid_mxid_characters( + localpart: str, use_extended_character_set: bool +) -> bool: """Check for characters not allowed in an mxid or groupid localpart Args: localpart: the localpart to be checked + use_extended_character_set: True to use the extended allowed characters + from MSC4009. 
Returns: True if there are any naughty characters """ - return any(c not in mxid_localpart_allowed_characters for c in localpart) + allowed_characters = ( + extended_mxid_localpart_allowed_characters + if use_extended_character_set + else mxid_localpart_allowed_characters + ) + return any(c not in allowed_characters for c in localpart) UPPER_CASE_PATTERN = re.compile(b"[A-Z_]") -- cgit 1.4.1