diff --git a/changelog.d/15536.feature b/changelog.d/15536.feature
new file mode 100644
index 0000000000..824c24575f
--- /dev/null
+++ b/changelog.d/15536.feature
@@ -0,0 +1 @@
+Implement [MSC4009](https://github.com/matrix-org/matrix-spec-proposals/pull/4009) to expand the supported characters in Matrix IDs.
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index cab7ccf4b7..514d87cb2c 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -199,3 +199,6 @@ class ExperimentalConfig(Config):
# MSC3970: Scope transaction IDs to devices
self.msc3970_enabled = experimental.get("msc3970_enabled", False)
+
+ # MSC4009: E.164 Matrix IDs
+ self.msc4009_e164_mxids = experimental.get("msc4009_e164_mxids", False)
diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py
index 61c4b833bd..c80946c2e9 100644
--- a/synapse/handlers/register.py
+++ b/synapse/handlers/register.py
@@ -46,7 +46,7 @@ from synapse.replication.http.register import (
ReplicationRegisterServlet,
)
from synapse.spam_checker_api import RegistrationBehaviour
-from synapse.types import RoomAlias, UserID, create_requester
+from synapse.types import GUEST_USER_ID_PATTERN, RoomAlias, UserID, create_requester
from synapse.types.state import StateFilter
if TYPE_CHECKING:
@@ -143,10 +143,15 @@ class RegistrationHandler:
assigned_user_id: Optional[str] = None,
inhibit_user_in_use_error: bool = False,
) -> None:
- if types.contains_invalid_mxid_characters(localpart):
+ if types.contains_invalid_mxid_characters(
+ localpart, self.hs.config.experimental.msc4009_e164_mxids
+ ):
+ extra_chars = (
+ "=_-./+" if self.hs.config.experimental.msc4009_e164_mxids else "=_-./"
+ )
raise SynapseError(
400,
- "User ID can only contain characters a-z, 0-9, or '=_-./'",
+ f"User ID can only contain characters a-z, 0-9, or '{extra_chars}'",
Codes.INVALID_USERNAME,
)
@@ -195,16 +200,12 @@ class RegistrationHandler:
errcode=Codes.FORBIDDEN,
)
- if guest_access_token is None:
- try:
- int(localpart)
- raise SynapseError(
- 400,
- "Numeric user IDs are reserved for guest users.",
- errcode=Codes.INVALID_USERNAME,
- )
- except ValueError:
- pass
+ if guest_access_token is None and GUEST_USER_ID_PATTERN.fullmatch(localpart):
+ raise SynapseError(
+ 400,
+ "Numeric user IDs are reserved for guest users.",
+ errcode=Codes.INVALID_USERNAME,
+ )
async def register_user(
self,
diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py
index 92c3742625..25fd2eb3a1 100644
--- a/synapse/handlers/sso.py
+++ b/synapse/handlers/sso.py
@@ -225,6 +225,8 @@ class SsoHandler:
self._consent_at_registration = hs.config.consent.user_consent_at_registration
+ self._e164_mxids = hs.config.experimental.msc4009_e164_mxids
+
def register_identity_provider(self, p: SsoIdentityProvider) -> None:
p_id = p.idp_id
assert p_id not in self._identity_providers
@@ -711,7 +713,7 @@ class SsoHandler:
# Since the localpart is provided via a potentially untrusted module,
# ensure the MXID is valid before registering.
if not attributes.localpart or contains_invalid_mxid_characters(
- attributes.localpart
+ attributes.localpart, self._e164_mxids
):
raise MappingException("localpart is invalid: %s" % (attributes.localpart,))
@@ -944,7 +946,7 @@ class SsoHandler:
localpart,
)
- if contains_invalid_mxid_characters(localpart):
+ if contains_invalid_mxid_characters(localpart, self._e164_mxids):
raise SynapseError(400, "localpart is invalid: %s" % (localpart,))
user_id = UserID(localpart, self._server_name).to_string()
user_infos = await self._store.get_users_by_id_case_insensitive(user_id)
diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py
index 5cee9c3194..325219656a 100644
--- a/synapse/types/__init__.py
+++ b/synapse/types/__init__.py
@@ -335,18 +335,35 @@ class EventID(DomainSpecificString):
mxid_localpart_allowed_characters = set(
"_-./=" + string.ascii_lowercase + string.digits
)
+# MSC4007 adds the + to the allowed characters.
+#
+# TODO If this was accepted, update the SSO code to support this, see the callers
+# of map_username_to_mxid_localpart.
+extended_mxid_localpart_allowed_characters = mxid_localpart_allowed_characters | {"+"}
+
+# Guest user IDs are purely numeric.
+GUEST_USER_ID_PATTERN = re.compile(r"^\d+$")
-def contains_invalid_mxid_characters(localpart: str) -> bool:
+def contains_invalid_mxid_characters(
+ localpart: str, use_extended_character_set: bool
+) -> bool:
"""Check for characters not allowed in an mxid or groupid localpart
Args:
localpart: the localpart to be checked
+ use_extended_character_set: True to use the extended allowed characters
+ from MSC4009.
Returns:
True if there are any naughty characters
"""
- return any(c not in mxid_localpart_allowed_characters for c in localpart)
+ allowed_characters = (
+ extended_mxid_localpart_allowed_characters
+ if use_extended_character_set
+ else mxid_localpart_allowed_characters
+ )
+ return any(c not in allowed_characters for c in localpart)
UPPER_CASE_PATTERN = re.compile(b"[A-Z_]")
diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py
index aff1ec4758..73822b07a5 100644
--- a/tests/handlers/test_register.py
+++ b/tests/handlers/test_register.py
@@ -586,6 +586,19 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
d = self.store.is_support_user(user_id)
self.assertFalse(self.get_success(d))
+ def test_invalid_user_id(self) -> None:
+ invalid_user_id = "+abcd"
+ self.get_failure(
+ self.handler.register_user(localpart=invalid_user_id), SynapseError
+ )
+
+ @override_config({"experimental_features": {"msc4009_e164_mxids": True}})
+ def text_extended_user_ids(self) -> None:
+ """+ should be allowed according to MSC4009."""
+ valid_user_id = "+1234"
+ user_id = self.get_success(self.handler.register_user(localpart=valid_user_id))
+ self.assertEqual(user_id, valid_user_id)
+
def test_invalid_user_id_length(self) -> None:
invalid_user_id = "x" * 256
self.get_failure(
|