summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--changelog.d/15536.feature1
-rw-r--r--synapse/config/experimental.py3
-rw-r--r--synapse/handlers/register.py27
-rw-r--r--synapse/handlers/sso.py6
-rw-r--r--synapse/types/__init__.py21
-rw-r--r--tests/handlers/test_register.py13
6 files changed, 54 insertions, 17 deletions
diff --git a/changelog.d/15536.feature b/changelog.d/15536.feature
new file mode 100644
index 0000000000..824c24575f
--- /dev/null
+++ b/changelog.d/15536.feature
@@ -0,0 +1 @@
+Implement [MSC4009](https://github.com/matrix-org/matrix-spec-proposals/pull/4009) to expand the supported characters in Matrix IDs.
diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py
index cab7ccf4b7..514d87cb2c 100644
--- a/synapse/config/experimental.py
+++ b/synapse/config/experimental.py
@@ -199,3 +199,6 @@ class ExperimentalConfig(Config):
 
         # MSC3970: Scope transaction IDs to devices
         self.msc3970_enabled = experimental.get("msc3970_enabled", False)
+
+        # MSC4009: E.164 Matrix IDs
+        self.msc4009_e164_mxids = experimental.get("msc4009_e164_mxids", False)
diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py
index 61c4b833bd..c80946c2e9 100644
--- a/synapse/handlers/register.py
+++ b/synapse/handlers/register.py
@@ -46,7 +46,7 @@ from synapse.replication.http.register import (
     ReplicationRegisterServlet,
 )
 from synapse.spam_checker_api import RegistrationBehaviour
-from synapse.types import RoomAlias, UserID, create_requester
+from synapse.types import GUEST_USER_ID_PATTERN, RoomAlias, UserID, create_requester
 from synapse.types.state import StateFilter
 
 if TYPE_CHECKING:
@@ -143,10 +143,15 @@ class RegistrationHandler:
         assigned_user_id: Optional[str] = None,
         inhibit_user_in_use_error: bool = False,
     ) -> None:
-        if types.contains_invalid_mxid_characters(localpart):
+        if types.contains_invalid_mxid_characters(
+            localpart, self.hs.config.experimental.msc4009_e164_mxids
+        ):
+            extra_chars = (
+                "=_-./+" if self.hs.config.experimental.msc4009_e164_mxids else "=_-./"
+            )
             raise SynapseError(
                 400,
-                "User ID can only contain characters a-z, 0-9, or '=_-./'",
+                f"User ID can only contain characters a-z, 0-9, or '{extra_chars}'",
                 Codes.INVALID_USERNAME,
             )
 
@@ -195,16 +200,12 @@ class RegistrationHandler:
                         errcode=Codes.FORBIDDEN,
                     )
 
-        if guest_access_token is None:
-            try:
-                int(localpart)
-                raise SynapseError(
-                    400,
-                    "Numeric user IDs are reserved for guest users.",
-                    errcode=Codes.INVALID_USERNAME,
-                )
-            except ValueError:
-                pass
+        if guest_access_token is None and GUEST_USER_ID_PATTERN.fullmatch(localpart):
+            raise SynapseError(
+                400,
+                "Numeric user IDs are reserved for guest users.",
+                errcode=Codes.INVALID_USERNAME,
+            )
 
     async def register_user(
         self,
diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py
index 92c3742625..25fd2eb3a1 100644
--- a/synapse/handlers/sso.py
+++ b/synapse/handlers/sso.py
@@ -225,6 +225,8 @@ class SsoHandler:
 
         self._consent_at_registration = hs.config.consent.user_consent_at_registration
 
+        self._e164_mxids = hs.config.experimental.msc4009_e164_mxids
+
     def register_identity_provider(self, p: SsoIdentityProvider) -> None:
         p_id = p.idp_id
         assert p_id not in self._identity_providers
@@ -711,7 +713,7 @@ class SsoHandler:
         # Since the localpart is provided via a potentially untrusted module,
         # ensure the MXID is valid before registering.
         if not attributes.localpart or contains_invalid_mxid_characters(
-            attributes.localpart
+            attributes.localpart, self._e164_mxids
         ):
             raise MappingException("localpart is invalid: %s" % (attributes.localpart,))
 
@@ -944,7 +946,7 @@ class SsoHandler:
             localpart,
         )
 
-        if contains_invalid_mxid_characters(localpart):
+        if contains_invalid_mxid_characters(localpart, self._e164_mxids):
             raise SynapseError(400, "localpart is invalid: %s" % (localpart,))
         user_id = UserID(localpart, self._server_name).to_string()
         user_infos = await self._store.get_users_by_id_case_insensitive(user_id)
diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py
index 5cee9c3194..325219656a 100644
--- a/synapse/types/__init__.py
+++ b/synapse/types/__init__.py
@@ -335,18 +335,35 @@ class EventID(DomainSpecificString):
 mxid_localpart_allowed_characters = set(
     "_-./=" + string.ascii_lowercase + string.digits
 )
+# MSC4007 adds the + to the allowed characters.
+#
+# TODO If this was accepted, update the SSO code to support this, see the callers
+#      of map_username_to_mxid_localpart.
+extended_mxid_localpart_allowed_characters = mxid_localpart_allowed_characters | {"+"}
+
+# Guest user IDs are purely numeric.
+GUEST_USER_ID_PATTERN = re.compile(r"^\d+$")
 
 
-def contains_invalid_mxid_characters(localpart: str) -> bool:
+def contains_invalid_mxid_characters(
+    localpart: str, use_extended_character_set: bool
+) -> bool:
     """Check for characters not allowed in an mxid or groupid localpart
 
     Args:
         localpart: the localpart to be checked
+        use_extended_character_set: True to use the extended allowed characters
+            from MSC4009.
 
     Returns:
         True if there are any naughty characters
     """
-    return any(c not in mxid_localpart_allowed_characters for c in localpart)
+    allowed_characters = (
+        extended_mxid_localpart_allowed_characters
+        if use_extended_character_set
+        else mxid_localpart_allowed_characters
+    )
+    return any(c not in allowed_characters for c in localpart)
 
 
 UPPER_CASE_PATTERN = re.compile(b"[A-Z_]")
diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py
index aff1ec4758..73822b07a5 100644
--- a/tests/handlers/test_register.py
+++ b/tests/handlers/test_register.py
@@ -586,6 +586,19 @@ class RegistrationTestCase(unittest.HomeserverTestCase):
         d = self.store.is_support_user(user_id)
         self.assertFalse(self.get_success(d))
 
+    def test_invalid_user_id(self) -> None:
+        invalid_user_id = "+abcd"
+        self.get_failure(
+            self.handler.register_user(localpart=invalid_user_id), SynapseError
+        )
+
+    @override_config({"experimental_features": {"msc4009_e164_mxids": True}})
+    def text_extended_user_ids(self) -> None:
+        """+ should be allowed according to MSC4009."""
+        valid_user_id = "+1234"
+        user_id = self.get_success(self.handler.register_user(localpart=valid_user_id))
+        self.assertEqual(user_id, valid_user_id)
+
     def test_invalid_user_id_length(self) -> None:
         invalid_user_id = "x" * 256
         self.get_failure(