summary refs log tree commit diff
diff options
context:
space:
mode:
author Mathieu Velten <mathieuv@matrix.org> 2023-09-08 13:37:00 +0200
committer Mathieu Velten <mathieuv@matrix.org> 2023-09-08 13:37:00 +0200
commit 01c582ff3625300bf8f7c6c9ff4c78069816f779 (patch)
tree 6d268180f6601ab981d41e6f49b71acb240ad4d5
parent Merge branch 'develop' into mv/add-mxid-validation-log (diff)
download synapse-01c582ff3625300bf8f7c6c9ff4c78069816f779.tar.xz
Change to is_valid
-rw-r--r-- synapse/handlers/device.py 4
-rw-r--r-- synapse/handlers/devicemessage.py 4
-rw-r--r-- synapse/handlers/e2e_keys.py 4
-rw-r--r-- synapse/handlers/receipts.py 4
-rw-r--r-- synapse/handlers/typing.py 4
-rw-r--r-- synapse/storage/controllers/persist_events.py 4
-rw-r--r-- synapse/types/__init__.py 31
7 files changed, 42 insertions, 13 deletions
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py
index 1ac58d7ece..3d05750cff 100644
--- a/synapse/handlers/device.py
+++ b/synapse/handlers/device.py
@@ -1105,9 +1105,9 @@ class DeviceListUpdater(DeviceListWorkerUpdater):
             )
         prev_ids = [str(p) for p in prev_ids]  # They may come as ints
 
-        # The result of `validate` is not used yet because for now we only want to
+        # The result of `is_valid` is not used yet because for now we only want to
         # log invalid mxids in the wild.
-        UserID.from_string(user_id).validate(allow_historical_mxids=True)
+        UserID.is_valid(user_id, allow_historical_mxids=True)
 
         if get_domain_from_id(user_id) != origin:
             # TODO: Raise?
diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py
index a1ce84601a..2727588fc7 100644
--- a/synapse/handlers/devicemessage.py
+++ b/synapse/handlers/devicemessage.py
@@ -109,9 +109,9 @@ class DeviceMessageHandler:
                 origin,
                 sender_user_id,
             )
-        # The result of `validate` is not used yet because for now we only want to
+        # The result of `is_valid` is not used yet because for now we only want to
         # log invalid mxids in the wild.
-        UserID.from_string(sender_user_id).validate(allow_historical_mxids=True)
+        UserID.is_valid(sender_user_id, allow_historical_mxids=True)
 
         message_type = content["type"]
         message_id = content["message_id"]
diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py
index 4924404526..1e01afed17 100644
--- a/synapse/handlers/e2e_keys.py
+++ b/synapse/handlers/e2e_keys.py
@@ -1593,9 +1593,9 @@ class SigningKeyEduUpdater:
             logger.warning("Got signing key update edu for %r from %r", user_id, origin)
             return
 
-        # The result of `validate` is not used yet because for now we only want to
+        # The result of `is_valid` is not used yet because for now we only want to
         # log invalid mxids in the wild.
-        UserID.from_string(user_id).validate(allow_historical_mxids=True)
+        UserID.is_valid(user_id, allow_historical_mxids=True)
 
         room_ids = await self.store.get_rooms_for_user(user_id)
         if not room_ids:
diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py
index 09a0ed276d..628521bfda 100644
--- a/synapse/handlers/receipts.py
+++ b/synapse/handlers/receipts.py
@@ -117,9 +117,9 @@ class ReceiptsHandler:
         max_batch_id: Optional[int] = None
 
         for receipt in receipts:
-            # The result of `validate` is not used yet because for now we only want to
+            # The result of `is_valid` is not used yet because for now we only want to
             # log invalid mxids in the wild.
-            UserID.from_string(receipt.user_id).validate(allow_historical_mxids=True)
+            UserID.is_valid(receipt.user_id, allow_historical_mxids=True)
 
             res = await self.store.insert_receipt(
                 receipt.room_id,
diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py
index e3fd5d2b56..63e340a96c 100644
--- a/synapse/handlers/typing.py
+++ b/synapse/handlers/typing.py
@@ -370,9 +370,9 @@ class TypingWriterHandler(FollowerTypingHandler):
         room_id = content["room_id"]
         user_id = content["user_id"]
 
-        # The result of `validate` is not used yet because for now we only want to
+        # The result of `is_valid` is not used yet because for now we only want to
         # log invalid mxids in the wild.
-        UserID.from_string(user_id).validate(allow_historical_mxids=True)
+        UserID.is_valid(user_id, allow_historical_mxids=True)
 
         # If we're not in the room just ditch the event entirely. This is
         # probably an old server that has come back and thinks we're still in
diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py
index cea6288556..086aa60f5c 100644
--- a/synapse/storage/controllers/persist_events.py
+++ b/synapse/storage/controllers/persist_events.py
@@ -398,9 +398,9 @@ class EventsPersistenceStorageController:
         event_ids: List[str] = []
         partitioned: Dict[str, List[Tuple[EventBase, EventContext]]] = {}
         for event, ctx in events_and_contexts:
-            # The result of `validate` is not used yet because for now we only want to
+            # The result of `is_valid` is not used yet because for now we only want to
             # log invalid mxids in the wild.
-            UserID.from_string(event.user_id).validate(allow_historical_mxids=True)
+            UserID.is_valid(event.user_id, allow_historical_mxids=True)
 
             partitioned.setdefault(event.room_id, []).append((event, ctx))
             event_ids.append(event.event_id)
diff --git a/synapse/types/__init__.py b/synapse/types/__init__.py
index 8df63b5319..9b7f4f0748 100644
--- a/synapse/types/__init__.py
+++ b/synapse/types/__init__.py
@@ -310,7 +310,7 @@ class DomainSpecificString(metaclass=abc.ABCMeta):
         return "%s%s:%s" % (self.SIGIL, self.localpart, self.domain)
 
     @classmethod
-    def is_valid(cls: Type[DS], s: str) -> bool:
+    def is_valid(cls: Type[DS], s: str, **kwargs: Any) -> bool:
         """Parses the input string and attempts to ensure it is valid."""
         # TODO: this does not reject an empty localpart or an overly-long string.
         # See https://spec.matrix.org/v1.2/appendices/#identifier-grammar
@@ -333,6 +333,35 @@ class UserID(DomainSpecificString):
 
     SIGIL = "@"
 
+    @classmethod
+    def is_valid(cls: Type[DS], s: str, **kwargs: Any) -> bool:
+        """Check that the user ID str is valid per the spec, logging any failure.
+
+        Args:
+            allow_historical_mxids: True to allow historical mxids, which can
+                include all printable ASCII chars minus `:`
+        Returns:
+            False if the user ID is invalid per the spec
+        """
+        allow_historical_mxids = kwargs.get("allow_historical_mxids", False)
+
+        # `super()` keeps `cls` bound to this class, so its own SIGIL is checked.
+        is_valid = parseable = super().is_valid(s)
+        if len(s.encode("utf-8")) > 255:
+            logger.warning(
+                f"User ID {s} has more than 255 bytes and is invalid per the spec"
+            )
+            is_valid = False
+        if not parseable:
+            return False  # unparseable; `from_string` is assumed to raise on it
+        localpart = cls.from_string(s).localpart
+        if contains_invalid_mxid_characters(localpart, allow_historical_mxids):
+            logger.warning(
+                f"localpart of User ID {s} contains invalid characters per the spec"
+            )
+            is_valid = False
+        return is_valid
+
     def validate(self, allow_historical_mxids: Optional[bool] = False) -> bool:
         """Validate an user ID against the spec.