summary refs log tree commit diff
path: root/synapse/events/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'synapse/events/utils.py')
-rw-r--r-- synapse/events/utils.py 157
1 files changed, 126 insertions, 31 deletions
diff --git a/synapse/events/utils.py b/synapse/events/utils.py

index c14c7791db..e7b7b78b84 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py
@@ -22,6 +22,7 @@ from typing import ( Iterable, List, Mapping, + Match, MutableMapping, Optional, Union, @@ -46,12 +47,10 @@ if TYPE_CHECKING: from synapse.handlers.relations import BundledAggregations -# Split strings on "." but not "\." This uses a negative lookbehind assertion for '\' -# (?<!stuff) matches if the current position in the string is not preceded -# by a match for 'stuff'. -# TODO: This is fast, but fails to handle "foo\\.bar" which should be treated as -# the literal fields "foo\" and "bar" but will instead be treated as "foo\\.bar" -SPLIT_FIELD_REGEX = re.compile(r"(?<!\\)\.") +# Split strings on "." but not "\." (or "\\\."). +SPLIT_FIELD_REGEX = re.compile(r"\\*\.") +# Find escaped characters, e.g. those with a \ in front of them. +ESCAPE_SEQUENCE_PATTERN = re.compile(r"\\(.)") CANONICALJSON_MAX_INT = (2**53) - 1 CANONICALJSON_MIN_INT = -CANONICALJSON_MAX_INT @@ -106,7 +105,6 @@ def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDic "depth", "prev_events", "auth_events", - "origin", "origin_server_ts", ] @@ -114,6 +112,10 @@ def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDic if not room_version.msc2176_redaction_rules: allowed_keys.extend(["prev_state", "membership"]) + # Room versions before MSC3989 kept the origin field. + if not room_version.msc3989_redaction_rules: + allowed_keys.append("origin") + event_type = event_dict["type"] new_content = {} @@ -127,6 +129,16 @@ def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDic add_fields("membership") if room_version.msc3375_redaction_rules: add_fields(EventContentFields.AUTHORISING_USER) + if room_version.msc3821_redaction_rules: + # Preserve the signed field under third_party_invite. 
+ third_party_invite = event_dict["content"].get("third_party_invite") + if isinstance(third_party_invite, collections.abc.Mapping): + new_content["third_party_invite"] = {} + if "signed" in third_party_invite: + new_content["third_party_invite"]["signed"] = third_party_invite[ + "signed" + ] + elif event_type == EventTypes.Create: # MSC2176 rules state that create events cannot be redacted. if room_version.msc2176_redaction_rules: @@ -168,6 +180,18 @@ def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDic elif room_version.msc2716_redactions and event_type == EventTypes.MSC2716_MARKER: add_fields(EventContentFields.MSC2716_INSERTION_EVENT_REFERENCE) + # Protect the rel_type and event_id fields under the m.relates_to field. + if room_version.msc3389_relation_redactions: + relates_to = event_dict["content"].get("m.relates_to") + if isinstance(relates_to, collections.abc.Mapping): + new_relates_to = {} + for field in ("rel_type", "event_id"): + if field in relates_to: + new_relates_to[field] = relates_to[field] + # Only include a non-empty relates_to field. + if new_relates_to: + new_content["m.relates_to"] = new_relates_to + allowed_fields = {k: v for k, v in event_dict.items() if k in allowed_keys} allowed_fields["content"] = new_content @@ -228,6 +252,57 @@ def _copy_field(src: JsonDict, dst: JsonDict, field: List[str]) -> None: sub_out_dict[key_to_move] = sub_dict[key_to_move] +def _escape_slash(m: Match[str]) -> str: + """ + Replacement function; replace a backslash-backslash or backslash-dot with the + second character. Leaves any other string alone. + """ + if m.group(1) in ("\\", "."): + return m.group(1) + return m.group(0) + + +def _split_field(field: str) -> List[str]: + """ + Splits strings on unescaped dots and removes escaping. + + Args: + field: A string representing a path to a field. + + Returns: + A list of nested fields to traverse. + """ + + # Convert the field and remove escaping: + # + # 1. 
"content.body.thing\.with\.dots" + # 2. ["content", "body", "thing\.with\.dots"] + # 3. ["content", "body", "thing.with.dots"] + + # Find all dots (and their preceding backslashes). If the dot is unescaped + # then emit a new field part. + result = [] + prev_start = 0 + for match in SPLIT_FIELD_REGEX.finditer(field): + # If the match is an *even* number of characters than the dot was escaped. + if len(match.group()) % 2 == 0: + continue + + # Add a new part (up to the dot, exclusive) after escaping. + result.append( + ESCAPE_SEQUENCE_PATTERN.sub( + _escape_slash, field[prev_start : match.end() - 1] + ) + ) + prev_start = match.end() + + # Add any part of the field after the last unescaped dot. (Note that if the + # character is a dot this correctly adds a blank string.) + result.append(re.sub(r"\\(.)", _escape_slash, field[prev_start:])) + + return result + + def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict: """Return a new dict with only the fields in 'dictionary' which are present in 'fields'. @@ -235,7 +310,7 @@ def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict: If there are no event fields specified then all fields are included. The entries may include '.' characters to indicate sub-fields. So ['content.body'] will include the 'body' field of the 'content' object. - A literal '.' character in a field name may be escaped using a '\'. + A literal '.' or '\' character in a field name may be escaped using a '\'. Args: dictionary: The dictionary to read from. @@ -250,13 +325,7 @@ def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict: # for each field, convert it: # ["content.body.thing\.with\.dots"] => [["content", "body", "thing\.with\.dots"]] - split_fields = [SPLIT_FIELD_REGEX.split(f) for f in fields] - - # for each element of the output array of arrays: - # remove escaping so we can use the right key names. 
- split_fields[:] = [ - [f.replace(r"\.", r".") for f in field_array] for field_array in split_fields - ] + split_fields = [_split_field(f) for f in fields] output: JsonDict = {} for field_array in split_fields: @@ -336,6 +405,7 @@ def serialize_event( time_now_ms: int, *, config: SerializeEventConfig = _DEFAULT_SERIALIZE_EVENT_CONFIG, + msc3970_enabled: bool = False, ) -> JsonDict: """Serialize event for clients @@ -343,6 +413,8 @@ def serialize_event( e time_now_ms config: Event serialization config + msc3970_enabled: Whether MSC3970 is enabled. It changes whether we should + include the `transaction_id` in the event's `unsigned` section. Returns: The serialized event dictionary. @@ -365,27 +437,43 @@ def serialize_event( if "redacted_because" in e.unsigned: d["unsigned"]["redacted_because"] = serialize_event( - e.unsigned["redacted_because"], time_now_ms, config=config + e.unsigned["redacted_because"], + time_now_ms, + config=config, + msc3970_enabled=msc3970_enabled, ) # If we have a txn_id saved in the internal_metadata, we should include it in the # unsigned section of the event if it was sent by the same session as the one # requesting the event. - # There is a special case for guests, because they only have one access token - # without associated access_token_id, so we always include the txn_id for events - # they sent. - txn_id = getattr(e.internal_metadata, "txn_id", None) + txn_id: Optional[str] = getattr(e.internal_metadata, "txn_id", None) if txn_id is not None and config.requester is not None: - event_token_id = getattr(e.internal_metadata, "token_id", None) - if config.requester.user.to_string() == e.sender and ( - ( - event_token_id is not None - and config.requester.access_token_id is not None - and event_token_id == config.requester.access_token_id + # For the MSC3970 rules to be applied, we *need* to have the device ID in the + # event internal metadata. 
Since we were not recording them before, if it hasn't + # been recorded, we fallback to the old behaviour. + event_device_id: Optional[str] = getattr(e.internal_metadata, "device_id", None) + if msc3970_enabled and event_device_id is not None: + if event_device_id == config.requester.device_id: + d["unsigned"]["transaction_id"] = txn_id + + else: + # The pre-MSC3970 behaviour is to only include the transaction ID if the + # event was sent from the same access token. For regular users, we can use + # the access token ID to determine this. For guests, we can't, but since + # each guest only has one access token, we can just check that the event was + # sent by the same user as the one requesting the event. + event_token_id: Optional[int] = getattr( + e.internal_metadata, "token_id", None ) - or config.requester.is_guest - ): - d["unsigned"]["transaction_id"] = txn_id + if config.requester.user.to_string() == e.sender and ( + ( + event_token_id is not None + and config.requester.access_token_id is not None + and event_token_id == config.requester.access_token_id + ) + or config.requester.is_guest + ): + d["unsigned"]["transaction_id"] = txn_id # invite_room_state and knock_room_state are a list of stripped room state events # that are meant to provide metadata about a room to an invitee/knocker. They are @@ -416,6 +504,9 @@ class EventClientSerializer: clients. """ + def __init__(self, *, msc3970_enabled: bool = False): + self._msc3970_enabled = msc3970_enabled + def serialize_event( self, event: Union[JsonDict, EventBase], @@ -440,7 +531,9 @@ class EventClientSerializer: if not isinstance(event, EventBase): return event - serialized_event = serialize_event(event, time_now, config=config) + serialized_event = serialize_event( + event, time_now, config=config, msc3970_enabled=self._msc3970_enabled + ) # Check if there are any bundled aggregations to include with the event. 
if bundle_aggregations: @@ -498,7 +591,9 @@ class EventClientSerializer: # `sender` of the edit; however MSC3925 proposes extending it to the whole # of the edit, which is what we do here. serialized_aggregations[RelationTypes.REPLACE] = self.serialize_event( - event_aggregations.replace, time_now, config=config + event_aggregations.replace, + time_now, + config=config, ) # Include any threaded replies to this event.