diff --git a/synapse/events/utils.py b/synapse/events/utils.py
index c14c7791db..e7b7b78b84 100644
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
@@ -22,6 +22,7 @@ from typing import (
Iterable,
List,
Mapping,
+ Match,
MutableMapping,
Optional,
Union,
@@ -46,12 +47,10 @@ if TYPE_CHECKING:
from synapse.handlers.relations import BundledAggregations
-# Split strings on "." but not "\." This uses a negative lookbehind assertion for '\'
-# (?<!stuff) matches if the current position in the string is not preceded
-# by a match for 'stuff'.
-# TODO: This is fast, but fails to handle "foo\\.bar" which should be treated as
-# the literal fields "foo\" and "bar" but will instead be treated as "foo\\.bar"
-SPLIT_FIELD_REGEX = re.compile(r"(?<!\\)\.")
+# Split strings on "." but not "\." (or "\\\.").
+SPLIT_FIELD_REGEX = re.compile(r"\\*\.")
+# Find escaped characters, i.e. those with a \ in front of them.
+ESCAPE_SEQUENCE_PATTERN = re.compile(r"\\(.)")
CANONICALJSON_MAX_INT = (2**53) - 1
CANONICALJSON_MIN_INT = -CANONICALJSON_MAX_INT
@@ -106,7 +105,6 @@ def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDic
"depth",
"prev_events",
"auth_events",
- "origin",
"origin_server_ts",
]
@@ -114,6 +112,10 @@ def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDic
if not room_version.msc2176_redaction_rules:
allowed_keys.extend(["prev_state", "membership"])
+ # Room versions before MSC3989 kept the origin field.
+ if not room_version.msc3989_redaction_rules:
+ allowed_keys.append("origin")
+
event_type = event_dict["type"]
new_content = {}
@@ -127,6 +129,16 @@ def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDic
add_fields("membership")
if room_version.msc3375_redaction_rules:
add_fields(EventContentFields.AUTHORISING_USER)
+ if room_version.msc3821_redaction_rules:
+ # Preserve the signed field under third_party_invite.
+ third_party_invite = event_dict["content"].get("third_party_invite")
+ if isinstance(third_party_invite, collections.abc.Mapping):
+ new_content["third_party_invite"] = {}
+ if "signed" in third_party_invite:
+ new_content["third_party_invite"]["signed"] = third_party_invite[
+ "signed"
+ ]
+
elif event_type == EventTypes.Create:
# MSC2176 rules state that create events cannot be redacted.
if room_version.msc2176_redaction_rules:
@@ -168,6 +180,18 @@ def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDic
elif room_version.msc2716_redactions and event_type == EventTypes.MSC2716_MARKER:
add_fields(EventContentFields.MSC2716_INSERTION_EVENT_REFERENCE)
+ # Protect the rel_type and event_id fields under the m.relates_to field.
+ if room_version.msc3389_relation_redactions:
+ relates_to = event_dict["content"].get("m.relates_to")
+ if isinstance(relates_to, collections.abc.Mapping):
+ new_relates_to = {}
+ for field in ("rel_type", "event_id"):
+ if field in relates_to:
+ new_relates_to[field] = relates_to[field]
+ # Only include a non-empty relates_to field.
+ if new_relates_to:
+ new_content["m.relates_to"] = new_relates_to
+
allowed_fields = {k: v for k, v in event_dict.items() if k in allowed_keys}
allowed_fields["content"] = new_content
@@ -228,6 +252,57 @@ def _copy_field(src: JsonDict, dst: JsonDict, field: List[str]) -> None:
sub_out_dict[key_to_move] = sub_dict[key_to_move]
+def _escape_slash(m: Match[str]) -> str:
+    """
+    Regex substitution callback: an escaped backslash or an escaped dot collapses
+    to the bare character; any other escape sequence is returned untouched.
+    """
+    escaped = m.group(1)
+    # Only a backslash or a dot may be escaped; anything else is kept verbatim.
+    return escaped if escaped == "\\" or escaped == "." else m.group(0)
+
+
+def _split_field(field: str) -> List[str]:
+    """
+    Splits strings on unescaped dots and removes escaping.
+
+    Args:
+        field: A string representing a path to a field.
+
+    Returns:
+        A list of nested fields to traverse.
+    """
+
+    # Convert the field and remove escaping:
+    #
+    # 1. "content.body.thing\.with\.dots"
+    # 2. ["content", "body", "thing\.with\.dots"]
+    # 3. ["content", "body", "thing.with.dots"]
+
+    # Find all dots (and their preceding backslashes). If the dot is unescaped
+    # then emit a new field part.
+    result = []
+    prev_start = 0
+    for match in SPLIT_FIELD_REGEX.finditer(field):
+        # An even-length match has an odd number of backslashes, so the dot is escaped.
+        if len(match.group()) % 2 == 0:
+            continue
+
+        # Add a new part (up to the dot, exclusive) after unescaping.
+        result.append(
+            ESCAPE_SEQUENCE_PATTERN.sub(
+                _escape_slash, field[prev_start : match.end() - 1]
+            )
+        )
+        prev_start = match.end()
+
+    # Add any part of the field after the last unescaped dot. (Note that if the
+    # field ends with an unescaped dot this correctly adds a blank string.)
+    result.append(ESCAPE_SEQUENCE_PATTERN.sub(_escape_slash, field[prev_start:]))
+
+    return result
+
+
def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict:
"""Return a new dict with only the fields in 'dictionary' which are present
in 'fields'.
@@ -235,7 +310,7 @@ def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict:
If there are no event fields specified then all fields are included.
The entries may include '.' characters to indicate sub-fields.
So ['content.body'] will include the 'body' field of the 'content' object.
- A literal '.' character in a field name may be escaped using a '\'.
+ A literal '.' or '\' character in a field name may be escaped using a '\'.
Args:
dictionary: The dictionary to read from.
@@ -250,13 +325,7 @@ def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict:
# for each field, convert it:
# ["content.body.thing\.with\.dots"] => [["content", "body", "thing\.with\.dots"]]
- split_fields = [SPLIT_FIELD_REGEX.split(f) for f in fields]
-
- # for each element of the output array of arrays:
- # remove escaping so we can use the right key names.
- split_fields[:] = [
- [f.replace(r"\.", r".") for f in field_array] for field_array in split_fields
- ]
+ split_fields = [_split_field(f) for f in fields]
output: JsonDict = {}
for field_array in split_fields:
@@ -336,6 +405,7 @@ def serialize_event(
time_now_ms: int,
*,
config: SerializeEventConfig = _DEFAULT_SERIALIZE_EVENT_CONFIG,
+ msc3970_enabled: bool = False,
) -> JsonDict:
"""Serialize event for clients
@@ -343,6 +413,8 @@ def serialize_event(
e
time_now_ms
config: Event serialization config
+ msc3970_enabled: Whether MSC3970 is enabled. It changes whether we should
+ include the `transaction_id` in the event's `unsigned` section.
Returns:
The serialized event dictionary.
@@ -365,27 +437,43 @@ def serialize_event(
if "redacted_because" in e.unsigned:
d["unsigned"]["redacted_because"] = serialize_event(
- e.unsigned["redacted_because"], time_now_ms, config=config
+ e.unsigned["redacted_because"],
+ time_now_ms,
+ config=config,
+ msc3970_enabled=msc3970_enabled,
)
# If we have a txn_id saved in the internal_metadata, we should include it in the
# unsigned section of the event if it was sent by the same session as the one
# requesting the event.
- # There is a special case for guests, because they only have one access token
- # without associated access_token_id, so we always include the txn_id for events
- # they sent.
- txn_id = getattr(e.internal_metadata, "txn_id", None)
+ txn_id: Optional[str] = getattr(e.internal_metadata, "txn_id", None)
if txn_id is not None and config.requester is not None:
- event_token_id = getattr(e.internal_metadata, "token_id", None)
- if config.requester.user.to_string() == e.sender and (
- (
- event_token_id is not None
- and config.requester.access_token_id is not None
- and event_token_id == config.requester.access_token_id
+ # For the MSC3970 rules to be applied, we *need* to have the device ID in the
+ # event internal metadata. Since we were not recording them before, if it hasn't
+ # been recorded, we fallback to the old behaviour.
+ event_device_id: Optional[str] = getattr(e.internal_metadata, "device_id", None)
+ if msc3970_enabled and event_device_id is not None:
+ if event_device_id == config.requester.device_id:
+ d["unsigned"]["transaction_id"] = txn_id
+
+ else:
+ # The pre-MSC3970 behaviour is to only include the transaction ID if the
+ # event was sent from the same access token. For regular users, we can use
+ # the access token ID to determine this. For guests, we can't, but since
+ # each guest only has one access token, we can just check that the event was
+ # sent by the same user as the one requesting the event.
+ event_token_id: Optional[int] = getattr(
+ e.internal_metadata, "token_id", None
)
- or config.requester.is_guest
- ):
- d["unsigned"]["transaction_id"] = txn_id
+ if config.requester.user.to_string() == e.sender and (
+ (
+ event_token_id is not None
+ and config.requester.access_token_id is not None
+ and event_token_id == config.requester.access_token_id
+ )
+ or config.requester.is_guest
+ ):
+ d["unsigned"]["transaction_id"] = txn_id
# invite_room_state and knock_room_state are a list of stripped room state events
# that are meant to provide metadata about a room to an invitee/knocker. They are
@@ -416,6 +504,9 @@ class EventClientSerializer:
clients.
"""
+    def __init__(self, *, msc3970_enabled: bool = False) -> None:
+        self._msc3970_enabled = msc3970_enabled  # forwarded to serialize_event()
+
def serialize_event(
self,
event: Union[JsonDict, EventBase],
@@ -440,7 +531,9 @@ class EventClientSerializer:
if not isinstance(event, EventBase):
return event
- serialized_event = serialize_event(event, time_now, config=config)
+ serialized_event = serialize_event(
+ event, time_now, config=config, msc3970_enabled=self._msc3970_enabled
+ )
# Check if there are any bundled aggregations to include with the event.
if bundle_aggregations:
@@ -498,7 +591,9 @@ class EventClientSerializer:
# `sender` of the edit; however MSC3925 proposes extending it to the whole
# of the edit, which is what we do here.
serialized_aggregations[RelationTypes.REPLACE] = self.serialize_event(
- event_aggregations.replace, time_now, config=config
+ event_aggregations.replace,
+ time_now,
+ config=config,
)
# Include any threaded replies to this event.
|