diff --git a/changelog.d/13474.misc b/changelog.d/13474.misc
new file mode 100644
index 0000000000..d34c661fed
--- /dev/null
+++ b/changelog.d/13474.misc
@@ -0,0 +1 @@
+Add some miscellaneous comments to document sync, especially around `compute_state_delta`.
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index d827c03ad1..3ca01391c9 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -13,7 +13,17 @@
# limitations under the License.
import itertools
import logging
-from typing import TYPE_CHECKING, Any, Dict, FrozenSet, List, Optional, Set, Tuple
+from typing import (
+ TYPE_CHECKING,
+ Any,
+ Dict,
+ FrozenSet,
+ List,
+ Optional,
+ Sequence,
+ Set,
+ Tuple,
+)
import attr
from prometheus_client import Counter
@@ -89,7 +99,7 @@ class SyncConfig:
@attr.s(slots=True, frozen=True, auto_attribs=True)
class TimelineBatch:
prev_batch: StreamToken
- events: List[EventBase]
+ events: Sequence[EventBase]
limited: bool
# A mapping of event ID to the bundled aggregations for the above events.
# This is only calculated if limited is true.
@@ -852,16 +862,26 @@ class SyncHandler:
now_token: StreamToken,
full_state: bool,
) -> MutableStateMap[EventBase]:
- """Works out the difference in state between the start of the timeline
- and the previous sync.
+ """Works out the difference in state between the end of the previous sync and
+ the start of the timeline.
Args:
room_id:
batch: The timeline batch for the room that will be sent to the user.
sync_config:
- since_token: Token of the end of the previous batch. May be None.
+ since_token: Token of the end of the previous batch. May be `None`.
now_token: Token of the end of the current batch.
full_state: Whether to force returning the full state.
+ `lazy_load_members` still applies when `full_state` is `True`.
+
+ Returns:
+ The state to return in the sync response for the room.
+
+ Clients will overlay this onto the state at the end of the previous sync to
+ arrive at the state at the start of the timeline.
+
+ Clients will then overlay state events in the timeline to arrive at the
+ state at the end of the timeline, in preparation for the next sync.
"""
# TODO(mjark) Check if the state events were received by the server
# after the previous sync, since we need to include those state
@@ -869,7 +889,8 @@ class SyncHandler:
# TODO(mjark) Check for new redactions in the state events.
with Measure(self.clock, "compute_state_delta"):
-
+ # The memberships needed for events in the timeline.
+ # Only calculated when `lazy_load_members` is on.
members_to_fetch = None
lazy_load_members = sync_config.filter_collection.lazy_load_members()
@@ -897,38 +918,46 @@ class SyncHandler:
else:
state_filter = StateFilter.all()
+ # The contribution to the room state from state events in the timeline.
+ # Only contains the last event for any given state key.
timeline_state = {
(event.type, event.state_key): event.event_id
for event in batch.events
if event.is_state()
}
+ # Now calculate the state to return in the sync response for the room.
+ # This is more or less the change in state between the end of the previous
+ # sync's timeline and the start of the current sync's timeline.
+ # See the docstring above for details.
+ state_ids: StateMap[str]
+
if full_state:
if batch:
- current_state_ids = (
+ state_at_timeline_end = (
await self._state_storage_controller.get_state_ids_for_event(
batch.events[-1].event_id, state_filter=state_filter
)
)
- state_ids = (
+ state_at_timeline_start = (
await self._state_storage_controller.get_state_ids_for_event(
batch.events[0].event_id, state_filter=state_filter
)
)
else:
- current_state_ids = await self.get_state_at(
+ state_at_timeline_end = await self.get_state_at(
room_id, stream_position=now_token, state_filter=state_filter
)
- state_ids = current_state_ids
+ state_at_timeline_start = state_at_timeline_end
state_ids = _calculate_state(
timeline_contains=timeline_state,
- timeline_start=state_ids,
- previous={},
- current=current_state_ids,
+ timeline_start=state_at_timeline_start,
+ timeline_end=state_at_timeline_end,
+ previous_timeline_end={},
lazy_load_members=lazy_load_members,
)
elif batch.limited:
@@ -968,24 +997,23 @@ class SyncHandler:
)
if batch:
- current_state_ids = (
+ state_at_timeline_end = (
await self._state_storage_controller.get_state_ids_for_event(
batch.events[-1].event_id, state_filter=state_filter
)
)
else:
- # Its not clear how we get here, but empirically we do
- # (#5407). Logging has been added elsewhere to try and
- # figure out where this state comes from.
- current_state_ids = await self.get_state_at(
+ # We can get here if the user has ignored the senders of all
+ # the recent events.
+ state_at_timeline_end = await self.get_state_at(
room_id, stream_position=now_token, state_filter=state_filter
)
state_ids = _calculate_state(
timeline_contains=timeline_state,
timeline_start=state_at_timeline_start,
- previous=state_at_previous_sync,
- current=current_state_ids,
+ timeline_end=state_at_timeline_end,
+ previous_timeline_end=state_at_previous_sync,
# we have to include LL members in case LL initial sync missed them
lazy_load_members=lazy_load_members,
)
@@ -1010,6 +1038,13 @@ class SyncHandler:
),
)
+ # At this point, if `lazy_load_members` is enabled, `state_ids` includes
+ # the memberships of all event senders in the timeline. This is because we
+ # may not have sent the memberships in a previous sync.
+
+ # When `include_redundant_members` is on, we send all the lazy-loaded
+ # memberships of event senders. Otherwise we make an effort to limit the set
+ # of memberships we send to those that we have not already sent to this client.
if lazy_load_members and not include_redundant_members:
cache_key = (sync_config.user.to_string(), sync_config.device_id)
cache = self.get_lazy_loaded_members_cache(cache_key)
@@ -2216,8 +2251,8 @@ def _action_has_highlight(actions: List[JsonDict]) -> bool:
def _calculate_state(
timeline_contains: StateMap[str],
timeline_start: StateMap[str],
- previous: StateMap[str],
- current: StateMap[str],
+ timeline_end: StateMap[str],
+ previous_timeline_end: StateMap[str],
lazy_load_members: bool,
) -> StateMap[str]:
"""Works out what state to include in a sync response.
@@ -2225,45 +2260,50 @@ def _calculate_state(
Args:
timeline_contains: state in the timeline
timeline_start: state at the start of the timeline
- previous: state at the end of the previous sync (or empty dict
+ timeline_end: state at the end of the timeline
+ previous_timeline_end: state at the end of the previous sync (or empty dict
if this is an initial sync)
- current: state at the end of the timeline
lazy_load_members: whether to return members from timeline_start
or not. assumes that timeline_start has already been filtered to
include only the members the client needs to know about.
"""
- event_id_to_key = {
- e: key
- for key, e in itertools.chain(
+ event_id_to_state_key = {
+ event_id: state_key
+ for state_key, event_id in itertools.chain(
timeline_contains.items(),
- previous.items(),
timeline_start.items(),
- current.items(),
+ timeline_end.items(),
+ previous_timeline_end.items(),
)
}
- c_ids = set(current.values())
- ts_ids = set(timeline_start.values())
- p_ids = set(previous.values())
- tc_ids = set(timeline_contains.values())
+ timeline_end_ids = set(timeline_end.values())
+ timeline_start_ids = set(timeline_start.values())
+ previous_timeline_end_ids = set(previous_timeline_end.values())
+ timeline_contains_ids = set(timeline_contains.values())
# If we are lazyloading room members, we explicitly add the membership events
# for the senders in the timeline into the state block returned by /sync,
# as we may not have sent them to the client before. We find these membership
# events by filtering them out of timeline_start, which has already been filtered
# to only include membership events for the senders in the timeline.
- # In practice, we can do this by removing them from the p_ids list,
- # which is the list of relevant state we know we have already sent to the client.
+ # In practice, we can do this by removing them from the previous_timeline_end_ids
+ # list, which is the list of relevant state we know we have already sent to the
+ # client.
# see https://github.com/matrix-org/synapse/pull/2970/files/efcdacad7d1b7f52f879179701c7e0d9b763511f#r204732809
if lazy_load_members:
- p_ids.difference_update(
+ previous_timeline_end_ids.difference_update(
e for t, e in timeline_start.items() if t[0] == EventTypes.Member
)
- state_ids = ((c_ids | ts_ids) - p_ids) - tc_ids
+ state_ids = (
+ (timeline_end_ids | timeline_start_ids)
+ - previous_timeline_end_ids
+ - timeline_contains_ids
+ )
- return {event_id_to_key[e]: e for e in state_ids}
+ return {event_id_to_state_key[e]: e for e in state_ids}
@attr.s(slots=True, auto_attribs=True)
diff --git a/synapse/visibility.py b/synapse/visibility.py
index d947edde66..c810a05907 100644
--- a/synapse/visibility.py
+++ b/synapse/visibility.py
@@ -73,8 +73,8 @@ async def filter_events_for_client(
* the user is not currently a member of the room, and:
* the user has not been a member of the room since the given
events
- always_include_ids: set of event ids to specifically
- include (unless sender is ignored)
+ always_include_ids: set of event ids to specifically include, if present
+ in events (unless sender is ignored)
filter_send_to_client: Whether we're checking an event that's going to be
sent to a client. This might not always be the case since this function can
also be called to check whether a user can see the state at a given point.
|