diff --git a/synapse/state/v2.py b/synapse/state/v2.py
index 18484e2fa6..edf94e7ad6 100644
--- a/synapse/state/v2.py
+++ b/synapse/state/v2.py
@@ -16,11 +16,21 @@
import heapq
import itertools
import logging
-from typing import Dict, List, Optional
-
-from six import iteritems, itervalues
-
-from twisted.internet import defer
+from typing import (
+ Any,
+ Callable,
+ Dict,
+ Generator,
+ Iterable,
+ List,
+ Optional,
+ Sequence,
+ Set,
+ Tuple,
+ overload,
+)
+
+from typing_extensions import Literal
import synapse.state
from synapse import event_auth
@@ -28,29 +38,34 @@ from synapse.api.constants import EventTypes
from synapse.api.errors import AuthError
from synapse.api.room_versions import KNOWN_ROOM_VERSIONS
from synapse.events import EventBase
-from synapse.types import StateMap
+from synapse.types import MutableStateMap, StateMap
+from synapse.util import Clock
logger = logging.getLogger(__name__)
-@defer.inlineCallbacks
-def resolve_events_with_store(
+# We want to yield control back to the reactor occasionally during state res
+# when dealing with large data sets, so that we don't block the reactor for too
+# long. This is done by awaiting `clock.sleep(0)` during loops every N iterations.
+_AWAIT_AFTER_ITERATIONS = 100
+
+
+async def resolve_events_with_store(
+ clock: Clock,
room_id: str,
room_version: str,
- state_sets: List[StateMap[str]],
+ state_sets: Sequence[StateMap[str]],
event_map: Optional[Dict[str, EventBase]],
state_res_store: "synapse.state.StateResolutionStore",
-):
+) -> StateMap[str]:
"""Resolves the state using the v2 state resolution algorithm
Args:
+ clock
room_id: the room we are working in
-
room_version: The room version
-
state_sets: List of dicts of (type, state_key) -> event_id,
which are the different state groups to resolve.
-
event_map:
a dict from event_id to event, for any events that we happen to
have in flight (eg, those currently being persisted). This will be
@@ -62,8 +77,7 @@ def resolve_events_with_store(
state_res_store:
Returns:
- Deferred[dict[(str, str), str]]:
- a map from (type, state_key) to event_id.
+ A map from (type, state_key) to event_id.
"""
logger.debug("Computing conflicted state")
@@ -83,15 +97,15 @@ def resolve_events_with_store(
# Also fetch all auth events that appear in only some of the state sets'
# auth chains.
- auth_diff = yield _get_auth_chain_difference(state_sets, event_map, state_res_store)
+ auth_diff = await _get_auth_chain_difference(state_sets, event_map, state_res_store)
full_conflicted_set = set(
itertools.chain(
- itertools.chain.from_iterable(itervalues(conflicted_state)), auth_diff
+ itertools.chain.from_iterable(conflicted_state.values()), auth_diff
)
)
- events = yield state_res_store.get_events(
+ events = await state_res_store.get_events(
[eid for eid in full_conflicted_set if eid not in event_map],
allow_rejected=True,
)
@@ -114,14 +128,15 @@ def resolve_events_with_store(
eid for eid in full_conflicted_set if _is_power_event(event_map[eid])
)
- sorted_power_events = yield _reverse_topological_power_sort(
- room_id, power_events, event_map, state_res_store, full_conflicted_set
+ sorted_power_events = await _reverse_topological_power_sort(
+ clock, room_id, power_events, event_map, state_res_store, full_conflicted_set
)
logger.debug("sorted %d power events", len(sorted_power_events))
# Now sequentially auth each one
- resolved_state = yield _iterative_auth_checks(
+ resolved_state = await _iterative_auth_checks(
+ clock,
room_id,
room_version,
sorted_power_events,
@@ -135,20 +150,22 @@ def resolve_events_with_store(
# OK, so we've now resolved the power events. Now sort the remaining
# events using the mainline of the resolved power level.
+ set_power_events = set(sorted_power_events)
leftover_events = [
- ev_id for ev_id in full_conflicted_set if ev_id not in sorted_power_events
+ ev_id for ev_id in full_conflicted_set if ev_id not in set_power_events
]
logger.debug("sorting %d remaining events", len(leftover_events))
pl = resolved_state.get((EventTypes.PowerLevels, ""), None)
- leftover_events = yield _mainline_sort(
- room_id, leftover_events, pl, event_map, state_res_store
+ leftover_events = await _mainline_sort(
+ clock, room_id, leftover_events, pl, event_map, state_res_store
)
logger.debug("resolving remaining events")
- resolved_state = yield _iterative_auth_checks(
+ resolved_state = await _iterative_auth_checks(
+ clock,
room_id,
room_version,
leftover_events,
@@ -167,25 +184,29 @@ def resolve_events_with_store(
return resolved_state
-@defer.inlineCallbacks
-def _get_power_level_for_sender(room_id, event_id, event_map, state_res_store):
+async def _get_power_level_for_sender(
+ room_id: str,
+ event_id: str,
+ event_map: Dict[str, EventBase],
+ state_res_store: "synapse.state.StateResolutionStore",
+) -> int:
"""Return the power level of the sender of the given event according to
their auth events.
Args:
- room_id (str)
- event_id (str)
- event_map (dict[str,FrozenEvent])
- state_res_store (StateResolutionStore)
+ room_id
+ event_id
+ event_map
+ state_res_store
Returns:
- Deferred[int]
+ The power level.
"""
- event = yield _get_event(room_id, event_id, event_map, state_res_store)
+ event = await _get_event(room_id, event_id, event_map, state_res_store)
pl = None
for aid in event.auth_event_ids():
- aev = yield _get_event(
+ aev = await _get_event(
room_id, aid, event_map, state_res_store, allow_none=True
)
if aev and (aev.type, aev.state_key) == (EventTypes.PowerLevels, ""):
@@ -195,7 +216,7 @@ def _get_power_level_for_sender(room_id, event_id, event_map, state_res_store):
if pl is None:
# Couldn't find power level. Check if they're the creator of the room
for aid in event.auth_event_ids():
- aev = yield _get_event(
+ aev = await _get_event(
room_id, aid, event_map, state_res_store, allow_none=True
)
if aev and (aev.type, aev.state_key) == (EventTypes.Create, ""):
@@ -214,38 +235,43 @@ def _get_power_level_for_sender(room_id, event_id, event_map, state_res_store):
return int(level)
-@defer.inlineCallbacks
-def _get_auth_chain_difference(state_sets, event_map, state_res_store):
+async def _get_auth_chain_difference(
+ state_sets: Sequence[StateMap[str]],
+ event_map: Dict[str, EventBase],
+ state_res_store: "synapse.state.StateResolutionStore",
+) -> Set[str]:
"""Compare the auth chains of each state set and return the set of events
that only appear in some but not all of the auth chains.
Args:
- state_sets (list)
- event_map (dict[str,FrozenEvent])
- state_res_store (StateResolutionStore)
+ state_sets
+ event_map
+ state_res_store
Returns:
- Deferred[set[str]]: Set of event IDs
+ Set of event IDs
"""
- difference = yield state_res_store.get_auth_chain_difference(
+ difference = await state_res_store.get_auth_chain_difference(
[set(state_set.values()) for state_set in state_sets]
)
return difference
-def _seperate(state_sets):
+def _seperate(
+ state_sets: Iterable[StateMap[str]],
+) -> Tuple[StateMap[str], StateMap[Set[str]]]:
"""Return the unconflicted and conflicted state. This is different than in
the original algorithm, as this defines a key to be conflicted if one of
the state sets doesn't have that key.
Args:
- state_sets (list)
+ state_sets
Returns:
- tuple[dict, dict]: A tuple of unconflicted and conflicted state. The
- conflicted state dict is a map from type/state_key to set of event IDs
+ A tuple of unconflicted and conflicted state. The conflicted state dict
+ is a map from type/state_key to set of event IDs
"""
unconflicted_state = {}
conflicted_state = {}
@@ -258,18 +284,20 @@ def _seperate(state_sets):
event_ids.discard(None)
conflicted_state[key] = event_ids
- return unconflicted_state, conflicted_state
+ # mypy doesn't understand that discarding None above means that conflicted
+ # state is StateMap[Set[str]], not StateMap[Set[Optional[str]]].
+ return unconflicted_state, conflicted_state # type: ignore
-def _is_power_event(event):
+def _is_power_event(event: EventBase) -> bool:
"""Return whether or not the event is a "power event", as defined by the
v2 state resolution algorithm
Args:
- event (FrozenEvent)
+ event
Returns:
- boolean
+ True if the event is a power event.
"""
if (event.type, event.state_key) in (
(EventTypes.PowerLevels, ""),
@@ -285,21 +313,24 @@ def _is_power_event(event):
return False
-@defer.inlineCallbacks
-def _add_event_and_auth_chain_to_graph(
- graph, room_id, event_id, event_map, state_res_store, auth_diff
-):
+async def _add_event_and_auth_chain_to_graph(
+ graph: Dict[str, Set[str]],
+ room_id: str,
+ event_id: str,
+ event_map: Dict[str, EventBase],
+ state_res_store: "synapse.state.StateResolutionStore",
+ auth_diff: Set[str],
+) -> None:
"""Helper function for _reverse_topological_power_sort that add the event
and its auth chain (that is in the auth diff) to the graph
Args:
- graph (dict[str, set[str]]): A map from event ID to the events auth
- event IDs
- room_id (str): the room we are working in
- event_id (str): Event to add to the graph
- event_map (dict[str,FrozenEvent])
- state_res_store (StateResolutionStore)
- auth_diff (set[str]): Set of event IDs that are in the auth difference.
+ graph: A map from event ID to the events auth event IDs
+ room_id: the room we are working in
+ event_id: Event to add to the graph
+ event_map
+ state_res_store
+ auth_diff: Set of event IDs that are in the auth difference.
"""
state = [event_id]
@@ -307,7 +338,7 @@ def _add_event_and_auth_chain_to_graph(
eid = state.pop()
graph.setdefault(eid, set())
- event = yield _get_event(room_id, eid, event_map, state_res_store)
+ event = await _get_event(room_id, eid, event_map, state_res_store)
for aid in event.auth_event_ids():
if aid in auth_diff:
if aid not in graph:
@@ -316,37 +347,52 @@ def _add_event_and_auth_chain_to_graph(
graph.setdefault(eid, set()).add(aid)
-@defer.inlineCallbacks
-def _reverse_topological_power_sort(
- room_id, event_ids, event_map, state_res_store, auth_diff
-):
+async def _reverse_topological_power_sort(
+ clock: Clock,
+ room_id: str,
+ event_ids: Iterable[str],
+ event_map: Dict[str, EventBase],
+ state_res_store: "synapse.state.StateResolutionStore",
+ auth_diff: Set[str],
+) -> List[str]:
"""Returns a list of the event_ids sorted by reverse topological ordering,
and then by power level and origin_server_ts
Args:
- room_id (str): the room we are working in
- event_ids (list[str]): The events to sort
- event_map (dict[str,FrozenEvent])
- state_res_store (StateResolutionStore)
- auth_diff (set[str]): Set of event IDs that are in the auth difference.
+ clock
+ room_id: the room we are working in
+ event_ids: The events to sort
+ event_map
+ state_res_store
+ auth_diff: Set of event IDs that are in the auth difference.
Returns:
- Deferred[list[str]]: The sorted list
+ The sorted list
"""
- graph = {}
- for event_id in event_ids:
- yield _add_event_and_auth_chain_to_graph(
+ graph = {} # type: Dict[str, Set[str]]
+ for idx, event_id in enumerate(event_ids, start=1):
+ await _add_event_and_auth_chain_to_graph(
graph, room_id, event_id, event_map, state_res_store, auth_diff
)
+ # We await occasionally when we're working with large data sets to
+ # ensure that we don't block the reactor loop for too long.
+ if idx % _AWAIT_AFTER_ITERATIONS == 0:
+ await clock.sleep(0)
+
event_to_pl = {}
- for event_id in graph:
- pl = yield _get_power_level_for_sender(
+ for idx, event_id in enumerate(graph, start=1):
+ pl = await _get_power_level_for_sender(
room_id, event_id, event_map, state_res_store
)
event_to_pl[event_id] = pl
+ # We await occasionally when we're working with large data sets to
+ # ensure that we don't block the reactor loop for too long.
+ if idx % _AWAIT_AFTER_ITERATIONS == 0:
+ await clock.sleep(0)
+
def _get_power_order(event_id):
ev = event_map[event_id]
pl = event_to_pl[event_id]
@@ -360,33 +406,39 @@ def _reverse_topological_power_sort(
return sorted_events
-@defer.inlineCallbacks
-def _iterative_auth_checks(
- room_id, room_version, event_ids, base_state, event_map, state_res_store
-):
+async def _iterative_auth_checks(
+ clock: Clock,
+ room_id: str,
+ room_version: str,
+ event_ids: List[str],
+ base_state: StateMap[str],
+ event_map: Dict[str, EventBase],
+ state_res_store: "synapse.state.StateResolutionStore",
+) -> MutableStateMap[str]:
"""Sequentially apply auth checks to each event in given list, updating the
state as it goes along.
Args:
- room_id (str)
- room_version (str)
- event_ids (list[str]): Ordered list of events to apply auth checks to
- base_state (StateMap[str]): The set of state to start with
- event_map (dict[str,FrozenEvent])
- state_res_store (StateResolutionStore)
+ clock
+ room_id
+ room_version
+ event_ids: Ordered list of events to apply auth checks to
+ base_state: The set of state to start with
+ event_map
+ state_res_store
Returns:
- Deferred[StateMap[str]]: Returns the final updated state
+ Returns the final updated state
"""
- resolved_state = base_state.copy()
+ resolved_state = dict(base_state)
room_version_obj = KNOWN_ROOM_VERSIONS[room_version]
- for event_id in event_ids:
+ for idx, event_id in enumerate(event_ids, start=1):
event = event_map[event_id]
auth_events = {}
for aid in event.auth_event_ids():
- ev = yield _get_event(
+ ev = await _get_event(
room_id, aid, event_map, state_res_store, allow_none=True
)
@@ -401,7 +453,7 @@ def _iterative_auth_checks(
for key in event_auth.auth_types_for_event(event):
if key in resolved_state:
ev_id = resolved_state[key]
- ev = yield _get_event(room_id, ev_id, event_map, state_res_store)
+ ev = await _get_event(room_id, ev_id, event_map, state_res_store)
if ev.rejected_reason is None:
auth_events[key] = event_map[ev_id]
@@ -419,114 +471,173 @@ def _iterative_auth_checks(
except AuthError:
pass
+ # We await occasionally when we're working with large data sets to
+ # ensure that we don't block the reactor loop for too long.
+ if idx % _AWAIT_AFTER_ITERATIONS == 0:
+ await clock.sleep(0)
+
return resolved_state
-@defer.inlineCallbacks
-def _mainline_sort(
- room_id, event_ids, resolved_power_event_id, event_map, state_res_store
-):
+async def _mainline_sort(
+ clock: Clock,
+ room_id: str,
+ event_ids: List[str],
+ resolved_power_event_id: Optional[str],
+ event_map: Dict[str, EventBase],
+ state_res_store: "synapse.state.StateResolutionStore",
+) -> List[str]:
"""Returns a sorted list of event_ids sorted by mainline ordering based on
the given event resolved_power_event_id
Args:
- room_id (str): room we're working in
- event_ids (list[str]): Events to sort
- resolved_power_event_id (str): The final resolved power level event ID
- event_map (dict[str,FrozenEvent])
- state_res_store (StateResolutionStore)
+ clock
+ room_id: room we're working in
+ event_ids: Events to sort
+ resolved_power_event_id: The final resolved power level event ID
+ event_map
+ state_res_store
Returns:
- Deferred[list[str]]: The sorted list
+ The sorted list
"""
+ if not event_ids:
+ # It's possible for there to be no event IDs here to sort, so we can
+ # skip calculating the mainline in that case.
+ return []
+
mainline = []
pl = resolved_power_event_id
+ idx = 0
while pl:
mainline.append(pl)
- pl_ev = yield _get_event(room_id, pl, event_map, state_res_store)
+ pl_ev = await _get_event(room_id, pl, event_map, state_res_store)
auth_events = pl_ev.auth_event_ids()
pl = None
for aid in auth_events:
- ev = yield _get_event(
+ ev = await _get_event(
room_id, aid, event_map, state_res_store, allow_none=True
)
if ev and (ev.type, ev.state_key) == (EventTypes.PowerLevels, ""):
pl = aid
break
+ # We await occasionally when we're working with large data sets to
+ # ensure that we don't block the reactor loop for too long.
+ if idx != 0 and idx % _AWAIT_AFTER_ITERATIONS == 0:
+ await clock.sleep(0)
+
+ idx += 1
+
mainline_map = {ev_id: i + 1 for i, ev_id in enumerate(reversed(mainline))}
event_ids = list(event_ids)
order_map = {}
- for ev_id in event_ids:
- depth = yield _get_mainline_depth_for_event(
+ for idx, ev_id in enumerate(event_ids, start=1):
+ depth = await _get_mainline_depth_for_event(
event_map[ev_id], mainline_map, event_map, state_res_store
)
order_map[ev_id] = (depth, event_map[ev_id].origin_server_ts, ev_id)
+ # We await occasionally when we're working with large data sets to
+ # ensure that we don't block the reactor loop for too long.
+ if idx % _AWAIT_AFTER_ITERATIONS == 0:
+ await clock.sleep(0)
+
event_ids.sort(key=lambda ev_id: order_map[ev_id])
return event_ids
-@defer.inlineCallbacks
-def _get_mainline_depth_for_event(event, mainline_map, event_map, state_res_store):
+async def _get_mainline_depth_for_event(
+ event: EventBase,
+ mainline_map: Dict[str, int],
+ event_map: Dict[str, EventBase],
+ state_res_store: "synapse.state.StateResolutionStore",
+) -> int:
"""Get the mainline depths for the given event based on the mainline map
Args:
- event (FrozenEvent)
- mainline_map (dict[str, int]): Map from event_id to mainline depth for
- events in the mainline.
- event_map (dict[str,FrozenEvent])
- state_res_store (StateResolutionStore)
+ event
+ mainline_map: Map from event_id to mainline depth for events in the mainline.
+ event_map
+ state_res_store
Returns:
- Deferred[int]
+ The mainline depth
"""
room_id = event.room_id
+ tmp_event = event # type: Optional[EventBase]
# We do an iterative search, replacing `event with the power level in its
# auth events (if any)
- while event:
+ while tmp_event:
-        depth = mainline_map.get(event.event_id)
+        depth = mainline_map.get(tmp_event.event_id)
if depth is not None:
return depth
- auth_events = event.auth_event_ids()
- event = None
+ auth_events = tmp_event.auth_event_ids()
+ tmp_event = None
for aid in auth_events:
- aev = yield _get_event(
+ aev = await _get_event(
room_id, aid, event_map, state_res_store, allow_none=True
)
if aev and (aev.type, aev.state_key) == (EventTypes.PowerLevels, ""):
- event = aev
+ tmp_event = aev
break
# Didn't find a power level auth event, so we just return 0
return 0
-@defer.inlineCallbacks
-def _get_event(room_id, event_id, event_map, state_res_store, allow_none=False):
+@overload
+async def _get_event(
+ room_id: str,
+ event_id: str,
+ event_map: Dict[str, EventBase],
+ state_res_store: "synapse.state.StateResolutionStore",
+ allow_none: Literal[False] = False,
+) -> EventBase:
+ ...
+
+
+@overload
+async def _get_event(
+ room_id: str,
+ event_id: str,
+ event_map: Dict[str, EventBase],
+ state_res_store: "synapse.state.StateResolutionStore",
+ allow_none: Literal[True],
+) -> Optional[EventBase]:
+ ...
+
+
+async def _get_event(
+ room_id: str,
+ event_id: str,
+ event_map: Dict[str, EventBase],
+ state_res_store: "synapse.state.StateResolutionStore",
+ allow_none: bool = False,
+) -> Optional[EventBase]:
"""Helper function to look up event in event_map, falling back to looking
it up in the store
Args:
- room_id (str)
- event_id (str)
- event_map (dict[str,FrozenEvent])
- state_res_store (StateResolutionStore)
- allow_none (bool): if the event is not found, return None rather than raising
+ room_id
+ event_id
+ event_map
+ state_res_store
+ allow_none: if the event is not found, return None rather than raising
an exception
Returns:
- Deferred[Optional[FrozenEvent]]
+ The event, or none if the event does not exist (and allow_none is True).
"""
if event_id not in event_map:
- events = yield state_res_store.get_events([event_id], allow_rejected=True)
+ events = await state_res_store.get_events([event_id], allow_rejected=True)
event_map.update(events)
event = event_map.get(event_id)
@@ -543,7 +654,9 @@ def _get_event(room_id, event_id, event_map, state_res_store, allow_none=False):
return event
-def lexicographical_topological_sort(graph, key):
+def lexicographical_topological_sort(
+ graph: Dict[str, Set[str]], key: Callable[[str], Any]
+) -> Generator[str, None, None]:
"""Performs a lexicographic reverse topological sort on the graph.
This returns a reverse topological sort (i.e. if node A references B then B
@@ -553,26 +666,26 @@ def lexicographical_topological_sort(graph, key):
NOTE: `graph` is modified during the sort.
Args:
- graph (dict[str, set[str]]): A representation of the graph where each
- node is a key in the dict and its value are the nodes edges.
- key (func): A function that takes a node and returns a value that is
- comparable and used to order nodes
+ graph: A representation of the graph where each node is a key in the
+ dict and its value are the nodes edges.
+ key: A function that takes a node and returns a value that is comparable
+ and used to order nodes
Yields:
- str: The next node in the topological sort
+ The next node in the topological sort
"""
# Note, this is basically Kahn's algorithm except we look at nodes with no
# outgoing edges, c.f.
# https://en.wikipedia.org/wiki/Topological_sorting#Kahn's_algorithm
outdegree_map = graph
- reverse_graph = {}
+ reverse_graph = {} # type: Dict[str, Set[str]]
# Lists of nodes with zero out degree. Is actually a tuple of
# `(key(node), node)` so that sorting does the right thing
zero_outdegree = []
- for node, edges in iteritems(graph):
+ for node, edges in graph.items():
if len(edges) == 0:
zero_outdegree.append((key(node), node))
|