diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py
index b22495c1f9..836e137296 100644
--- a/synapse/state/__init__.py
+++ b/synapse/state/__init__.py
@@ -19,13 +19,14 @@ from collections import namedtuple
from six import iteritems, itervalues
+import attr
from frozendict import frozendict
from twisted.internet import defer
from synapse.api.constants import EventTypes, RoomVersions
from synapse.events.snapshot import EventContext
-from synapse.state import v1
+from synapse.state import v1, v2
from synapse.util.async_helpers import Linearizer
from synapse.util.caches import get_cache_factor_for
from synapse.util.caches.expiringcache import ExpiringCache
@@ -372,15 +373,10 @@ class StateHandler(object):
result = yield self._state_resolution_handler.resolve_state_groups(
room_id, room_version, state_groups_ids, None,
- self._state_map_factory,
+ state_res_store=StateResolutionStore(self.store),
)
defer.returnValue(result)
- def _state_map_factory(self, ev_ids):
- return self.store.get_events(
- ev_ids, get_prev_content=False, check_redacted=False,
- )
-
@defer.inlineCallbacks
def resolve_events(self, room_version, state_sets, event):
logger.info(
@@ -401,7 +397,7 @@ class StateHandler(object):
new_state = yield resolve_events_with_factory(
room_version, state_set_ids,
event_map=state_map,
- state_map_factory=self._state_map_factory
+ state_res_store=StateResolutionStore(self.store),
)
new_state = {
@@ -436,7 +432,7 @@ class StateResolutionHandler(object):
@defer.inlineCallbacks
@log_function
def resolve_state_groups(
- self, room_id, room_version, state_groups_ids, event_map, state_map_factory,
+ self, room_id, room_version, state_groups_ids, event_map, state_res_store,
):
"""Resolves conflicts between a set of state groups
@@ -454,9 +450,11 @@ class StateResolutionHandler(object):
a dict from event_id to event, for any events that we happen to
have in flight (eg, those currently being persisted). This will be
used as a starting point fof finding the state we need; any missing
- events will be requested via state_map_factory.
+ events will be requested via state_res_store.
+
+ If None, all events will be fetched via state_res_store.
- If None, all events will be fetched via state_map_factory.
+ state_res_store (StateResolutionStore)
Returns:
Deferred[_StateCacheEntry]: resolved state
@@ -502,7 +500,7 @@ class StateResolutionHandler(object):
room_version,
list(itervalues(state_groups_ids)),
event_map=event_map,
- state_map_factory=state_map_factory,
+ state_res_store=state_res_store,
)
# if the new state matches any of the input state groups, we can
@@ -583,7 +581,7 @@ def _make_state_cache_entry(
)
-def resolve_events_with_factory(room_version, state_sets, event_map, state_map_factory):
+def resolve_events_with_factory(room_version, state_sets, event_map, state_res_store):
"""
Args:
room_version(str): Version of the room
@@ -599,17 +597,19 @@ def resolve_events_with_factory(room_version, state_sets, event_map, state_map_f
If None, all events will be fetched via state_map_factory.
- state_map_factory(func): will be called
- with a list of event_ids that are needed, and should return with
- a Deferred of dict of event_id to event.
+ state_res_store (StateResolutionStore)
Returns
Deferred[dict[(str, str), str]]:
a map from (type, state_key) to event_id.
"""
- if room_version in (RoomVersions.V1, RoomVersions.VDH_TEST,):
+ if room_version == RoomVersions.V1:
return v1.resolve_events_with_factory(
- state_sets, event_map, state_map_factory,
+ state_sets, event_map, state_res_store.get_events,
+ )
+ elif room_version == RoomVersions.VDH_TEST:
+ return v2.resolve_events_with_factory(
+ state_sets, event_map, state_res_store,
)
else:
# This should only happen if we added a version but forgot to add it to
@@ -617,3 +617,54 @@ def resolve_events_with_factory(room_version, state_sets, event_map, state_map_f
raise Exception(
"No state resolution algorithm defined for version %r" % (room_version,)
)
+
+
+@attr.s
+class StateResolutionStore(object):
+ """Interface that allows state resolution algorithms to access the database
+ in well defined way.
+
+ Args:
+ store (DataStore)
+ """
+
+ store = attr.ib()
+
+ def get_events(self, event_ids, allow_rejected=False):
+ """Get events from the database
+
+ Args:
+ event_ids (list): The event_ids of the events to fetch
+ allow_rejected (bool): If True return rejected events.
+
+ Returns:
+ Deferred[dict[str, FrozenEvent]]: Dict from event_id to event.
+ """
+
+ return self.store.get_events(
+ event_ids,
+ check_redacted=False,
+ get_prev_content=False,
+ allow_rejected=allow_rejected,
+ )
+
+ def get_auth_chain(self, event_ids):
+ """Gets the full auth chain for a set of events (including rejected
+ events).
+
+ Includes the given event IDs in the result.
+
+ Note that:
+ 1. All events must be state events.
+ 2. For v1 rooms this may not have the full auth chain in the
+ presence of rejected events
+
+ Args:
+ event_ids (list): The event IDs of the events to fetch the auth
+ chain for. Must be state events.
+
+ Returns:
+ Deferred[list[str]]: List of event IDs of the auth chain.
+ """
+
+ return self.store.get_auth_chain_ids(event_ids, include_given=True)
diff --git a/synapse/state/v2.py b/synapse/state/v2.py
new file mode 100644
index 0000000000..d034b4f38e
--- /dev/null
+++ b/synapse/state/v2.py
@@ -0,0 +1,544 @@
+# -*- coding: utf-8 -*-
+# Copyright 2018 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import heapq
+import itertools
+import logging
+
+from six import iteritems, itervalues
+
+from twisted.internet import defer
+
+from synapse import event_auth
+from synapse.api.constants import EventTypes
+from synapse.api.errors import AuthError
+
+logger = logging.getLogger(__name__)
+
+
+@defer.inlineCallbacks
+def resolve_events_with_factory(state_sets, event_map, state_res_store):
+ """Resolves the state using the v2 state resolution algorithm
+
+ Args:
+ state_sets(list): List of dicts of (type, state_key) -> event_id,
+ which are the different state groups to resolve.
+
+ event_map(dict[str,FrozenEvent]|None):
+ a dict from event_id to event, for any events that we happen to
+ have in flight (eg, those currently being persisted). This will be
+ used as a starting point fof finding the state we need; any missing
+ events will be requested via state_res_store.
+
+ If None, all events will be fetched via state_res_store.
+
+ state_res_store (StateResolutionStore)
+
+ Returns
+ Deferred[dict[(str, str), str]]:
+ a map from (type, state_key) to event_id.
+ """
+
+ logger.debug("Computing conflicted state")
+
+ # First split up the un/conflicted state
+ unconflicted_state, conflicted_state = _seperate(state_sets)
+
+ if not conflicted_state:
+ defer.returnValue(unconflicted_state)
+
+ logger.debug("%d conflicted state entries", len(conflicted_state))
+ logger.debug("Calculating auth chain difference")
+
+ # Also fetch all auth events that appear in only some of the state sets'
+ # auth chains.
+ auth_diff = yield _get_auth_chain_difference(
+ state_sets, event_map, state_res_store,
+ )
+
+ full_conflicted_set = set(itertools.chain(
+ itertools.chain.from_iterable(itervalues(conflicted_state)),
+ auth_diff,
+ ))
+
+ events = yield state_res_store.get_events([
+ eid for eid in full_conflicted_set
+ if eid not in event_map
+ ], allow_rejected=True)
+ event_map.update(events)
+
+ full_conflicted_set = set(eid for eid in full_conflicted_set if eid in event_map)
+
+ logger.debug("%d full_conflicted_set entries", len(full_conflicted_set))
+
+ # Get and sort all the power events (kicks/bans/etc)
+ power_events = (
+ eid for eid in full_conflicted_set
+ if _is_power_event(event_map[eid])
+ )
+
+ sorted_power_events = yield _reverse_topological_power_sort(
+ power_events,
+ event_map,
+ state_res_store,
+ full_conflicted_set,
+ )
+
+ logger.debug("sorted %d power events", len(sorted_power_events))
+
+ # Now sequentially auth each one
+ resolved_state = yield _iterative_auth_checks(
+ sorted_power_events, unconflicted_state, event_map,
+ state_res_store,
+ )
+
+ logger.debug("resolved power events")
+
+ # OK, so we've now resolved the power events. Now sort the remaining
+ # events using the mainline of the resolved power level.
+
+ leftover_events = [
+ ev_id
+ for ev_id in full_conflicted_set
+ if ev_id not in sorted_power_events
+ ]
+
+ logger.debug("sorting %d remaining events", len(leftover_events))
+
+ pl = resolved_state.get((EventTypes.PowerLevels, ""), None)
+ leftover_events = yield _mainline_sort(
+ leftover_events, pl, event_map, state_res_store,
+ )
+
+ logger.debug("resolving remaining events")
+
+ resolved_state = yield _iterative_auth_checks(
+ leftover_events, resolved_state, event_map,
+ state_res_store,
+ )
+
+ logger.debug("resolved")
+
+ # We make sure that unconflicted state always still applies.
+ resolved_state.update(unconflicted_state)
+
+ logger.debug("done")
+
+ defer.returnValue(resolved_state)
+
+
+@defer.inlineCallbacks
+def _get_power_level_for_sender(event_id, event_map, state_res_store):
+ """Return the power level of the sender of the given event according to
+ their auth events.
+
+ Args:
+ event_id (str)
+ event_map (dict[str,FrozenEvent])
+ state_res_store (StateResolutionStore)
+
+ Returns:
+ Deferred[int]
+ """
+ event = yield _get_event(event_id, event_map, state_res_store)
+
+ pl = None
+ for aid, _ in event.auth_events:
+ aev = yield _get_event(aid, event_map, state_res_store)
+ if (aev.type, aev.state_key) == (EventTypes.PowerLevels, ""):
+ pl = aev
+ break
+
+ if pl is None:
+ # Couldn't find power level. Check if they're the creator of the room
+ for aid, _ in event.auth_events:
+ aev = yield _get_event(aid, event_map, state_res_store)
+ if (aev.type, aev.state_key) == (EventTypes.Create, ""):
+ if aev.content.get("creator") == event.sender:
+ defer.returnValue(100)
+ break
+ defer.returnValue(0)
+
+ level = pl.content.get("users", {}).get(event.sender)
+ if level is None:
+ level = pl.content.get("users_default", 0)
+
+ if level is None:
+ defer.returnValue(0)
+ else:
+ defer.returnValue(int(level))
+
+
+@defer.inlineCallbacks
+def _get_auth_chain_difference(state_sets, event_map, state_res_store):
+ """Compare the auth chains of each state set and return the set of events
+ that only appear in some but not all of the auth chains.
+
+ Args:
+ state_sets (list)
+ event_map (dict[str,FrozenEvent])
+ state_res_store (StateResolutionStore)
+
+ Returns:
+ Deferred[set[str]]: Set of event IDs
+ """
+ common = set(itervalues(state_sets[0])).intersection(
+ *(itervalues(s) for s in state_sets[1:])
+ )
+
+ auth_sets = []
+ for state_set in state_sets:
+ auth_ids = set(
+ eid
+ for key, eid in iteritems(state_set)
+ if (key[0] in (
+ EventTypes.Member,
+ EventTypes.ThirdPartyInvite,
+ ) or key in (
+ (EventTypes.PowerLevels, ''),
+ (EventTypes.Create, ''),
+ (EventTypes.JoinRules, ''),
+ )) and eid not in common
+ )
+
+ auth_chain = yield state_res_store.get_auth_chain(auth_ids)
+ auth_ids.update(auth_chain)
+
+ auth_sets.append(auth_ids)
+
+ intersection = set(auth_sets[0]).intersection(*auth_sets[1:])
+ union = set().union(*auth_sets)
+
+ defer.returnValue(union - intersection)
+
+
+def _seperate(state_sets):
+ """Return the unconflicted and conflicted state. This is different than in
+ the original algorithm, as this defines a key to be conflicted if one of
+ the state sets doesn't have that key.
+
+ Args:
+ state_sets (list)
+
+ Returns:
+ tuple[dict, dict]: A tuple of unconflicted and conflicted state. The
+ conflicted state dict is a map from type/state_key to set of event IDs
+ """
+ unconflicted_state = {}
+ conflicted_state = {}
+
+ for key in set(itertools.chain.from_iterable(state_sets)):
+ event_ids = set(state_set.get(key) for state_set in state_sets)
+ if len(event_ids) == 1:
+ unconflicted_state[key] = event_ids.pop()
+ else:
+ event_ids.discard(None)
+ conflicted_state[key] = event_ids
+
+ return unconflicted_state, conflicted_state
+
+
+def _is_power_event(event):
+ """Return whether or not the event is a "power event", as defined by the
+ v2 state resolution algorithm
+
+ Args:
+ event (FrozenEvent)
+
+ Returns:
+ boolean
+ """
+ if (event.type, event.state_key) in (
+ (EventTypes.PowerLevels, ""),
+ (EventTypes.JoinRules, ""),
+ (EventTypes.Create, ""),
+ ):
+ return True
+
+ if event.type == EventTypes.Member:
+ if event.membership in ('leave', 'ban'):
+ return event.sender != event.state_key
+
+ return False
+
+
+@defer.inlineCallbacks
+def _add_event_and_auth_chain_to_graph(graph, event_id, event_map,
+ state_res_store, auth_diff):
+ """Helper function for _reverse_topological_power_sort that add the event
+ and its auth chain (that is in the auth diff) to the graph
+
+ Args:
+ graph (dict[str, set[str]]): A map from event ID to the events auth
+ event IDs
+ event_id (str): Event to add to the graph
+ event_map (dict[str,FrozenEvent])
+ state_res_store (StateResolutionStore)
+ auth_diff (set[str]): Set of event IDs that are in the auth difference.
+ """
+
+ state = [event_id]
+ while state:
+ eid = state.pop()
+ graph.setdefault(eid, set())
+
+ event = yield _get_event(eid, event_map, state_res_store)
+ for aid, _ in event.auth_events:
+ if aid in auth_diff:
+ if aid not in graph:
+ state.append(aid)
+
+ graph.setdefault(eid, set()).add(aid)
+
+
+@defer.inlineCallbacks
+def _reverse_topological_power_sort(event_ids, event_map, state_res_store, auth_diff):
+ """Returns a list of the event_ids sorted by reverse topological ordering,
+ and then by power level and origin_server_ts
+
+ Args:
+ event_ids (list[str]): The events to sort
+ event_map (dict[str,FrozenEvent])
+ state_res_store (StateResolutionStore)
+ auth_diff (set[str]): Set of event IDs that are in the auth difference.
+
+ Returns:
+ Deferred[list[str]]: The sorted list
+ """
+
+ graph = {}
+ for event_id in event_ids:
+ yield _add_event_and_auth_chain_to_graph(
+ graph, event_id, event_map, state_res_store, auth_diff,
+ )
+
+ event_to_pl = {}
+ for event_id in graph:
+ pl = yield _get_power_level_for_sender(event_id, event_map, state_res_store)
+ event_to_pl[event_id] = pl
+
+ def _get_power_order(event_id):
+ ev = event_map[event_id]
+ pl = event_to_pl[event_id]
+
+ return -pl, ev.origin_server_ts, event_id
+
+ # Note: graph is modified during the sort
+ it = lexicographical_topological_sort(
+ graph,
+ key=_get_power_order,
+ )
+ sorted_events = list(it)
+
+ defer.returnValue(sorted_events)
+
+
+@defer.inlineCallbacks
+def _iterative_auth_checks(event_ids, base_state, event_map, state_res_store):
+ """Sequentially apply auth checks to each event in given list, updating the
+ state as it goes along.
+
+ Args:
+ event_ids (list[str]): Ordered list of events to apply auth checks to
+ base_state (dict[tuple[str, str], str]): The set of state to start with
+ event_map (dict[str,FrozenEvent])
+ state_res_store (StateResolutionStore)
+
+ Returns:
+ Deferred[dict[tuple[str, str], str]]: Returns the final updated state
+ """
+ resolved_state = base_state.copy()
+
+ for event_id in event_ids:
+ event = event_map[event_id]
+
+ auth_events = {}
+ for aid, _ in event.auth_events:
+ ev = yield _get_event(aid, event_map, state_res_store)
+
+ if ev.rejected_reason is None:
+ auth_events[(ev.type, ev.state_key)] = ev
+
+ for key in event_auth.auth_types_for_event(event):
+ if key in resolved_state:
+ ev_id = resolved_state[key]
+ ev = yield _get_event(ev_id, event_map, state_res_store)
+
+ if ev.rejected_reason is None:
+ auth_events[key] = event_map[ev_id]
+
+ try:
+ event_auth.check(
+ event, auth_events,
+ do_sig_check=False,
+ do_size_check=False
+ )
+
+ resolved_state[(event.type, event.state_key)] = event_id
+ except AuthError:
+ pass
+
+ defer.returnValue(resolved_state)
+
+
+@defer.inlineCallbacks
+def _mainline_sort(event_ids, resolved_power_event_id, event_map,
+ state_res_store):
+ """Returns a sorted list of event_ids sorted by mainline ordering based on
+ the given event resolved_power_event_id
+
+ Args:
+ event_ids (list[str]): Events to sort
+ resolved_power_event_id (str): The final resolved power level event ID
+ event_map (dict[str,FrozenEvent])
+ state_res_store (StateResolutionStore)
+
+ Returns:
+ Deferred[list[str]]: The sorted list
+ """
+ mainline = []
+ pl = resolved_power_event_id
+ while pl:
+ mainline.append(pl)
+ pl_ev = yield _get_event(pl, event_map, state_res_store)
+ auth_events = pl_ev.auth_events
+ pl = None
+ for aid, _ in auth_events:
+ ev = yield _get_event(aid, event_map, state_res_store)
+ if (ev.type, ev.state_key) == (EventTypes.PowerLevels, ""):
+ pl = aid
+ break
+
+ mainline_map = {ev_id: i + 1 for i, ev_id in enumerate(reversed(mainline))}
+
+ event_ids = list(event_ids)
+
+ order_map = {}
+ for ev_id in event_ids:
+ depth = yield _get_mainline_depth_for_event(
+ event_map[ev_id], mainline_map,
+ event_map, state_res_store,
+ )
+ order_map[ev_id] = (depth, event_map[ev_id].origin_server_ts, ev_id)
+
+ event_ids.sort(key=lambda ev_id: order_map[ev_id])
+
+ defer.returnValue(event_ids)
+
+
+@defer.inlineCallbacks
+def _get_mainline_depth_for_event(event, mainline_map, event_map, state_res_store):
+ """Get the mainline depths for the given event based on the mainline map
+
+ Args:
+ event (FrozenEvent)
+ mainline_map (dict[str, int]): Map from event_id to mainline depth for
+ events in the mainline.
+ event_map (dict[str,FrozenEvent])
+ state_res_store (StateResolutionStore)
+
+ Returns:
+ Deferred[int]
+ """
+
+ # We do an iterative search, replacing `event with the power level in its
+ # auth events (if any)
+ while event:
+ depth = mainline_map.get(event.event_id)
+ if depth is not None:
+ defer.returnValue(depth)
+
+ auth_events = event.auth_events
+ event = None
+
+ for aid, _ in auth_events:
+ aev = yield _get_event(aid, event_map, state_res_store)
+ if (aev.type, aev.state_key) == (EventTypes.PowerLevels, ""):
+ event = aev
+ break
+
+ # Didn't find a power level auth event, so we just return 0
+ defer.returnValue(0)
+
+
+@defer.inlineCallbacks
+def _get_event(event_id, event_map, state_res_store):
+ """Helper function to look up event in event_map, falling back to looking
+ it up in the store
+
+ Args:
+ event_id (str)
+ event_map (dict[str,FrozenEvent])
+ state_res_store (StateResolutionStore)
+
+ Returns:
+ Deferred[FrozenEvent]
+ """
+ if event_id not in event_map:
+ events = yield state_res_store.get_events([event_id], allow_rejected=True)
+ event_map.update(events)
+ defer.returnValue(event_map[event_id])
+
+
+def lexicographical_topological_sort(graph, key):
+ """Performs a lexicographic reverse topological sort on the graph.
+
+ This returns a reverse topological sort (i.e. if node A references B then B
+ appears before A in the sort), with ties broken lexicographically based on
+ return value of the `key` function.
+
+ NOTE: `graph` is modified during the sort.
+
+ Args:
+ graph (dict[str, set[str]]): A representation of the graph where each
+ node is a key in the dict and its value are the nodes edges.
+ key (func): A function that takes a node and returns a value that is
+ comparable and used to order nodes
+
+ Yields:
+ str: The next node in the topological sort
+ """
+
+ # Note, this is basically Kahn's algorithm except we look at nodes with no
+ # outgoing edges, c.f.
+ # https://en.wikipedia.org/wiki/Topological_sorting#Kahn's_algorithm
+ outdegree_map = graph
+ reverse_graph = {}
+
+ # Lists of nodes with zero out degree. Is actually a tuple of
+ # `(key(node), node)` so that sorting does the right thing
+ zero_outdegree = []
+
+ for node, edges in iteritems(graph):
+ if len(edges) == 0:
+ zero_outdegree.append((key(node), node))
+
+ reverse_graph.setdefault(node, set())
+ for edge in edges:
+ reverse_graph.setdefault(edge, set()).add(node)
+
+ # heapq is a built in implementation of a sorted queue.
+ heapq.heapify(zero_outdegree)
+
+ while zero_outdegree:
+ _, node = heapq.heappop(zero_outdegree)
+
+ for parent in reverse_graph[node]:
+ out = outdegree_map[parent]
+ out.discard(node)
+ if len(out) == 0:
+ heapq.heappush(zero_outdegree, (key(parent), parent))
+
+ yield node
|