Merge branch 'develop' into request_logging

Conflicts: setup.py synapse/storage/_base.py synapse/util/async.py
author: Mark Haines <mark.haines@matrix.org> 2014-11-14 11:16:50 +0000
committer: Mark Haines <mark.haines@matrix.org> 2014-11-14 11:16:50 +0000
commit: e903c941cb1bed18026f00ed1d3495a8d172f13a (patch)
tree: 894da7441d913361b70da4cc13cd73ead86d2e67 /synapse/state.py
parent: Remove unused 'context' variables to appease pyflakes (diff)
parent: Add notification-service unit tests. (diff)
download: synapse-e903c941cb1bed18026f00ed1d3495a8d172f13a.tar.xz
1 files changed, 159 insertions, 183 deletions
diff --git a/synapse/state.py b/synapse/state.py
index 9db84c9b5c..1c999e4d79 100644
--- a/synapse/state.py
+++ b/synapse/state.py
@@ -16,11 +16,13 @@
 
 from twisted.internet import defer
 
-from synapse.federation.pdu_codec import encode_event_id, decode_event_id
 from synapse.util.logutils import log_function
+from synapse.util.async import run_on_reactor
+from synapse.api.events.room import RoomPowerLevelsEvent
 
 from collections import namedtuple
 
+import copy
 import logging
 import hashlib
 
@@ -35,230 +37,204 @@ KeyStateTuple = namedtuple("KeyStateTuple", ("context", "type", "state_key"))
 
 
 class StateHandler(object):
-    """ Repsonsible for doing state conflict resolution.
+    """ Responsible for doing state conflict resolution.
     """
 
     def __init__(self, hs):
         self.store = hs.get_datastore()
-        self._replication = hs.get_replication_layer()
-        self.server_name = hs.hostname
 
     @defer.inlineCallbacks
     @log_function
-    def handle_new_event(self, event, snapshot):
-        """ Given an event this works out if a) we have sufficient power level
-        to update the state and b) works out what the prev_state should be.
-
-        Returns:
-            Deferred: Resolved with a boolean indicating if we succesfully
-            updated the state.
-
-        Raised:
-            AuthError
+    def annotate_event_with_state(self, event, old_state=None):
+        """ Annotates the event with the current state events as of that event.
+
+        This method adds three new attributes to the event:
+            * `state_events`: The state up to and including the event. Encoded
+              as a dict mapping tuple (type, state_key) -> event.
+            * `old_state_events`: The state up to, but excluding, the event.
+              Encoded similarly as `state_events`.
+            * `state_group`: If there is an existing state group that can be
+              used, then return that. Otherwise return `None`. See state
+              storage for more information.
+
+        If the argument `old_state` is given (in the form of a list of
+        events), then they are used as the values for `old_state_events` and
+        the value for `state_events` is generated from it. `state_group` is
+        set to None.
+
+        This needs to be called before persisting the event.
         """
-        # This needs to be done in a transaction.
-
-        if not hasattr(event, "state_key"):
-            return
+        yield run_on_reactor()
 
-        key = KeyStateTuple(
-            event.room_id,
-            event.type,
-            _get_state_key_from_event(event)
-        )
+        if old_state:
+            event.state_group = None
+            event.old_state_events = {
+                (s.type, s.state_key): s for s in old_state
+            }
+            event.state_events = event.old_state_events
 
-        # Now I need to fill out the prev state and work out if it has auth
-        # (w.r.t. to power levels)
+            if hasattr(event, "state_key"):
+                event.state_events[(event.type, event.state_key)] = event
 
-        snapshot.fill_out_prev_events(event)
+            defer.returnValue(False)
+            return
 
-        event.prev_events = [
-            e for e in event.prev_events if e != event.event_id
-        ]
+        if hasattr(event, "outlier") and event.outlier:
+            event.state_group = None
+            event.old_state_events = None
+            event.state_events = {}
+            defer.returnValue(False)
+            return
 
-        current_state = snapshot.prev_state_pdu
+        ids = [e for e, _ in event.prev_events]
 
-        if current_state:
-            event.prev_state = encode_event_id(
-                current_state.pdu_id, current_state.origin
-            )
+        ret = yield self.resolve_state_groups(ids)
+        state_group, new_state = ret
 
-        # TODO check current_state to see if the min power level is less
-        # than the power level of the user
-        # power_level = self._get_power_level_for_event(event)
+        event.old_state_events = copy.deepcopy(new_state)
 
-        pdu_id, origin = decode_event_id(event.event_id, self.server_name)
+        if hasattr(event, "state_key"):
+            key = (event.type, event.state_key)
+            if key in new_state:
+                event.replaces_state = new_state[key].event_id
+            new_state[key] = event
+        elif state_group:
+            event.state_group = state_group
+            event.state_events = new_state
+            defer.returnValue(False)
 
-        yield self.store.update_current_state(
-            pdu_id=pdu_id,
-            origin=origin,
-            context=key.context,
-            pdu_type=key.type,
-            state_key=key.state_key
-        )
+        event.state_group = None
+        event.state_events = new_state
 
-        defer.returnValue(True)
+        defer.returnValue(hasattr(event, "state_key"))
 
     @defer.inlineCallbacks
-    @log_function
-    def handle_new_state(self, new_pdu):
-        """ Apply conflict resolution to `new_pdu`.
+    def get_current_state(self, room_id, event_type=None, state_key=""):
+        """ Returns the current state for the room as a list. This is done by
+        calling `get_latest_events_in_room` to get the leading edges of the
+        event graph and then resolving any of the state conflicts.
 
-        This should be called on every new state pdu, regardless of whether or
-        not there is a conflict.
+        This is equivalent to getting the state of an event that was to be
+        sent next, before receiving any new events.
 
-        This function is safe against the race of it getting called with two
-        `PDU`s trying to update the same state.
+        If `event_type` is specified, then the method returns only the one
+        event (or None) with that `event_type` and `state_key`.
         """
+        events = yield self.store.get_latest_events_in_room(room_id)
 
-        # This needs to be done in a transaction.
-
-        is_new = yield self._handle_new_state(new_pdu)
-
-        logger.debug("is_new: %s %s %s", is_new, new_pdu.pdu_id, new_pdu.origin)
+        event_ids = [
+            e_id
+            for e_id, _, _ in events
+        ]
 
-        if is_new:
-            yield self.store.update_current_state(
-                pdu_id=new_pdu.pdu_id,
-                origin=new_pdu.origin,
-                context=new_pdu.context,
-                pdu_type=new_pdu.pdu_type,
-                state_key=new_pdu.state_key
-            )
+        res = yield self.resolve_state_groups(event_ids)
 
-        defer.returnValue(is_new)
+        if event_type:
+            defer.returnValue(res[1].get((event_type, state_key)))
+            return
 
-    def _get_power_level_for_event(self, event):
-        # return self._persistence.get_power_level_for_user(event.room_id,
-            # event.sender)
-        return event.power_level
+        defer.returnValue(res[1].values())
 
     @defer.inlineCallbacks
     @log_function
-    def _handle_new_state(self, new_pdu):
-        tree, missing_branch = yield self.store.get_unresolved_state_tree(
-            new_pdu
-        )
-        new_branch, current_branch = tree
+    def resolve_state_groups(self, event_ids):
+        """ Given a list of event_ids this method fetches the state at each
+        event, resolves conflicts between them and returns them.
 
-        logger.debug(
-            "_handle_new_state new=%s, current=%s",
-            new_branch, current_branch
+        Return format is a tuple: (`state_group`, `state_events`), where the
+        first is the name of a state group if one and only one is involved,
+        otherwise `None`.
+        """
+        state_groups = yield self.store.get_state_groups(
+            event_ids
         )
 
-        if missing_branch is not None:
-            # We're missing some PDUs. Fetch them.
-            # TODO (erikj): Limit this.
-            missing_prev = tree[missing_branch][-1]
-
-            pdu_id = missing_prev.prev_state_id
-            origin = missing_prev.prev_state_origin
-
-            is_missing = yield self.store.get_pdu(pdu_id, origin) is None
-            if not is_missing:
-                raise Exception("Conflict resolution failed")
-
-            yield self._replication.get_pdu(
-                destination=missing_prev.origin,
-                pdu_origin=origin,
-                pdu_id=pdu_id,
-                outlier=True
-            )
-
-            updated_current = yield self._handle_new_state(new_pdu)
-            defer.returnValue(updated_current)
-
-        if not current_branch:
-            # There is no current state
-            defer.returnValue(True)
-            return
-
-        n = new_branch[-1]
-        c = current_branch[-1]
-
-        common_ancestor = n.pdu_id == c.pdu_id and n.origin == c.origin
-
-        if common_ancestor:
-            # We found a common ancestor!
-
-            if len(current_branch) == 1:
-                # This is a direct clobber so we can just...
-                defer.returnValue(True)
+        group_names = set(state_groups.keys())
+        if len(group_names) == 1:
+            name, state_list = state_groups.items().pop()
+            state = {
+                (e.type, e.state_key): e
+                for e in state_list
+            }
+            defer.returnValue((name, state))
+
+        state = {}
+        for group, g_state in state_groups.items():
+            for s in g_state:
+                state.setdefault(
+                    (s.type, s.state_key),
+                    {}
+                )[s.event_id] = s
+
+        unconflicted_state = {
+            k: v.values()[0] for k, v in state.items()
+            if len(v.values()) == 1
+        }
+
+        conflicted_state = {
+            k: v.values()
+            for k, v in state.items()
+            if len(v.values()) > 1
+        }
+
+        try:
+            new_state = {}
+            new_state.update(unconflicted_state)
+            for key, events in conflicted_state.items():
+                new_state[key] = self._resolve_state_events(events)
+        except:
+            logger.exception("Failed to resolve state")
+            raise
+
+        defer.returnValue((None, new_state))
+
+    def _get_power_level_from_event_state(self, event, user_id):
+        if hasattr(event, "old_state_events") and event.old_state_events:
+            key = (RoomPowerLevelsEvent.TYPE, "", )
+            power_level_event = event.old_state_events.get(key)
+            level = None
+            if power_level_event:
+                level = power_level_event.content.get("users", {}).get(
+                    user_id
+                )
+                if not level:
+                    level = power_level_event.content.get("users_default", 0)
 
+            return level
         else:
-            # We didn't find a common ancestor. This is probably fine.
-            pass
+            return 0
 
-        result = yield self._do_conflict_res(
-            new_branch, current_branch, common_ancestor
-        )
-        defer.returnValue(result)
+    @log_function
+    def _resolve_state_events(self, events):
+        curr_events = events
 
-    @defer.inlineCallbacks
-    def _do_conflict_res(self, new_branch, current_branch, common_ancestor):
-        conflict_res = [
-            self._do_power_level_conflict_res,
-            self._do_chain_length_conflict_res,
-            self._do_hash_conflict_res,
+        new_powers = [
+            self._get_power_level_from_event_state(e, e.user_id)
+            for e in curr_events
         ]
 
-        for algo in conflict_res:
-            new_res, curr_res = yield defer.maybeDeferred(
-                algo,
-                new_branch, current_branch, common_ancestor
-            )
-
-            if new_res < curr_res:
-                defer.returnValue(False)
-            elif new_res > curr_res:
-                defer.returnValue(True)
-
-        raise Exception("Conflict resolution failed.")
-
-    @defer.inlineCallbacks
-    def _do_power_level_conflict_res(self, new_branch, current_branch,
-                                     common_ancestor):
-        new_powers_deferreds = []
-        for e in new_branch[:-1] if common_ancestor else new_branch:
-            if hasattr(e, "user_id"):
-                new_powers_deferreds.append(
-                    self.store.get_power_level(e.context, e.user_id)
-                )
-
-        current_powers_deferreds = []
-        for e in current_branch[:-1] if common_ancestor else current_branch:
-            if hasattr(e, "user_id"):
-                current_powers_deferreds.append(
-                    self.store.get_power_level(e.context, e.user_id)
-                )
-
-        new_powers = yield defer.gatherResults(
-            new_powers_deferreds,
-            consumeErrors=True
-        )
-
-        current_powers = yield defer.gatherResults(
-            current_powers_deferreds,
-            consumeErrors=True
-        )
+        new_powers = [
+            int(p) if p else 0 for p in new_powers
+        ]
 
-        max_power_new = max(new_powers)
-        max_power_current = max(current_powers)
+        max_power = max(new_powers)
 
-        defer.returnValue(
-            (max_power_new, max_power_current)
-        )
-
-    def _do_chain_length_conflict_res(self, new_branch, current_branch,
-                                      common_ancestor):
-        return (len(new_branch), len(current_branch))
+        curr_events = [
+            z[0] for z in zip(curr_events, new_powers)
+            if z[1] == max_power
+        ]
 
-    def _do_hash_conflict_res(self, new_branch, current_branch,
-                              common_ancestor):
-        new_str = "".join([p.pdu_id + p.origin for p in new_branch])
-        c_str = "".join([p.pdu_id + p.origin for p in current_branch])
+        if not curr_events:
+            raise RuntimeError("Max didn't get a max?")
+        elif len(curr_events) == 1:
+            return curr_events[0]
 
+        # TODO: For now, just choose the one with the largest event_id.
         return (
-            hashlib.sha1(new_str).hexdigest(),
-            hashlib.sha1(c_str).hexdigest()
+            sorted(
+                curr_events,
+                key=lambda e: hashlib.sha1(
+                    e.event_id + e.user_id + e.room_id + e.type
+                ).hexdigest()
+            )[0]
         )
author	Mark Haines <mark.haines@matrix.org>	2014-11-14 11:16:50 +0000
committer	Mark Haines <mark.haines@matrix.org>	2014-11-14 11:16:50 +0000
commit	e903c941cb1bed18026f00ed1d3495a8d172f13a (patch)
tree	894da7441d913361b70da4cc13cd73ead86d2e67 /synapse/state.py
parent	Remove unused 'context' variables to appease pyflakes (diff)
parent	Add notification-service unit tests. (diff)
download	synapse-e903c941cb1bed18026f00ed1d3495a8d172f13a.tar.xz