diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index 7b76ee3b73..803b9d599d 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -18,6 +18,7 @@ from synapse.api.errors import StoreError
from synapse.util.logutils import log_function
from synapse.util.logcontext import preserve_context_over_fn, LoggingContext
from synapse.util.lrucache import LruCache
+from synapse.util.dictionary_cache import DictionaryCache
import synapse.metrics
from util.id_generators import IdGenerator, StreamIdGenerator
@@ -87,23 +88,33 @@ class Cache(object):
)
def get(self, *keyargs):
- if len(keyargs) != self.keylen:
- raise ValueError("Expected a key to have %d items", self.keylen)
+ try:
+ if len(keyargs) != self.keylen:
+                raise ValueError("Expected a key to have %d items" % (self.keylen,))
- val = self.cache.get(keyargs, self.sentinel)
- if val is not self.sentinel:
- cache_counter.inc_hits(self.name)
- return val
+ val = self.cache.get(keyargs, self.sentinel)
+ if val is not self.sentinel:
+ cache_counter.inc_hits(self.name)
+ return val
- cache_counter.inc_misses(self.name)
- raise KeyError()
+ cache_counter.inc_misses(self.name)
+ raise KeyError()
+ except KeyError:
+ raise
+        except Exception:
+            logger.exception("Cache.get failed for %s", self.name)
+ raise
def update(self, sequence, *args):
- self.check_thread()
- if self.sequence == sequence:
- # Only update the cache if the caches sequence number matches the
- # number that the cache had before the SELECT was started (SYN-369)
- self.prefill(*args)
+ try:
+ self.check_thread()
+ if self.sequence == sequence:
+ # Only update the cache if the caches sequence number matches the
+ # number that the cache had before the SELECT was started (SYN-369)
+ self.prefill(*args)
+        except Exception:
+            logger.exception("Cache.update failed for %s", self.name)
+ raise
def prefill(self, *args): # because I can't *keyargs, value
keyargs = args[:-1]
@@ -327,6 +338,8 @@ class SQLBaseStore(object):
self._get_event_cache = Cache("*getEvent*", keylen=3, lru=True,
max_entries=hs.config.event_cache_size)
+ self._state_group_cache = DictionaryCache("*stateGroupCache*", 100000)
+
self._event_fetch_lock = threading.Condition()
self._event_fetch_list = []
self._event_fetch_ongoing = 0
diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index 91a5ae86a4..a967b3d44b 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -45,52 +45,38 @@ class StateStore(SQLBaseStore):
"""
@defer.inlineCallbacks
- def get_state_groups(self, event_ids):
+ def get_state_groups(self, room_id, event_ids):
""" Get the state groups for the given list of event_ids
The return value is a dict mapping group names to lists of events.
"""
- def f(txn):
- groups = set()
- for event_id in event_ids:
- group = self._simple_select_one_onecol_txn(
- txn,
- table="event_to_state_groups",
- keyvalues={"event_id": event_id},
- retcol="state_group",
- allow_none=True,
- )
- if group:
- groups.add(group)
-
- res = {}
- for group in groups:
- state_ids = self._simple_select_onecol_txn(
- txn,
- table="state_groups_state",
- keyvalues={"state_group": group},
- retcol="event_id",
- )
-
- res[group] = state_ids
+ event_and_groups = yield defer.gatherResults(
+ [
+ self._get_state_group_for_event(
+ room_id, event_id,
+ ).addCallback(lambda group, event_id: (event_id, group), event_id)
+ for event_id in event_ids
+ ],
+ consumeErrors=True,
+ ).addErrback(unwrapFirstError)
- return res
+ groups = set(group for _, group in event_and_groups if group)
- states = yield self.runInteraction(
- "get_state_groups",
- f,
- )
-
- state_list = yield defer.gatherResults(
+ group_to_state = yield defer.gatherResults(
[
- self._fetch_events_for_group(group, vals)
- for group, vals in states.items()
+ self._get_state_for_group(
+ group,
+ ).addCallback(lambda state_dict, group: (group, state_dict), group)
+ for group in groups
],
consumeErrors=True,
- )
+ ).addErrback(unwrapFirstError)
- defer.returnValue(dict(state_list))
+ defer.returnValue({
+ group: state_map.values()
+ for group, state_map in group_to_state
+ })
@cached(num_args=1)
def _fetch_events_for_group(self, key, events):
@@ -207,16 +193,25 @@ class StateStore(SQLBaseStore):
events = yield self._get_events(event_ids, get_prev_content=False)
defer.returnValue(events)
- @cached(num_args=3, lru=True)
- def _get_state_groups_from_group(self, room_id, group, types):
+ @cached(num_args=2, lru=True, max_entries=10000)
+ def _get_state_groups_from_group(self, group, types):
def f(txn):
+ if types is not None:
+ where_clause = "AND (%s)" % (
+ " OR ".join(["(type = ? AND state_key = ?)"] * len(types)),
+ )
+ else:
+ where_clause = ""
+
sql = (
"SELECT event_id FROM state_groups_state WHERE"
- " room_id = ? AND state_group = ? AND (%s)"
- ) % (" OR ".join(["(type = ? AND state_key = ?)"] * len(types)),)
+ " state_group = ? %s"
+ ) % (where_clause,)
+
+ args = [group]
+ if types is not None:
+ args.extend([i for typ in types for i in typ])
- args = [room_id, group]
- args.extend([i for typ in types for i in typ])
txn.execute(sql, args)
return group, [
@@ -229,7 +224,7 @@ class StateStore(SQLBaseStore):
f,
)
- @cached(num_args=3, lru=True, max_entries=100000)
+ @cached(num_args=3, lru=True, max_entries=20000)
def _get_state_for_event_id(self, room_id, event_id, types):
def f(txn):
type_and_state_sql = " OR ".join([
@@ -280,40 +275,33 @@ class StateStore(SQLBaseStore):
deferred: A list of dicts corresponding to the event_ids given.
The dicts are mappings from (type, state_key) -> state_events
"""
- set_types = frozenset(types)
- res = yield defer.gatherResults(
+ event_and_groups = yield defer.gatherResults(
[
- self._get_state_for_event_id(
- room_id, event_id, set_types,
- )
+ self._get_state_group_for_event(
+ room_id, event_id,
+ ).addCallback(lambda group, event_id: (event_id, group), event_id)
for event_id in event_ids
],
consumeErrors=True,
).addErrback(unwrapFirstError)
- event_to_state_ids = dict(res)
+ groups = set(group for _, group in event_and_groups)
- event_dict = yield self._get_events(
+ res = yield defer.gatherResults(
[
- item
- for lst in event_to_state_ids.values()
- for item in lst
+ self._get_state_for_group(
+ group, types
+ ).addCallback(lambda state_dict, group: (group, state_dict), group)
+ for group in groups
],
- get_prev_content=False
- ).addCallback(
- lambda evs: {ev.event_id: ev for ev in evs}
- )
+ consumeErrors=True,
+ ).addErrback(unwrapFirstError)
+
+ group_to_state = dict(res)
event_to_state = {
- event_id: {
- (ev.type, ev.state_key): ev
- for ev in [
- event_dict[state_id]
- for state_id in state_ids
- if state_id in event_dict
- ]
- }
- for event_id, state_ids in event_to_state_ids.items()
+ event_id: group_to_state[group]
+ for event_id, group in event_and_groups
}
defer.returnValue([
@@ -321,6 +309,79 @@ class StateStore(SQLBaseStore):
for event in event_ids
])
+ @cached(num_args=2, lru=True, max_entries=100000)
+ def _get_state_group_for_event(self, room_id, event_id):
+ return self._simple_select_one_onecol(
+ table="event_to_state_groups",
+ keyvalues={
+ "event_id": event_id,
+ },
+ retcol="state_group",
+ allow_none=True,
+ desc="_get_state_group_for_event",
+ )
+
+ @defer.inlineCallbacks
+ def _get_state_for_group(self, group, types=None):
+ is_all, state_dict = self._state_group_cache.get(group)
+
+ type_to_key = {}
+ missing_types = set()
+ if types is not None:
+ for typ, state_key in types:
+ if state_key is None:
+ type_to_key[typ] = None
+ missing_types.add((typ, state_key))
+ else:
+ if type_to_key.get(typ, object()) is not None:
+ type_to_key.setdefault(typ, set()).add(state_key)
+
+ if (typ, state_key) not in state_dict:
+ missing_types.add((typ, state_key))
+
+ if is_all and types is None:
+ defer.returnValue(state_dict)
+
+ if is_all or (types is not None and not missing_types):
+ def include(typ, state_key):
+ sentinel = object()
+ valid_state_keys = type_to_key.get(typ, sentinel)
+ if valid_state_keys is sentinel:
+ return False
+ if valid_state_keys is None:
+ return True
+ if state_key in valid_state_keys:
+ return True
+ return False
+
+ defer.returnValue({
+ k: v
+ for k, v in state_dict.items()
+ if include(k[0], k[1])
+ })
+
+ # Okay, so we have some missing_types, lets fetch them.
+ cache_seq_num = self._state_group_cache.sequence
+ _, state_ids = yield self._get_state_groups_from_group(
+ group,
+ frozenset(types) if types else None
+ )
+ state_events = yield self._get_events(state_ids, get_prev_content=False)
+ state_dict = {
+ (e.type, e.state_key): e
+ for e in state_events
+ }
+
+ # Update the cache
+ self._state_group_cache.update(
+ cache_seq_num,
+ key=group,
+ value=state_dict,
+ full=(types is None),
+ )
+
+ defer.returnValue(state_dict)
+
def _make_group_id(clock):
return str(int(clock.time_msec())) + random_string(5)
diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py
index af45fc5619..9db259d5fc 100644
--- a/synapse/storage/stream.py
+++ b/synapse/storage/stream.py
@@ -300,8 +300,7 @@ class StreamStore(SQLBaseStore):
defer.returnValue((events, token))
@defer.inlineCallbacks
- def get_recent_events_for_room(self, room_id, limit, end_token,
- with_feedback=False, from_token=None):
+ def get_recent_events_for_room(self, room_id, limit, end_token, from_token=None):
# TODO (erikj): Handle compressed feedback
end_token = RoomStreamToken.parse_stream_token(end_token)
|