Merge pull request #3383 from matrix-org/rav/hacky_speedup_state_group_cache

Improve StateGroupCache efficiency for wildcard lookups
author: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> 2018-06-22 14:01:55 +0100
committer: GitHub <noreply@github.com> 2018-06-22 14:01:55 +0100
commit: bb018d0b5be0f6584ea2b5b274ea4093de47809f (patch)
tree: bc4da02b414bd4d193b414522d894a63b452548e /synapse/storage/state.py
parent: Merge pull request #3382 from matrix-org/rav/optimise_state_groups (diff)
parent: Disable partial state group caching for wildcard lookups (diff)
download: synapse-bb018d0b5be0f6584ea2b5b274ea4093de47809f.tar.xz
1 files changed, 43 insertions, 13 deletions
diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index 986a20400c..cd9821c270 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -526,10 +526,23 @@ class StateGroupWorkerStore(SQLBaseStore):
 
     @defer.inlineCallbacks
     def _get_state_for_groups(self, groups, types=None):
-        """Given list of groups returns dict of group -> list of state events
-        with matching types. `types` is a list of `(type, state_key)`, where
-        a `state_key` of None matches all state_keys. If `types` is None then
-        all events are returned.
+        """Gets the state at each of a list of state groups, optionally
+        filtering by type/state_key
+
+        Args:
+            groups (iterable[int]): list of state groups for which we want
+                to get the state.
+            types (None|iterable[(str, None|str)]):
+                indicates the state type/keys required. If None, the whole
+                state is fetched and returned.
+
+                Otherwise, each entry should be a `(type, state_key)` tuple to
+                include in the response. A `state_key` of None is a wildcard
+                meaning that we require all state with that type.
+
+        Returns:
+            Deferred[dict[int, dict[(type, state_key), EventBase]]]
+                a dictionary mapping from state group to state dictionary.
         """
         if types:
             types = frozenset(types)
@@ -538,7 +551,7 @@ class StateGroupWorkerStore(SQLBaseStore):
         if types is not None:
             for group in set(groups):
                 state_dict_ids, _, got_all = self._get_some_state_from_cache(
-                    group, types
+                    group, types,
                 )
                 results[group] = state_dict_ids
 
@@ -559,22 +572,40 @@ class StateGroupWorkerStore(SQLBaseStore):
             # Okay, so we have some missing_types, lets fetch them.
             cache_seq_num = self._state_group_cache.sequence
 
+            # the DictionaryCache knows if it has *all* the state, but
+            # does not know if it has all of the keys of a particular type,
+            # which makes wildcard lookups expensive unless we have a complete
+            # cache. Hence, if we are doing a wildcard lookup, populate the
+            # cache fully so that we can do an efficient lookup next time.
+
+            if types and any(k is None for (t, k) in types):
+                types_to_fetch = None
+            else:
+                types_to_fetch = types
+
             group_to_state_dict = yield self._get_state_groups_from_groups(
-                missing_groups, types
+                missing_groups, types_to_fetch,
             )
 
-            # Now we want to update the cache with all the things we fetched
-            # from the database.
             for group, group_state_dict in iteritems(group_to_state_dict):
                 state_dict = results[group]
-                state_dict.update(group_state_dict)
 
+                # update the result, filtering by `types`.
+                if types:
+                    for k, v in iteritems(group_state_dict):
+                        (typ, _) = k
+                        if k in types or (typ, None) in types:
+                            state_dict[k] = v
+                else:
+                    state_dict.update(group_state_dict)
+
+                # update the cache with all the things we fetched from the
+                # database.
                 self._state_group_cache.update(
                     cache_seq_num,
                     key=group,
-                    value=state_dict,
-                    full=(types is None),
-                    known_absent=types,
+                    value=group_state_dict,
+                    fetched_keys=types_to_fetch,
                 )
 
         defer.returnValue(results)
@@ -681,7 +712,6 @@ class StateGroupWorkerStore(SQLBaseStore):
                 self._state_group_cache.sequence,
                 key=state_group,
                 value=dict(current_state_ids),
-                full=True,
             )
 
             return state_group
author	Richard van der Hoff <1389908+richvdh@users.noreply.github.com>	2018-06-22 14:01:55 +0100
committer	GitHub <noreply@github.com>	2018-06-22 14:01:55 +0100
commit	bb018d0b5be0f6584ea2b5b274ea4093de47809f (patch)
tree	bc4da02b414bd4d193b414522d894a63b452548e /synapse/storage/state.py
parent	Merge pull request #3382 from matrix-org/rav/optimise_state_groups (diff)
parent	Disable partial state group caching for wildcard lookups (diff)
download	synapse-bb018d0b5be0f6584ea2b5b274ea4093de47809f.tar.xz