diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index b27b3ae144..4b971efdba 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -21,15 +21,17 @@ from six.moves import range
from twisted.internet import defer
+from synapse.api.constants import EventTypes
+from synapse.api.errors import NotFoundError
+from synapse.storage._base import SQLBaseStore
from synapse.storage.background_updates import BackgroundUpdateStore
from synapse.storage.engines import PostgresEngine
+from synapse.storage.events_worker import EventsWorkerStore
from synapse.util.caches import get_cache_factor_for, intern_string
from synapse.util.caches.descriptors import cached, cachedList
from synapse.util.caches.dictionary_cache import DictionaryCache
from synapse.util.stringutils import to_ascii
-from ._base import SQLBaseStore
-
logger = logging.getLogger(__name__)
@@ -46,7 +48,8 @@ class _GetStateGroupDelta(namedtuple("_GetStateGroupDelta", ("prev_group", "delt
return len(self.delta_ids) if self.delta_ids else 0
-class StateGroupWorkerStore(SQLBaseStore):
+# this inherits from EventsWorkerStore because it calls self.get_events
+class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
"""The parts of StateGroupStore that can be called from workers.
"""
@@ -57,10 +60,69 @@ class StateGroupWorkerStore(SQLBaseStore):
def __init__(self, db_conn, hs):
super(StateGroupWorkerStore, self).__init__(db_conn, hs)
+ # Originally the state store used a single DictionaryCache to cache the
+ # event IDs for the state types in a given state group to avoid hammering
+ # on the state_group* tables.
+ #
+ # The point of using a DictionaryCache is that it can cache a subset
+ # of the state events for a given state group (i.e. a subset of the keys for a
+ # given dict which is an entry in the cache for a given state group ID).
+ #
+ # However, this poses problems when performing complicated queries
+ # on the store - for instance: "give me all the state for this group, but
+ # limit members to this subset of users", as DictionaryCache's API isn't
+ # rich enough to say "please cache any of these fields, apart from this subset".
+ # This is problematic when lazy loading members, which requires this behaviour,
+ # as without it the cache has no choice but to speculatively load all
+ # state events for the group, which negates the efficiency being sought.
+ #
+ # Rather than overcomplicating DictionaryCache's API, we instead split the
+ # state_group_cache into two halves - one for tracking non-member events,
+ # and the other for tracking member_events. This means that lazy loading
+ # queries can be made in a cache-friendly manner by querying both caches
+ # separately and then merging the result. So for the example above, you
+ # would query the members cache for a specific subset of state keys
+ # (which DictionaryCache will handle efficiently and fine) and the non-members
+ # cache for all state (which DictionaryCache will similarly handle fine)
+ # and then just merge the results together.
+ #
+ # We size the non-members cache to be smaller than the members cache as the
+ # vast majority of state in Matrix (today) is member events.
+
self._state_group_cache = DictionaryCache(
- "*stateGroupCache*", 500000 * get_cache_factor_for("stateGroupCache")
+ "*stateGroupCache*",
+ # TODO: this hasn't been tuned yet
+ 50000 * get_cache_factor_for("stateGroupCache")
+ )
+ self._state_group_members_cache = DictionaryCache(
+ "*stateGroupMembersCache*",
+ 500000 * get_cache_factor_for("stateGroupMembersCache")
)
+ @defer.inlineCallbacks
+ def get_room_version(self, room_id):
+ """Get the room_version of a given room
+
+ Args:
+ room_id (str)
+
+ Returns:
+ Deferred[str]
+
+ Raises:
+ NotFoundError if the room is unknown
+ """
+ # for now we do this by looking at the create event. We may want to cache this
+ # more intelligently in future.
+ state_ids = yield self.get_current_state_ids(room_id)
+ create_id = state_ids.get((EventTypes.Create, ""))
+
+ if not create_id:
+ raise NotFoundError("Unknown room")
+
+ create_event = yield self.get_event(create_id)
+ defer.returnValue(create_event.content.get("room_version", "1"))
+
@cached(max_entries=100000, iterable=True)
def get_current_state_ids(self, room_id):
"""Get the current state event ids for a room based on the
@@ -89,6 +151,69 @@ class StateGroupWorkerStore(SQLBaseStore):
_get_current_state_ids_txn,
)
+ # FIXME: how should this be cached?
+ def get_filtered_current_state_ids(self, room_id, types, filtered_types=None):
+ """Get the current state event of a given type for a room based on the
+ current_state_events table. This may not be as up-to-date as the result
+ of doing a fresh state resolution as per state_handler.get_current_state
+ Args:
+ room_id (str)
+ types (list[(Str, (Str|None))]): List of (type, state_key) tuples
+ which are used to filter the state fetched. `state_key` may be
+ None, which matches any `state_key`
+ filtered_types (list[Str]|None): List of types to apply the above filter to.
+ Returns:
+ deferred: dict of (type, state_key) -> event
+ """
+
+ include_other_types = False if filtered_types is None else True
+
+ def _get_filtered_current_state_ids_txn(txn):
+ results = {}
+ sql = """SELECT type, state_key, event_id FROM current_state_events
+ WHERE room_id = ? %s"""
+ # Turns out that postgres doesn't like doing a list of OR's and
+ # is about 1000x slower, so we just issue a query for each specific
+ # type seperately.
+ if types:
+ clause_to_args = [
+ (
+ "AND type = ? AND state_key = ?",
+ (etype, state_key)
+ ) if state_key is not None else (
+ "AND type = ?",
+ (etype,)
+ )
+ for etype, state_key in types
+ ]
+
+ if include_other_types:
+ unique_types = set(filtered_types)
+ clause_to_args.append(
+ (
+ "AND type <> ? " * len(unique_types),
+ list(unique_types)
+ )
+ )
+ else:
+ # If types is None we fetch all the state, and so just use an
+ # empty where clause with no extra args.
+ clause_to_args = [("", [])]
+ for where_clause, where_args in clause_to_args:
+ args = [room_id]
+ args.extend(where_args)
+ txn.execute(sql % (where_clause,), args)
+ for row in txn:
+ typ, state_key, event_id = row
+ key = (intern_string(typ), intern_string(state_key))
+ results[key] = event_id
+ return results
+
+ return self.runInteraction(
+ "get_filtered_current_state_ids",
+ _get_filtered_current_state_ids_txn,
+ )
+
@cached(max_entries=10000, iterable=True)
def get_state_group_delta(self, state_group):
"""Given a state group try to return a previous group and a delta between
@@ -185,7 +310,7 @@ class StateGroupWorkerStore(SQLBaseStore):
})
@defer.inlineCallbacks
- def _get_state_groups_from_groups(self, groups, types):
+ def _get_state_groups_from_groups(self, groups, types, members=None):
"""Returns the state groups for a given set of groups, filtering on
types of state events.
@@ -194,6 +319,9 @@ class StateGroupWorkerStore(SQLBaseStore):
types (Iterable[str, str|None]|None): list of 2-tuples of the form
(`type`, `state_key`), where a `state_key` of `None` matches all
state_keys for the `type`. If None, all types are returned.
+ members (bool|None): If not None, then, in addition to any filtering
+ implied by types, the results are also filtered to only include
+ member events (if True), or to exclude member events (if False)
Returns:
dictionary state_group -> (dict of (type, state_key) -> event id)
@@ -204,14 +332,14 @@ class StateGroupWorkerStore(SQLBaseStore):
for chunk in chunks:
res = yield self.runInteraction(
"_get_state_groups_from_groups",
- self._get_state_groups_from_groups_txn, chunk, types,
+ self._get_state_groups_from_groups_txn, chunk, types, members,
)
results.update(res)
defer.returnValue(results)
def _get_state_groups_from_groups_txn(
- self, txn, groups, types=None,
+ self, txn, groups, types=None, members=None,
):
results = {group: {} for group in groups}
@@ -249,6 +377,11 @@ class StateGroupWorkerStore(SQLBaseStore):
%s
""")
+ if members is True:
+ sql += " AND type = '%s'" % (EventTypes.Member,)
+ elif members is False:
+ sql += " AND type <> '%s'" % (EventTypes.Member,)
+
# Turns out that postgres doesn't like doing a list of OR's and
# is about 1000x slower, so we just issue a query for each specific
# type seperately.
@@ -296,6 +429,11 @@ class StateGroupWorkerStore(SQLBaseStore):
else:
where_clause = ""
+ if members is True:
+ where_clause += " AND type = '%s'" % EventTypes.Member
+ elif members is False:
+ where_clause += " AND type <> '%s'" % EventTypes.Member
+
# We don't use WITH RECURSIVE on sqlite3 as there are distributions
# that ship with an sqlite3 version that doesn't support it (e.g. wheezy)
for group in groups:
@@ -362,8 +500,7 @@ class StateGroupWorkerStore(SQLBaseStore):
If None, `types` filtering is applied to all events.
Returns:
- deferred: A list of dicts corresponding to the event_ids given.
- The dicts are mappings from (type, state_key) -> state_events
+ deferred: A dict of (event_id) -> (type, state_key) -> [state_events]
"""
event_to_groups = yield self._get_state_group_for_events(
event_ids,
@@ -391,7 +528,8 @@ class StateGroupWorkerStore(SQLBaseStore):
@defer.inlineCallbacks
def get_state_ids_for_events(self, event_ids, types=None, filtered_types=None):
"""
- Get the state dicts corresponding to a list of events
+ Get the state dicts corresponding to a list of events, containing the event_ids
+ of the state events (as opposed to the events themselves)
Args:
event_ids(list(str)): events whose state should be returned
@@ -404,7 +542,7 @@ class StateGroupWorkerStore(SQLBaseStore):
If None, `types` filtering is applied to all events.
Returns:
- A deferred dict from event_id -> (type, state_key) -> state_event
+ A deferred dict from event_id -> (type, state_key) -> event_id
"""
event_to_groups = yield self._get_state_group_for_events(
event_ids,
@@ -490,10 +628,11 @@ class StateGroupWorkerStore(SQLBaseStore):
defer.returnValue({row["event_id"]: row["state_group"] for row in rows})
- def _get_some_state_from_cache(self, group, types, filtered_types=None):
+ def _get_some_state_from_cache(self, cache, group, types, filtered_types=None):
"""Checks if group is in cache. See `_get_state_for_groups`
Args:
+ cache(DictionaryCache): the state group cache to use
group(int): The state group to lookup
types(list[str, str|None]): List of 2-tuples of the form
(`type`, `state_key`), where a `state_key` of `None` matches all
@@ -507,11 +646,11 @@ class StateGroupWorkerStore(SQLBaseStore):
requests state from the cache, if False we need to query the DB for the
missing state.
"""
- is_all, known_absent, state_dict_ids = self._state_group_cache.get(group)
+ is_all, known_absent, state_dict_ids = cache.get(group)
type_to_key = {}
- # tracks whether any of ourrequested types are missing from the cache
+ # tracks whether any of our requested types are missing from the cache
missing_types = False
for typ, state_key in types:
@@ -558,7 +697,7 @@ class StateGroupWorkerStore(SQLBaseStore):
if include(k[0], k[1])
}, got_all
- def _get_all_state_from_cache(self, group):
+ def _get_all_state_from_cache(self, cache, group):
"""Checks if group is in cache. See `_get_state_for_groups`
Returns 2-tuple (`state_dict`, `got_all`). `got_all` is a bool
@@ -566,9 +705,10 @@ class StateGroupWorkerStore(SQLBaseStore):
cache, if False we need to query the DB for the missing state.
Args:
+ cache(DictionaryCache): the state group cache to use
group: The state group to lookup
"""
- is_all, _, state_dict_ids = self._state_group_cache.get(group)
+ is_all, _, state_dict_ids = cache.get(group)
return state_dict_ids, is_all
@@ -595,6 +735,62 @@ class StateGroupWorkerStore(SQLBaseStore):
Deferred[dict[int, dict[(type, state_key), EventBase]]]
a dictionary mapping from state group to state dictionary.
"""
+ if types is not None:
+ non_member_types = [t for t in types if t[0] != EventTypes.Member]
+
+ if filtered_types is not None and EventTypes.Member not in filtered_types:
+ # we want all of the membership events
+ member_types = None
+ else:
+ member_types = [t for t in types if t[0] == EventTypes.Member]
+
+ else:
+ non_member_types = None
+ member_types = None
+
+ non_member_state = yield self._get_state_for_groups_using_cache(
+ groups, self._state_group_cache, non_member_types, filtered_types,
+ )
+ # XXX: we could skip this entirely if member_types is []
+ member_state = yield self._get_state_for_groups_using_cache(
+ # we set filtered_types=None as member_state only ever contain members.
+ groups, self._state_group_members_cache, member_types, None,
+ )
+
+ state = non_member_state
+ for group in groups:
+ state[group].update(member_state[group])
+
+ defer.returnValue(state)
+
+ @defer.inlineCallbacks
+ def _get_state_for_groups_using_cache(
+ self, groups, cache, types=None, filtered_types=None
+ ):
+ """Gets the state at each of a list of state groups, optionally
+ filtering by type/state_key, querying from a specific cache.
+
+ Args:
+ groups (iterable[int]): list of state groups for which we want
+ to get the state.
+ cache (DictionaryCache): the cache of group ids to state dicts which
+ we will pass through - either the normal state cache or the specific
+ members state cache.
+ types (None|iterable[(str, None|str)]):
+ indicates the state type/keys required. If None, the whole
+ state is fetched and returned.
+
+ Otherwise, each entry should be a `(type, state_key)` tuple to
+ include in the response. A `state_key` of None is a wildcard
+ meaning that we require all state with that type.
+ filtered_types(list[str]|None): Only apply filtering via `types` to this
+ list of event types. Other types of events are returned unfiltered.
+ If None, `types` filtering is applied to all events.
+
+ Returns:
+ Deferred[dict[int, dict[(type, state_key), EventBase]]]
+ a dictionary mapping from state group to state dictionary.
+ """
if types:
types = frozenset(types)
results = {}
@@ -602,7 +798,7 @@ class StateGroupWorkerStore(SQLBaseStore):
if types is not None:
for group in set(groups):
state_dict_ids, got_all = self._get_some_state_from_cache(
- group, types, filtered_types
+ cache, group, types, filtered_types
)
results[group] = state_dict_ids
@@ -611,7 +807,7 @@ class StateGroupWorkerStore(SQLBaseStore):
else:
for group in set(groups):
state_dict_ids, got_all = self._get_all_state_from_cache(
- group
+ cache, group
)
results[group] = state_dict_ids
@@ -620,8 +816,8 @@ class StateGroupWorkerStore(SQLBaseStore):
missing_groups.append(group)
if missing_groups:
- # Okay, so we have some missing_types, lets fetch them.
- cache_seq_num = self._state_group_cache.sequence
+ # Okay, so we have some missing_types, let's fetch them.
+ cache_seq_num = cache.sequence
# the DictionaryCache knows if it has *all* the state, but
# does not know if it has all of the keys of a particular type,
@@ -635,7 +831,7 @@ class StateGroupWorkerStore(SQLBaseStore):
types_to_fetch = types
group_to_state_dict = yield self._get_state_groups_from_groups(
- missing_groups, types_to_fetch
+ missing_groups, types_to_fetch, cache == self._state_group_members_cache,
)
for group, group_state_dict in iteritems(group_to_state_dict):
@@ -655,7 +851,7 @@ class StateGroupWorkerStore(SQLBaseStore):
# update the cache with all the things we fetched from the
# database.
- self._state_group_cache.update(
+ cache.update(
cache_seq_num,
key=group,
value=group_state_dict,
@@ -757,15 +953,33 @@ class StateGroupWorkerStore(SQLBaseStore):
],
)
- # Prefill the state group cache with this group.
+ # Prefill the state group caches with this group.
# It's fine to use the sequence like this as the state group map
# is immutable. (If the map wasn't immutable then this prefill could
# race with another update)
+
+ current_member_state_ids = {
+ s: ev
+ for (s, ev) in iteritems(current_state_ids)
+ if s[0] == EventTypes.Member
+ }
+ txn.call_after(
+ self._state_group_members_cache.update,
+ self._state_group_members_cache.sequence,
+ key=state_group,
+ value=dict(current_member_state_ids),
+ )
+
+ current_non_member_state_ids = {
+ s: ev
+ for (s, ev) in iteritems(current_state_ids)
+ if s[0] != EventTypes.Member
+ }
txn.call_after(
self._state_group_cache.update,
self._state_group_cache.sequence,
key=state_group,
- value=dict(current_state_ids),
+ value=dict(current_non_member_state_ids),
)
return state_group
|