diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index 7b76ee3b73..803b9d599d 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -18,6 +18,7 @@ from synapse.api.errors import StoreError
from synapse.util.logutils import log_function
from synapse.util.logcontext import preserve_context_over_fn, LoggingContext
from synapse.util.lrucache import LruCache
+from synapse.util.dictionary_cache import DictionaryCache
import synapse.metrics
from util.id_generators import IdGenerator, StreamIdGenerator
@@ -87,23 +88,33 @@ class Cache(object):
)
def get(self, *keyargs):
- if len(keyargs) != self.keylen:
- raise ValueError("Expected a key to have %d items", self.keylen)
+ try:
+ if len(keyargs) != self.keylen:
+                raise ValueError("Expected a key to have %d items" % (self.keylen,))
- val = self.cache.get(keyargs, self.sentinel)
- if val is not self.sentinel:
- cache_counter.inc_hits(self.name)
- return val
+ val = self.cache.get(keyargs, self.sentinel)
+ if val is not self.sentinel:
+ cache_counter.inc_hits(self.name)
+ return val
- cache_counter.inc_misses(self.name)
- raise KeyError()
+ cache_counter.inc_misses(self.name)
+ raise KeyError()
+ except KeyError:
+ raise
+        except Exception:
+            logger.exception("Cache.get failed for %s", self.name)
+ raise
def update(self, sequence, *args):
- self.check_thread()
- if self.sequence == sequence:
- # Only update the cache if the caches sequence number matches the
- # number that the cache had before the SELECT was started (SYN-369)
- self.prefill(*args)
+ try:
+ self.check_thread()
+ if self.sequence == sequence:
+ # Only update the cache if the caches sequence number matches the
+ # number that the cache had before the SELECT was started (SYN-369)
+ self.prefill(*args)
+        except Exception:
+            logger.exception("Cache.update failed for %s", self.name)
+ raise
def prefill(self, *args): # because I can't *keyargs, value
keyargs = args[:-1]
@@ -327,6 +338,8 @@ class SQLBaseStore(object):
self._get_event_cache = Cache("*getEvent*", keylen=3, lru=True,
max_entries=hs.config.event_cache_size)
+ self._state_group_cache = DictionaryCache("*stateGroupCache*", 100000)
+
self._event_fetch_lock = threading.Condition()
self._event_fetch_list = []
self._event_fetch_ongoing = 0
diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index 91a5ae86a4..a967b3d44b 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -45,52 +45,38 @@ class StateStore(SQLBaseStore):
"""
@defer.inlineCallbacks
- def get_state_groups(self, event_ids):
+ def get_state_groups(self, room_id, event_ids):
""" Get the state groups for the given list of event_ids
The return value is a dict mapping group names to lists of events.
"""
- def f(txn):
- groups = set()
- for event_id in event_ids:
- group = self._simple_select_one_onecol_txn(
- txn,
- table="event_to_state_groups",
- keyvalues={"event_id": event_id},
- retcol="state_group",
- allow_none=True,
- )
- if group:
- groups.add(group)
-
- res = {}
- for group in groups:
- state_ids = self._simple_select_onecol_txn(
- txn,
- table="state_groups_state",
- keyvalues={"state_group": group},
- retcol="event_id",
- )
-
- res[group] = state_ids
+ event_and_groups = yield defer.gatherResults(
+ [
+ self._get_state_group_for_event(
+ room_id, event_id,
+ ).addCallback(lambda group, event_id: (event_id, group), event_id)
+ for event_id in event_ids
+ ],
+ consumeErrors=True,
+ ).addErrback(unwrapFirstError)
- return res
+ groups = set(group for _, group in event_and_groups if group)
- states = yield self.runInteraction(
- "get_state_groups",
- f,
- )
-
- state_list = yield defer.gatherResults(
+ group_to_state = yield defer.gatherResults(
[
- self._fetch_events_for_group(group, vals)
- for group, vals in states.items()
+ self._get_state_for_group(
+ group,
+ ).addCallback(lambda state_dict, group: (group, state_dict), group)
+ for group in groups
],
consumeErrors=True,
- )
+ ).addErrback(unwrapFirstError)
- defer.returnValue(dict(state_list))
+ defer.returnValue({
+ group: state_map.values()
+ for group, state_map in group_to_state
+ })
@cached(num_args=1)
def _fetch_events_for_group(self, key, events):
@@ -207,16 +193,25 @@ class StateStore(SQLBaseStore):
events = yield self._get_events(event_ids, get_prev_content=False)
defer.returnValue(events)
- @cached(num_args=3, lru=True)
- def _get_state_groups_from_group(self, room_id, group, types):
+ @cached(num_args=2, lru=True, max_entries=10000)
+ def _get_state_groups_from_group(self, group, types):
def f(txn):
+ if types is not None:
+ where_clause = "AND (%s)" % (
+ " OR ".join(["(type = ? AND state_key = ?)"] * len(types)),
+ )
+ else:
+ where_clause = ""
+
sql = (
"SELECT event_id FROM state_groups_state WHERE"
- " room_id = ? AND state_group = ? AND (%s)"
- ) % (" OR ".join(["(type = ? AND state_key = ?)"] * len(types)),)
+ " state_group = ? %s"
+ ) % (where_clause,)
+
+ args = [group]
+ if types is not None:
+ args.extend([i for typ in types for i in typ])
- args = [room_id, group]
- args.extend([i for typ in types for i in typ])
txn.execute(sql, args)
return group, [
@@ -229,7 +224,7 @@ class StateStore(SQLBaseStore):
f,
)
- @cached(num_args=3, lru=True, max_entries=100000)
+ @cached(num_args=3, lru=True, max_entries=20000)
def _get_state_for_event_id(self, room_id, event_id, types):
def f(txn):
type_and_state_sql = " OR ".join([
@@ -280,40 +275,33 @@ class StateStore(SQLBaseStore):
deferred: A list of dicts corresponding to the event_ids given.
The dicts are mappings from (type, state_key) -> state_events
"""
- set_types = frozenset(types)
- res = yield defer.gatherResults(
+ event_and_groups = yield defer.gatherResults(
[
- self._get_state_for_event_id(
- room_id, event_id, set_types,
- )
+ self._get_state_group_for_event(
+ room_id, event_id,
+ ).addCallback(lambda group, event_id: (event_id, group), event_id)
for event_id in event_ids
],
consumeErrors=True,
).addErrback(unwrapFirstError)
- event_to_state_ids = dict(res)
+ groups = set(group for _, group in event_and_groups)
- event_dict = yield self._get_events(
+ res = yield defer.gatherResults(
[
- item
- for lst in event_to_state_ids.values()
- for item in lst
+ self._get_state_for_group(
+ group, types
+ ).addCallback(lambda state_dict, group: (group, state_dict), group)
+ for group in groups
],
- get_prev_content=False
- ).addCallback(
- lambda evs: {ev.event_id: ev for ev in evs}
- )
+ consumeErrors=True,
+ ).addErrback(unwrapFirstError)
+
+ group_to_state = dict(res)
event_to_state = {
- event_id: {
- (ev.type, ev.state_key): ev
- for ev in [
- event_dict[state_id]
- for state_id in state_ids
- if state_id in event_dict
- ]
- }
- for event_id, state_ids in event_to_state_ids.items()
+ event_id: group_to_state[group]
+ for event_id, group in event_and_groups
}
defer.returnValue([
@@ -321,6 +309,79 @@ class StateStore(SQLBaseStore):
for event in event_ids
])
+ @cached(num_args=2, lru=True, max_entries=100000)
+ def _get_state_group_for_event(self, room_id, event_id):
+ return self._simple_select_one_onecol(
+ table="event_to_state_groups",
+ keyvalues={
+ "event_id": event_id,
+ },
+ retcol="state_group",
+ allow_none=True,
+ desc="_get_state_group_for_event",
+ )
+
+ @defer.inlineCallbacks
+ def _get_state_for_group(self, group, types=None):
+ is_all, state_dict = self._state_group_cache.get(group)
+
+ type_to_key = {}
+ missing_types = set()
+ if types is not None:
+ for typ, state_key in types:
+ if state_key is None:
+ type_to_key[typ] = None
+ missing_types.add((typ, state_key))
+ else:
+ if type_to_key.get(typ, object()) is not None:
+ type_to_key.setdefault(typ, set()).add(state_key)
+
+ if (typ, state_key) not in state_dict:
+ missing_types.add((typ, state_key))
+
+ if is_all and types is None:
+ defer.returnValue(state_dict)
+
+ if is_all or (types is not None and not missing_types):
+ def include(typ, state_key):
+ sentinel = object()
+ valid_state_keys = type_to_key.get(typ, sentinel)
+ if valid_state_keys is sentinel:
+ return False
+ if valid_state_keys is None:
+ return True
+ if state_key in valid_state_keys:
+ return True
+ return False
+
+ defer.returnValue({
+ k: v
+ for k, v in state_dict.items()
+ if include(k[0], k[1])
+ })
+
+ # Okay, so we have some missing_types, lets fetch them.
+ cache_seq_num = self._state_group_cache.sequence
+ _, state_ids = yield self._get_state_groups_from_group(
+ group,
+ frozenset(types) if types else None
+ )
+ state_events = yield self._get_events(state_ids, get_prev_content=False)
+ state_dict = {
+ (e.type, e.state_key): e
+ for e in state_events
+ }
+
+ # Update the cache
+ self._state_group_cache.update(
+ cache_seq_num,
+ key=group,
+ value=state_dict,
+ full=(types is None),
+ )
+
+ defer.returnValue(state_dict)
+
def _make_group_id(clock):
return str(int(clock.time_msec())) + random_string(5)
diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py
index af45fc5619..9db259d5fc 100644
--- a/synapse/storage/stream.py
+++ b/synapse/storage/stream.py
@@ -300,8 +300,7 @@ class StreamStore(SQLBaseStore):
defer.returnValue((events, token))
@defer.inlineCallbacks
- def get_recent_events_for_room(self, room_id, limit, end_token,
- with_feedback=False, from_token=None):
+ def get_recent_events_for_room(self, room_id, limit, end_token, from_token=None):
# TODO (erikj): Handle compressed feedback
end_token = RoomStreamToken.parse_stream_token(end_token)
|