diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index f7155fd8d3..90649af9e1 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -230,7 +230,11 @@ class FederationHandler(BaseHandler):
@defer.inlineCallbacks
def _filter_events_for_server(self, server_name, room_id, events):
states = yield self.store.get_state_for_events(
- room_id, [e.event_id for e in events],
+ room_id, frozenset(e.event_id for e in events),
+ types=(
+ (EventTypes.RoomHistoryVisibility, ""),
+ (EventTypes.Member, None),
+ )
)
events_and_states = zip(events, states)
@@ -503,7 +507,7 @@ class FederationHandler(BaseHandler):
event_ids = list(extremities.keys())
states = yield defer.gatherResults([
- self.state_handler.resolve_state_groups([e])
+ self.state_handler.resolve_state_groups(room_id, [e])
for e in event_ids
])
states = dict(zip(event_ids, [s[1] for s in states]))
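
Each `types` entry passed to `get_state_for_events` above is an `(event_type, state_key)` pair; a `state_key` of `None` is a wildcard, so `(EventTypes.Member, None)` selects every membership event while `(EventTypes.RoomHistoryVisibility, "")` selects just the single visibility event. A sketch of how the narrowed result is consumed (illustrative, not code from this patch):

```python
# `states` comes back aligned with `events`; each entry maps
# (type, state_key) -> state event, restricted to the requested types.
for event, state in zip(events, states):
    visibility = state.get((EventTypes.RoomHistoryVisibility, ""))
    member_keys = [
        state_key
        for (typ, state_key) in state
        if typ == EventTypes.Member
    ]
    # visibility and member_keys drive the per-server filtering.
```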
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py
index 9d6d4f0978..11c736f727 100644
--- a/synapse/handlers/message.py
+++ b/synapse/handlers/message.py
@@ -138,7 +138,11 @@ class MessageHandler(BaseHandler):
@defer.inlineCallbacks
def _filter_events_for_client(self, user_id, room_id, events):
states = yield self.store.get_state_for_events(
- room_id, [e.event_id for e in events],
+ room_id, frozenset(e.event_id for e in events),
+ types=(
+ (EventTypes.RoomHistoryVisibility, ""),
+ (EventTypes.Member, user_id),
+ )
)
events_and_states = zip(events, states)
@@ -401,10 +405,14 @@ class MessageHandler(BaseHandler):
except:
logger.exception("Failed to get snapshot")
- yield defer.gatherResults(
- [handle_room(e) for e in room_list],
- consumeErrors=True
- ).addErrback(unwrapFirstError)
+ # Only do N rooms at once
+ n = 5
+ d_list = [handle_room(e) for e in room_list]
+ for ds in [d_list[i:i + n] for i in range(0, len(d_list), n)]:
+ yield defer.gatherResults(
+ ds,
+ consumeErrors=True
+ ).addErrback(unwrapFirstError)
ret = {
"rooms": rooms_ret,
diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py
index 6cff6230c1..8f58774b31 100644
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@@ -295,7 +295,11 @@ class SyncHandler(BaseHandler):
@defer.inlineCallbacks
def _filter_events_for_client(self, user_id, room_id, events):
states = yield self.store.get_state_for_events(
- room_id, [e.event_id for e in events],
+ room_id, frozenset(e.event_id for e in events),
+ types=(
+ (EventTypes.RoomHistoryVisibility, ""),
+ (EventTypes.Member, user_id),
+ )
)
events_and_states = zip(events, states)
diff --git a/synapse/state.py b/synapse/state.py
index 80da90a72c..b5e5d7bbda 100644
--- a/synapse/state.py
+++ b/synapse/state.py
@@ -96,7 +96,7 @@ class StateHandler(object):
cache.ts = self.clock.time_msec()
state = cache.state
else:
- res = yield self.resolve_state_groups(event_ids)
+ res = yield self.resolve_state_groups(room_id, event_ids)
state = res[1]
if event_type:
@@ -155,13 +155,13 @@ class StateHandler(object):
if event.is_state():
ret = yield self.resolve_state_groups(
- [e for e, _ in event.prev_events],
+ event.room_id, [e for e, _ in event.prev_events],
event_type=event.type,
state_key=event.state_key,
)
else:
ret = yield self.resolve_state_groups(
- [e for e, _ in event.prev_events],
+ event.room_id, [e for e, _ in event.prev_events],
)
group, curr_state, prev_state = ret
@@ -180,7 +180,7 @@ class StateHandler(object):
@defer.inlineCallbacks
@log_function
- def resolve_state_groups(self, event_ids, event_type=None, state_key=""):
+ def resolve_state_groups(self, room_id, event_ids, event_type=None, state_key=""):
""" Given a list of event_ids this method fetches the state at each
event, resolves conflicts between them and returns them.
@@ -205,7 +205,7 @@ class StateHandler(object):
)
state_groups = yield self.store.get_state_groups(
- event_ids
+ room_id, event_ids
)
logger.debug(
diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py
index 973e78e047..c6ce65b4cc 100644
--- a/synapse/storage/__init__.py
+++ b/synapse/storage/__init__.py
@@ -99,7 +99,7 @@ class DataStore(RoomMemberStore, RoomStore,
key = (user.to_string(), access_token, device_id, ip)
try:
- last_seen = self.client_ip_last_seen.get(*key)
+ last_seen = self.client_ip_last_seen.get(key)
except KeyError:
last_seen = None
@@ -107,7 +107,7 @@ class DataStore(RoomMemberStore, RoomStore,
if last_seen is not None and (now - last_seen) < LAST_SEEN_GRANULARITY:
defer.returnValue(None)
- self.client_ip_last_seen.prefill(*key + (now,))
+ self.client_ip_last_seen.prefill(key, now)
# It's safe not to lock here: a) no unique constraint,
# b) LAST_SEEN_GRANULARITY makes concurrent updates incredibly unlikely
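
The `Cache` API change driving this hunk (and the many `invalidate((x,))` call sites later in the patch) is that a key is now passed as one tuple instead of splatted positional arguments, so single-argument caches key on 1-tuples. A sketch of the new convention, with `cache` standing in for an instance such as `client_ip_last_seen`:

```python
key = (user.to_string(), access_token, device_id, ip)

try:
    last_seen = cache.get(key)    # was: cache.get(*key)
except KeyError:
    last_seen = None

cache.prefill(key, now)           # was: cache.prefill(*key + (now,))

# One-argument caches now take 1-tuples, hence the repeated
# foo.invalidate(x) -> foo.invalidate((x,)) changes below:
cache.invalidate(key)
```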
diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py
index 5997603b3c..a49dbb2ef5 100644
--- a/synapse/storage/_base.py
+++ b/synapse/storage/_base.py
@@ -16,9 +16,11 @@ import logging
from synapse.api.errors import StoreError
from synapse.util.async import ObservableDeferred
+from synapse.util import unwrapFirstError
from synapse.util.logutils import log_function
from synapse.util.logcontext import preserve_context_over_fn, LoggingContext
from synapse.util.lrucache import LruCache
+from synapse.util.dictionary_cache import DictionaryCache
import synapse.metrics
from util.id_generators import IdGenerator, StreamIdGenerator
@@ -57,6 +59,9 @@ cache_counter = metrics.register_cache(
)
+_CacheSentinel = object()
+
+
class Cache(object):
def __init__(self, name, max_entries=1000, keylen=1, lru=True):
@@ -83,41 +88,38 @@ class Cache(object):
"Cache objects can only be accessed from the main thread"
)
- def get(self, *keyargs):
- if len(keyargs) != self.keylen:
- raise ValueError("Expected a key to have %d items", self.keylen)
-
- if keyargs in self.cache:
+ def get(self, keyargs, default=_CacheSentinel):
+ val = self.cache.get(keyargs, _CacheSentinel)
+ if val is not _CacheSentinel:
cache_counter.inc_hits(self.name)
- return self.cache[keyargs]
+ return val
cache_counter.inc_misses(self.name)
- raise KeyError()
- def update(self, sequence, *args):
+ if default is _CacheSentinel:
+ raise KeyError()
+ else:
+ return default
+
+ def update(self, sequence, keyargs, value):
self.check_thread()
if self.sequence == sequence:
# Only update the cache if the cache's sequence number matches the
# number that the cache had before the SELECT was started (SYN-369)
- self.prefill(*args)
-
- def prefill(self, *args): # because I can't *keyargs, value
- keyargs = args[:-1]
- value = args[-1]
-
- if len(keyargs) != self.keylen:
- raise ValueError("Expected a key to have %d items", self.keylen)
+ self.prefill(keyargs, value)
+ def prefill(self, keyargs, value):
if self.max_entries is not None:
while len(self.cache) >= self.max_entries:
self.cache.popitem(last=False)
self.cache[keyargs] = value
- def invalidate(self, *keyargs):
+ def invalidate(self, keyargs):
self.check_thread()
- if len(keyargs) != self.keylen:
- raise ValueError("Expected a key to have %d items", self.keylen)
+ if not isinstance(keyargs, tuple):
+ raise ValueError("keyargs must be a tuple.")
+
# Increment the sequence number so that any SELECT statements that
# raced with the INSERT don't update the cache (SYN-369)
self.sequence += 1
@@ -168,20 +170,21 @@ class CacheDescriptor(object):
% (orig.__name__,)
)
- def __get__(self, obj, objtype=None):
- cache = Cache(
+ self.cache = Cache(
name=self.orig.__name__,
max_entries=self.max_entries,
keylen=self.num_args,
lru=self.lru,
)
+ def __get__(self, obj, objtype=None):
+
@functools.wraps(self.orig)
def wrapped(*args, **kwargs):
arg_dict = inspect.getcallargs(self.orig, obj, *args, **kwargs)
- keyargs = [arg_dict[arg_nm] for arg_nm in self.arg_names]
+ keyargs = tuple(arg_dict[arg_nm] for arg_nm in self.arg_names)
try:
- cached_result_d = cache.get(*keyargs)
+ cached_result_d = self.cache.get(keyargs)
observer = cached_result_d.observe()
if DEBUG_CACHES:
@@ -203,7 +206,7 @@ class CacheDescriptor(object):
# Get the sequence number of the cache before reading from the
# database so that we can tell if the cache is invalidated
# while the SELECT is executing (SYN-369)
- sequence = cache.sequence
+ sequence = self.cache.sequence
ret = defer.maybeDeferred(
self.function_to_call,
@@ -211,19 +214,114 @@ class CacheDescriptor(object):
)
def onErr(f):
- cache.invalidate(*keyargs)
+ self.cache.invalidate(keyargs)
return f
ret.addErrback(onErr)
- ret = ObservableDeferred(ret, consumeErrors=False)
- cache.update(sequence, *(keyargs + [ret]))
+ ret = ObservableDeferred(ret, consumeErrors=True)
+ self.cache.update(sequence, keyargs, ret)
return ret.observe()
- wrapped.invalidate = cache.invalidate
- wrapped.invalidate_all = cache.invalidate_all
- wrapped.prefill = cache.prefill
+ wrapped.invalidate = self.cache.invalidate
+ wrapped.invalidate_all = self.cache.invalidate_all
+ wrapped.prefill = self.cache.prefill
+
+ obj.__dict__[self.orig.__name__] = wrapped
+
+ return wrapped
+
+
+class CacheListDescriptor(object):
+ def __init__(self, orig, cache, list_name, num_args=1, inlineCallbacks=False):
+ self.orig = orig
+
+ if inlineCallbacks:
+ self.function_to_call = defer.inlineCallbacks(orig)
+ else:
+ self.function_to_call = orig
+
+ self.num_args = num_args
+ self.list_name = list_name
+
+ self.arg_names = inspect.getargspec(orig).args[1:num_args+1]
+ self.list_pos = self.arg_names.index(self.list_name)
+
+ self.cache = cache
+
+ self.sentinel = object()
+
+ if len(self.arg_names) < self.num_args:
+ raise Exception(
+ "Not enough explicit positional arguments to key off of for %r."
+ " (@cached cannot key off of *args or **kwars)"
+ % (orig.__name__,)
+ )
+
+ if self.list_name not in self.arg_names:
+ raise Exception(
+ "Couldn't see arguments %r for %r."
+ % (self.list_name, cache.name,)
+ )
+
+ def __get__(self, obj, objtype=None):
+
+ @functools.wraps(self.orig)
+ def wrapped(*args, **kwargs):
+ arg_dict = inspect.getcallargs(self.orig, obj, *args, **kwargs)
+ keyargs = [arg_dict[arg_nm] for arg_nm in self.arg_names]
+ list_args = arg_dict[self.list_name]
+
+ cached = {}
+ missing = []
+ for arg in list_args:
+ key = list(keyargs)
+ key[self.list_pos] = arg
+
+ try:
+ res = self.cache.get(tuple(key)).observe()
+ res.addCallback(lambda r, arg: (arg, r), arg)
+ cached[arg] = res
+ except KeyError:
+ missing.append(arg)
+
+ if missing:
+ sequence = self.cache.sequence
+ args_to_call = dict(arg_dict)
+ args_to_call[self.list_name] = missing
+
+ ret_d = defer.maybeDeferred(
+ self.function_to_call,
+ **args_to_call
+ )
+
+ ret_d = ObservableDeferred(ret_d)
+
+ for arg in missing:
+ observer = ret_d.observe()
+ observer.addCallback(lambda r, arg: r[arg], arg)
+
+ observer = ObservableDeferred(observer)
+
+ key = list(keyargs)
+ key[self.list_pos] = arg
+ self.cache.update(sequence, tuple(key), observer)
+
+ def invalidate(f, key):
+ self.cache.invalidate(key)
+ return f
+ observer.addErrback(invalidate, tuple(key))
+
+ res = observer.observe()
+ res.addCallback(lambda r, arg: (arg, r), arg)
+
+ cached[arg] = res
+
+ return defer.gatherResults(
+ cached.values(),
+ consumeErrors=True,
+ ).addErrback(unwrapFirstError).addCallback(lambda res: dict(res))
obj.__dict__[self.orig.__name__] = wrapped
@@ -249,6 +347,16 @@ def cachedInlineCallbacks(max_entries=1000, num_args=1, lru=False):
)
+def cachedList(cache, list_name, num_args=1, inlineCallbacks=False):
+ return lambda orig: CacheListDescriptor(
+ orig,
+ cache=cache,
+ list_name=list_name,
+ num_args=num_args,
+ inlineCallbacks=inlineCallbacks,
+ )
+
+
class LoggingTransaction(object):
"""An object that almost-transparently proxies for the 'txn' object
passed to the constructor. Adds logging and metrics to the .execute()
@@ -369,6 +477,8 @@ class SQLBaseStore(object):
self._get_event_cache = Cache("*getEvent*", keylen=3, lru=True,
max_entries=hs.config.event_cache_size)
+ self._state_group_cache = DictionaryCache("*stateGroupCache*", 100000)
+
self._event_fetch_lock = threading.Condition()
self._event_fetch_list = []
self._event_fetch_ongoing = 0
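
To see how the new pieces fit together: `@cached` methods now share a single `Cache` per descriptor (built in `__init__` rather than on each `__get__`), and `@cachedList` lets a batch method serve hits from, and write misses back into, its single-item counterpart's cache. A usage sketch with illustrative store and table names (the first real user is `_get_state_group_for_events` in `synapse/storage/state.py` below):

```python
class ExampleStore(SQLBaseStore):

    @cached(num_args=1)
    def get_user_colour(self, user_id):
        return self._simple_select_one_onecol(
            table="colours",
            keyvalues={"user_id": user_id},
            retcol="colour",
            desc="get_user_colour",
        )

    # Hits are served straight from get_user_colour.cache; only the
    # missing user_ids reach the body, which must return a dict mapping
    # each requested id to its value.
    @cachedList(cache=get_user_colour.cache, list_name="user_ids",
                num_args=1)
    def get_user_colours(self, user_ids):
        def f(txn):
            return {
                user_id: self._simple_select_one_onecol_txn(
                    txn,
                    table="colours",
                    keyvalues={"user_id": user_id},
                    retcol="colour",
                    allow_none=True,
                )
                for user_id in user_ids
            }
        return self.runInteraction("get_user_colours", f)
```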
diff --git a/synapse/storage/directory.py b/synapse/storage/directory.py
index 2b2bdf8615..f3947bbe89 100644
--- a/synapse/storage/directory.py
+++ b/synapse/storage/directory.py
@@ -104,7 +104,7 @@ class DirectoryStore(SQLBaseStore):
},
desc="create_room_alias_association",
)
- self.get_aliases_for_room.invalidate(room_id)
+ self.get_aliases_for_room.invalidate((room_id,))
@defer.inlineCallbacks
def delete_room_alias(self, room_alias):
@@ -114,7 +114,7 @@ class DirectoryStore(SQLBaseStore):
room_alias,
)
- self.get_aliases_for_room.invalidate(room_id)
+ self.get_aliases_for_room.invalidate((room_id,))
defer.returnValue(room_id)
def _delete_room_alias_txn(self, txn, room_alias):
diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py
index 45b86c94e8..910b6598a7 100644
--- a/synapse/storage/event_federation.py
+++ b/synapse/storage/event_federation.py
@@ -362,7 +362,7 @@ class EventFederationStore(SQLBaseStore):
for room_id in events_by_room:
txn.call_after(
- self.get_latest_event_ids_in_room.invalidate, room_id
+ self.get_latest_event_ids_in_room.invalidate, (room_id,)
)
def get_backfill_events(self, room_id, event_list, limit):
@@ -505,4 +505,4 @@ class EventFederationStore(SQLBaseStore):
query = "DELETE FROM event_forward_extremities WHERE room_id = ?"
txn.execute(query, (room_id,))
- txn.call_after(self.get_latest_event_ids_in_room.invalidate, room_id)
+ txn.call_after(self.get_latest_event_ids_in_room.invalidate, (room_id,))
diff --git a/synapse/storage/events.py b/synapse/storage/events.py
index ed7ea38804..5b64918024 100644
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@@ -162,8 +162,8 @@ class EventsStore(SQLBaseStore):
if current_state:
txn.call_after(self.get_current_state_for_key.invalidate_all)
txn.call_after(self.get_rooms_for_user.invalidate_all)
- txn.call_after(self.get_users_in_room.invalidate, event.room_id)
- txn.call_after(self.get_joined_hosts_for_room.invalidate, event.room_id)
+ txn.call_after(self.get_users_in_room.invalidate, (event.room_id,))
+ txn.call_after(self.get_joined_hosts_for_room.invalidate, (event.room_id,))
txn.call_after(self.get_room_name_and_aliases, event.room_id)
self._simple_delete_txn(
@@ -430,13 +430,13 @@ class EventsStore(SQLBaseStore):
if not context.rejected:
txn.call_after(
self.get_current_state_for_key.invalidate,
- event.room_id, event.type, event.state_key
- )
+ (event.room_id, event.type, event.state_key,)
+ )
if event.type in [EventTypes.Name, EventTypes.Aliases]:
txn.call_after(
self.get_room_name_and_aliases.invalidate,
- event.room_id
+ (event.room_id,)
)
self._simple_upsert_txn(
@@ -567,8 +567,9 @@ class EventsStore(SQLBaseStore):
def _invalidate_get_event_cache(self, event_id):
for check_redacted in (False, True):
for get_prev_content in (False, True):
- self._get_event_cache.invalidate(event_id, check_redacted,
- get_prev_content)
+ self._get_event_cache.invalidate(
+ (event_id, check_redacted, get_prev_content)
+ )
def _get_event_txn(self, txn, event_id, check_redacted=True,
get_prev_content=False, allow_rejected=False):
@@ -589,7 +590,7 @@ class EventsStore(SQLBaseStore):
for event_id in events:
try:
ret = self._get_event_cache.get(
- event_id, check_redacted, get_prev_content
+ (event_id, check_redacted, get_prev_content,)
)
if allow_rejected or not ret.rejected_reason:
@@ -822,7 +823,7 @@ class EventsStore(SQLBaseStore):
ev.unsigned["prev_content"] = prev.get_dict()["content"]
self._get_event_cache.prefill(
- ev.event_id, check_redacted, get_prev_content, ev
+ (ev.event_id, check_redacted, get_prev_content), ev
)
defer.returnValue(ev)
@@ -879,7 +880,7 @@ class EventsStore(SQLBaseStore):
ev.unsigned["prev_content"] = prev.get_dict()["content"]
self._get_event_cache.prefill(
- ev.event_id, check_redacted, get_prev_content, ev
+ (ev.event_id, check_redacted, get_prev_content), ev
)
return ev
diff --git a/synapse/storage/keys.py b/synapse/storage/keys.py
index e3f98f0cde..49b8e37cfd 100644
--- a/synapse/storage/keys.py
+++ b/synapse/storage/keys.py
@@ -131,7 +131,7 @@ class KeyStore(SQLBaseStore):
desc="store_server_verify_key",
)
- self.get_all_server_verify_keys.invalidate(server_name)
+ self.get_all_server_verify_keys.invalidate((server_name,))
def store_server_keys_json(self, server_name, key_id, from_server,
ts_now_ms, ts_expires_ms, key_json_bytes):
diff --git a/synapse/storage/presence.py b/synapse/storage/presence.py
index fefcf6bce0..576cf670cc 100644
--- a/synapse/storage/presence.py
+++ b/synapse/storage/presence.py
@@ -98,7 +98,7 @@ class PresenceStore(SQLBaseStore):
updatevalues={"accepted": True},
desc="set_presence_list_accepted",
)
- self.get_presence_list_accepted.invalidate(observer_localpart)
+ self.get_presence_list_accepted.invalidate((observer_localpart,))
defer.returnValue(result)
def get_presence_list(self, observer_localpart, accepted=None):
@@ -133,4 +133,4 @@ class PresenceStore(SQLBaseStore):
"observed_user_id": observed_userid},
desc="del_presence_list",
)
- self.get_presence_list_accepted.invalidate(observer_localpart)
+ self.get_presence_list_accepted.invalidate((observer_localpart,))
diff --git a/synapse/storage/push_rule.py b/synapse/storage/push_rule.py
index a220f3632e..9b88ca7b39 100644
--- a/synapse/storage/push_rule.py
+++ b/synapse/storage/push_rule.py
@@ -151,11 +151,11 @@ class PushRuleStore(SQLBaseStore):
txn.execute(sql, (user_name, priority_class, new_rule_priority))
txn.call_after(
- self.get_push_rules_for_user.invalidate, user_name
+ self.get_push_rules_for_user.invalidate, (user_name,)
)
txn.call_after(
- self.get_push_rules_enabled_for_user.invalidate, user_name
+ self.get_push_rules_enabled_for_user.invalidate, (user_name,)
)
self._simple_insert_txn(
@@ -187,10 +187,10 @@ class PushRuleStore(SQLBaseStore):
new_rule['priority'] = new_prio
txn.call_after(
- self.get_push_rules_for_user.invalidate, user_name
+ self.get_push_rules_for_user.invalidate, (user_name,)
)
txn.call_after(
- self.get_push_rules_enabled_for_user.invalidate, user_name
+ self.get_push_rules_enabled_for_user.invalidate, (user_name,)
)
self._simple_insert_txn(
@@ -216,8 +216,8 @@ class PushRuleStore(SQLBaseStore):
desc="delete_push_rule",
)
- self.get_push_rules_for_user.invalidate(user_name)
- self.get_push_rules_enabled_for_user.invalidate(user_name)
+ self.get_push_rules_for_user.invalidate((user_name,))
+ self.get_push_rules_enabled_for_user.invalidate((user_name,))
@defer.inlineCallbacks
def set_push_rule_enabled(self, user_name, rule_id, enabled):
@@ -238,10 +238,10 @@ class PushRuleStore(SQLBaseStore):
{'id': new_id},
)
txn.call_after(
- self.get_push_rules_for_user.invalidate, user_name
+ self.get_push_rules_for_user.invalidate, (user_name,)
)
txn.call_after(
- self.get_push_rules_enabled_for_user.invalidate, user_name
+ self.get_push_rules_enabled_for_user.invalidate, (user_name,)
)
diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py
index 90e2606be2..4eaa088b36 100644
--- a/synapse/storage/registration.py
+++ b/synapse/storage/registration.py
@@ -131,7 +131,7 @@ class RegistrationStore(SQLBaseStore):
user_id
)
for r in rows:
- self.get_user_by_token.invalidate(r)
+ self.get_user_by_token.invalidate((r,))
@cached()
def get_user_by_token(self, token):
diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py
index 55dd3f6cfb..9f14f38f24 100644
--- a/synapse/storage/roommember.py
+++ b/synapse/storage/roommember.py
@@ -54,9 +54,9 @@ class RoomMemberStore(SQLBaseStore):
)
for event in events:
- txn.call_after(self.get_rooms_for_user.invalidate, event.state_key)
- txn.call_after(self.get_joined_hosts_for_room.invalidate, event.room_id)
- txn.call_after(self.get_users_in_room.invalidate, event.room_id)
+ txn.call_after(self.get_rooms_for_user.invalidate, (event.state_key,))
+ txn.call_after(self.get_joined_hosts_for_room.invalidate, (event.room_id,))
+ txn.call_after(self.get_users_in_room.invalidate, (event.room_id,))
def get_room_member(self, user_id, room_id):
"""Retrieve the current state of a room member.
diff --git a/synapse/storage/state.py b/synapse/storage/state.py
index 55c6d52890..a04731ae11 100644
--- a/synapse/storage/state.py
+++ b/synapse/storage/state.py
@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from ._base import SQLBaseStore, cached, cachedInlineCallbacks
+from ._base import SQLBaseStore, cached, cachedInlineCallbacks, cachedList
from twisted.internet import defer
@@ -44,52 +44,26 @@ class StateStore(SQLBaseStore):
"""
@defer.inlineCallbacks
- def get_state_groups(self, event_ids):
+ def get_state_groups(self, room_id, event_ids):
""" Get the state groups for the given list of event_ids
The return value is a dict mapping group names to lists of events.
"""
+ if not event_ids:
+ defer.returnValue({})
- def f(txn):
- groups = set()
- for event_id in event_ids:
- group = self._simple_select_one_onecol_txn(
- txn,
- table="event_to_state_groups",
- keyvalues={"event_id": event_id},
- retcol="state_group",
- allow_none=True,
- )
- if group:
- groups.add(group)
-
- res = {}
- for group in groups:
- state_ids = self._simple_select_onecol_txn(
- txn,
- table="state_groups_state",
- keyvalues={"state_group": group},
- retcol="event_id",
- )
-
- res[group] = state_ids
-
- return res
-
- states = yield self.runInteraction(
- "get_state_groups",
- f,
+ event_to_groups = yield self._get_state_group_for_events(
+ room_id, event_ids,
)
- state_list = yield defer.gatherResults(
- [
- self._fetch_events_for_group(group, vals)
- for group, vals in states.items()
- ],
- consumeErrors=True,
- )
+ groups = set(event_to_groups.values())
- defer.returnValue(dict(state_list))
+ group_to_state = yield self._get_state_for_groups(groups)
+
+ defer.returnValue({
+ group: state_map.values()
+ for group, state_map in group_to_state.items()
+ })
@cached(num_args=1)
def _fetch_events_for_group(self, key, events):
@@ -205,64 +179,281 @@ class StateStore(SQLBaseStore):
events = yield self._get_events(event_ids, get_prev_content=False)
defer.returnValue(events)
+ def _get_state_groups_from_group(self, group, types):
+ def f(txn):
+ if types is not None:
+ where_clause = "AND (%s)" % (
+ " OR ".join(["(type = ? AND state_key = ?)"] * len(types)),
+ )
+ else:
+ where_clause = ""
+
+ sql = (
+ "SELECT event_id FROM state_groups_state WHERE"
+ " state_group = ? %s"
+ ) % (where_clause,)
+
+ args = [group]
+ if types is not None:
+ args.extend([i for typ in types for i in typ])
+
+ txn.execute(sql, args)
+
+ return [r[0] for r in txn.fetchall()]
+
+ return self.runInteraction(
+ "_get_state_groups_from_group",
+ f,
+ )
+
+ def _get_state_groups_from_groups(self, groups_and_types):
+ def f(txn):
+ results = {}
+ for group, types in groups_and_types:
+ if types is not None:
+ where_clause = "AND (%s)" % (
+ " OR ".join(["(type = ? AND state_key = ?)"] * len(types)),
+ )
+ else:
+ where_clause = ""
+
+ sql = (
+ "SELECT event_id FROM state_groups_state WHERE"
+ " state_group = ? %s"
+ ) % (where_clause,)
+
+ args = [group]
+ if types is not None:
+ args.extend([i for typ in types for i in typ])
+
+ txn.execute(sql, args)
+
+ results[group] = [
+ r[0]
+ for r in txn.fetchall()
+ ]
+
+ return results
+
+ return self.runInteraction(
+ "_get_state_groups_from_groups",
+ f,
+ )
+
+ @cached(num_args=3, lru=True, max_entries=10000)
+ def _get_state_for_event_id(self, room_id, event_id, types):
+ def f(txn):
+ type_and_state_sql = " OR ".join([
+ "(type = ? AND state_key = ?)"
+ if typ[1] is not None
+ else "type = ?"
+ for typ in types
+ ])
+
+ sql = (
+ "SELECT e.event_id, sg.state_group, sg.event_id"
+ " FROM state_groups_state as sg"
+ " INNER JOIN event_to_state_groups as e"
+ " ON e.state_group = sg.state_group"
+ " WHERE e.event_id = ? AND (%s)"
+ ) % (type_and_state_sql,)
+
+ args = [event_id]
+ for typ, state_key in types:
+ args.extend(
+ [typ, state_key] if state_key is not None else [typ]
+ )
+ txn.execute(sql, args)
+
+ return event_id, [
+ r[0]
+ for r in txn.fetchall()
+ ]
+
+ return self.runInteraction(
+ "_get_state_for_event_id",
+ f,
+ )
+
@defer.inlineCallbacks
- def get_state_for_events(self, room_id, event_ids):
+ def get_state_for_events(self, room_id, event_ids, types):
+ """Given a list of event_ids and type tuples, return a list of state
+ dicts for each event. The state dicts will only have the type/state_keys
+ that are in the `types` list.
+
+ Args:
+ room_id (str)
+ event_ids (list)
+ types (list): List of (type, state_key) tuples which are used to
+ filter the state fetched. `state_key` may be None, which matches
+ any `state_key`
+
+ Returns:
+ deferred: A list of dicts corresponding to the event_ids given.
+ The dicts are mappings from (type, state_key) -> state event.
+ """
+ event_to_groups = yield self._get_state_group_for_events(
+ room_id, event_ids,
+ )
+
+ groups = set(event_to_groups.values())
+
+ group_to_state = yield self._get_state_for_groups(
+ groups, types
+ )
+
+ event_to_state = {
+ event_id: group_to_state[group]
+ for event_id, group in event_to_groups.items()
+ }
+
+ defer.returnValue([
+ event_to_state[event]
+ for event in event_ids
+ ])
+
+ @cached(num_args=2, lru=True, max_entries=100000)
+ def _get_state_group_for_event(self, room_id, event_id):
+ return self._simple_select_one_onecol(
+ table="event_to_state_groups",
+ keyvalues={
+ "event_id": event_id,
+ },
+ retcol="state_group",
+ allow_none=True,
+ desc="_get_state_group_for_event",
+ )
+
+ @cachedList(cache=_get_state_group_for_event.cache, list_name="event_ids", num_args=2)
+ def _get_state_group_for_events(self, room_id, event_ids):
def f(txn):
- groups = set()
- event_to_group = {}
+ results = {}
for event_id in event_ids:
- # TODO: Remove this loop.
- group = self._simple_select_one_onecol_txn(
+ results[event_id] = self._simple_select_one_onecol_txn(
txn,
table="event_to_state_groups",
- keyvalues={"event_id": event_id},
+ keyvalues={
+ "event_id": event_id,
+ },
retcol="state_group",
allow_none=True,
)
- if group:
- event_to_group[event_id] = group
- groups.add(group)
-
- group_to_state_ids = {}
- for group in groups:
- state_ids = self._simple_select_onecol_txn(
- txn,
- table="state_groups_state",
- keyvalues={"state_group": group},
- retcol="event_id",
- )
-
- group_to_state_ids[group] = state_ids
- return event_to_group, group_to_state_ids
+ return results
- res = yield self.runInteraction(
- "annotate_events_with_state_groups",
+ return self.runInteraction(
+ "_get_state_group_for_events",
f,
)
- event_to_group, group_to_state_ids = res
+ def _get_state_for_group_from_cache(self, group, types=None):
+ is_all, state_dict = self._state_group_cache.get(group)
+
+ type_to_key = {}
+ missing_types = set()
+ if types is not None:
+ for typ, state_key in types:
+ if state_key is None:
+ type_to_key[typ] = None
+ missing_types.add((typ, state_key))
+ else:
+ if type_to_key.get(typ, object()) is not None:
+ type_to_key.setdefault(typ, set()).add(state_key)
+
+ if (typ, state_key) not in state_dict:
+ missing_types.add((typ, state_key))
+
+ if is_all and types is None:
+ return state_dict, missing_types
+
+ if is_all or (types is not None and not missing_types):
+ sentinel = object()
+
+ def include(typ, state_key):
+ valid_state_keys = type_to_key.get(typ, sentinel)
+ if valid_state_keys is sentinel:
+ return False
+ if valid_state_keys is None:
+ return True
+ if state_key in valid_state_keys:
+ return True
+ return False
+
+ return {
+ k: v
+ for k, v in state_dict.items()
+ if v and include(k[0], k[1])
+ }, missing_types
+
+ return {}, missing_types
- state_list = yield defer.gatherResults(
- [
- self._fetch_events_for_group(group, vals)
- for group, vals in group_to_state_ids.items()
- ],
- consumeErrors=True,
+ @defer.inlineCallbacks
+ def _get_state_for_groups(self, groups, types=None):
+ results = {}
+ missing_groups_and_types = []
+ for group in groups:
+ state_dict, missing_types = self._get_state_for_group_from_cache(
+ group, types
+ )
+
+ if types is not None and not missing_types:
+ results[group] = {
+ key: value
+ for key, value in state_dict.items()
+ if value
+ }
+ else:
+ missing_groups_and_types.append((
+ group,
+ missing_types if types else None
+ ))
+
+ if not missing_groups_and_types:
+ defer.returnValue(results)
+
+ # Okay, so we have some missing_types, let's fetch them.
+ cache_seq_num = self._state_group_cache.sequence
+
+ group_state_dict = yield self._get_state_groups_from_groups(
+ missing_groups_and_types
)
- state_dict = {
- group: {
- (ev.type, ev.state_key): ev
- for ev in state
- }
- for group, state in state_list
+ state_events = yield self._get_events(
+ [e_id for l in group_state_dict.values() for e_id in l],
+ get_prev_content=False
+ )
+
+ state_events = {
+ e.event_id: e
+ for e in state_events
}
- defer.returnValue([
- state_dict.get(event_to_group.get(event, None), None)
- for event in event_ids
- ])
+ for group, state_ids in group_state_dict.items():
+ state_dict = {
+ key: None
+ for key in missing_types
+ }
+ evs = [state_events[e_id] for e_id in state_ids]
+ state_dict.update({
+ (e.type, e.state_key): e
+ for e in evs
+ })
+
+ # Update the cache
+ self._state_group_cache.update(
+ cache_seq_num,
+ key=group,
+ value=state_dict,
+ full=(types is None),
+ )
+
+ results[group] = {
+ key: value
+ for key, value in state_dict.items()
+ if value
+ }
+
+ defer.returnValue(results)
def _make_group_id(clock):
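
A detail worth calling out in `_get_state_for_groups` above: the write-back records every requested-but-absent `(type, state_key)` as `None`, so a later cache read can tell "looked up and absent" apart from "never looked up", and `full=(types is None)` marks an entry complete only when the whole group state was fetched. The negative-caching trick in miniature (a self-contained illustration, not patch code):

```python
requested = {("m.room.member", "@alice:hs"), ("m.room.member", "@bob:hs")}
fetched = {("m.room.member", "@alice:hs"): "<member event>"}  # bob absent

state_dict = {key: None for key in requested}  # record every miss...
state_dict.update(fetched)                     # ...then overlay real hits

# state_dict (Nones included) is what gets cached; callers only see hits:
results = {k: v for k, v in state_dict.items() if v is not None}
assert results == fetched
```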
diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py
index af45fc5619..b59fe81004 100644
--- a/synapse/storage/stream.py
+++ b/synapse/storage/stream.py
@@ -35,7 +35,7 @@ what sort order was used:
from twisted.internet import defer
-from ._base import SQLBaseStore
+from ._base import SQLBaseStore, cachedInlineCallbacks
from synapse.api.constants import EventTypes
from synapse.types import RoomStreamToken
from synapse.util.logutils import log_function
@@ -299,9 +299,8 @@ class StreamStore(SQLBaseStore):
defer.returnValue((events, token))
- @defer.inlineCallbacks
- def get_recent_events_for_room(self, room_id, limit, end_token,
- with_feedback=False, from_token=None):
+ @cachedInlineCallbacks(num_args=4)
+ def get_recent_events_for_room(self, room_id, limit, end_token, from_token=None):
# TODO (erikj): Handle compressed feedback
end_token = RoomStreamToken.parse_stream_token(end_token)
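
Why `with_feedback` had to be dropped from the signature: `CacheDescriptor` builds its key from the first `num_args` argument names via `inspect.getcallargs`, so defaulted keywords inside that window become part of the key. A sketch of the key construction, mirroring the descriptor with illustrative values:

```python
import inspect

def get_recent_events_for_room(self, room_id, limit, end_token,
                               from_token=None):
    pass

arg_names = inspect.getargspec(get_recent_events_for_room).args[1:5]
arg_dict = inspect.getcallargs(
    get_recent_events_for_room, None, "!room:hs", 10, "s99",
)
key = tuple(arg_dict[name] for name in arg_names)
print(key)  # ('!room:hs', 10, 's99', None) -- the default is filled in
```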
diff --git a/synapse/util/dictionary_cache.py b/synapse/util/dictionary_cache.py
new file mode 100644
index 0000000000..38b131677c
--- /dev/null
+++ b/synapse/util/dictionary_cache.py
@@ -0,0 +1,106 @@
+# -*- coding: utf-8 -*-
+# Copyright 2015 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from synapse.util.lrucache import LruCache
+from collections import namedtuple
+import threading
+import logging
+
+
+logger = logging.getLogger(__name__)
+
+
+DictionaryEntry = namedtuple("DictionaryEntry", ("full", "value"))
+
+
+class DictionaryCache(object):
+
+ def __init__(self, name, max_entries=1000):
+ self.cache = LruCache(max_size=max_entries)
+
+ self.name = name
+ self.sequence = 0
+ self.thread = None
+ # caches_by_name[name] = self.cache
+
+ class Sentinel(object):
+ __slots__ = []
+
+ self.sentinel = Sentinel()
+
+ def check_thread(self):
+ expected_thread = self.thread
+ if expected_thread is None:
+ self.thread = threading.current_thread()
+ else:
+ if expected_thread is not threading.current_thread():
+ raise ValueError(
+ "Cache objects can only be accessed from the main thread"
+ )
+
+ def get(self, key, dict_keys=None):
+ try:
+ entry = self.cache.get(key, self.sentinel)
+ if entry is not self.sentinel:
+ # cache_counter.inc_hits(self.name)
+
+ if dict_keys is None:
+ return DictionaryEntry(entry.full, dict(entry.value))
+ else:
+ return DictionaryEntry(entry.full, {
+ k: entry.value[k]
+ for k in dict_keys
+ if k in entry.value
+ })
+
+ # cache_counter.inc_misses(self.name)
+ return DictionaryEntry(False, {})
+ except:
+ logger.exception("get failed")
+ raise
+
+ def invalidate(self, key):
+ self.check_thread()
+
+ # Increment the sequence number so that any SELECT statements that
+ # raced with the INSERT don't update the cache (SYN-369)
+ self.sequence += 1
+ self.cache.pop(key, None)
+
+ def invalidate_all(self):
+ self.check_thread()
+ self.sequence += 1
+ self.cache.clear()
+
+ def update(self, sequence, key, value, full=False):
+ try:
+ self.check_thread()
+ if self.sequence == sequence:
+ # Only update the cache if the cache's sequence number matches the
+ # number that the cache had before the SELECT was started (SYN-369)
+ if full:
+ self._insert(key, value)
+ else:
+ self._update_or_insert(key, value)
+ except:
+ logger.exception("update failed")
+ raise
+
+ def _update_or_insert(self, key, value):
+ entry = self.cache.setdefault(key, DictionaryEntry(False, {}))
+ entry.value.update(value)
+
+ def _insert(self, key, value):
+ self.cache[key] = DictionaryEntry(True, value)
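
A usage sketch for the new cache (it mirrors the tests added below; the key and values are illustrative): entries may be partial (`full=False`), and updates are applied only if the sequence number still matches, exactly as with `Cache`:

```python
from synapse.util.dictionary_cache import DictionaryCache

cache = DictionaryCache("example", max_entries=1000)

seq = cache.sequence                   # snapshot before the "SELECT"
cache.update(seq, key=42,
             value={("m.room.member", "@u:hs"): "<event>"},
             full=False)               # partial entry

entry = cache.get(42, dict_keys=[("m.room.member", "@u:hs")])
assert entry.full is False             # only part of the dict is known
assert entry.value                     # ...but the requested key is present

cache.invalidate(42)                   # bumps the sequence, so any stale
                                       # update(seq, ...) is now ignored
```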
diff --git a/tests/storage/test__base.py b/tests/storage/test__base.py
index 8fa305d18a..abee2f631d 100644
--- a/tests/storage/test__base.py
+++ b/tests/storage/test__base.py
@@ -42,12 +42,12 @@ class CacheTestCase(unittest.TestCase):
self.assertEquals(self.cache.get("foo"), 123)
def test_invalidate(self):
- self.cache.prefill("foo", 123)
- self.cache.invalidate("foo")
+ self.cache.prefill(("foo",), 123)
+ self.cache.invalidate(("foo",))
failed = False
try:
- self.cache.get("foo")
+ self.cache.get(("foo",))
except KeyError:
failed = True
@@ -141,7 +141,7 @@ class CacheDecoratorTestCase(unittest.TestCase):
self.assertEquals(callcount[0], 1)
- a.func.invalidate("foo")
+ a.func.invalidate(("foo",))
yield a.func("foo")
@@ -153,7 +153,7 @@ class CacheDecoratorTestCase(unittest.TestCase):
def func(self, key):
return key
- A().func.invalidate("what")
+ A().func.invalidate(("what",))
@defer.inlineCallbacks
def test_max_entries(self):
@@ -193,7 +193,7 @@ class CacheDecoratorTestCase(unittest.TestCase):
a = A()
- a.func.prefill("foo", ObservableDeferred(d))
+ a.func.prefill(("foo",), ObservableDeferred(d))
self.assertEquals(a.func("foo").result, d.result)
self.assertEquals(callcount[0], 0)
diff --git a/tests/test_state.py b/tests/test_state.py
index fea25f7021..5845358754 100644
--- a/tests/test_state.py
+++ b/tests/test_state.py
@@ -69,7 +69,7 @@ class StateGroupStore(object):
self._next_group = 1
- def get_state_groups(self, event_ids):
+ def get_state_groups(self, room_id, event_ids):
groups = {}
for event_id in event_ids:
group = self._event_to_state_group.get(event_id)
diff --git a/tests/util/test_dict_cache.py b/tests/util/test_dict_cache.py
new file mode 100644
index 0000000000..79bc1225d6
--- /dev/null
+++ b/tests/util/test_dict_cache.py
@@ -0,0 +1,101 @@
+# -*- coding: utf-8 -*-
+# Copyright 2015 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from twisted.internet import defer
+from tests import unittest
+
+from synapse.util.dictionary_cache import DictionaryCache
+
+
+class DictCacheTestCase(unittest.TestCase):
+
+ def setUp(self):
+ self.cache = DictionaryCache("foobar")
+
+ def test_simple_cache_hit_full(self):
+ key = "test_simple_cache_hit_full"
+
+ v = self.cache.get(key)
+ self.assertEqual((False, {}), v)
+
+ seq = self.cache.sequence
+ test_value = {"test": "test_simple_cache_hit_full"}
+ self.cache.update(seq, key, test_value, full=True)
+
+ c = self.cache.get(key)
+ self.assertEqual(test_value, c.value)
+
+ def test_simple_cache_hit_partial(self):
+ key = "test_simple_cache_hit_partial"
+
+ seq = self.cache.sequence
+ test_value = {
+ "test": "test_simple_cache_hit_partial"
+ }
+ self.cache.update(seq, key, test_value, full=True)
+
+ c = self.cache.get(key, ["test"])
+ self.assertEqual(test_value, c.value)
+
+ def test_simple_cache_miss_partial(self):
+ key = "test_simple_cache_miss_partial"
+
+ seq = self.cache.sequence
+ test_value = {
+ "test": "test_simple_cache_miss_partial"
+ }
+ self.cache.update(seq, key, test_value, full=True)
+
+ c = self.cache.get(key, ["test2"])
+ self.assertEqual({}, c.value)
+
+ def test_simple_cache_hit_miss_partial(self):
+ key = "test_simple_cache_hit_miss_partial"
+
+ seq = self.cache.sequence
+ test_value = {
+ "test": "test_simple_cache_hit_miss_partial",
+ "test2": "test_simple_cache_hit_miss_partial2",
+ "test3": "test_simple_cache_hit_miss_partial3",
+ }
+ self.cache.update(seq, key, test_value, full=True)
+
+ c = self.cache.get(key, ["test2"])
+ self.assertEqual({"test2": "test_simple_cache_hit_miss_partial2"}, c.value)
+
+ def test_multi_insert(self):
+ key = "test_simple_cache_hit_miss_partial"
+
+ seq = self.cache.sequence
+ test_value_1 = {
+ "test": "test_simple_cache_hit_miss_partial",
+ }
+ self.cache.update(seq, key, test_value_1, full=False)
+
+ seq = self.cache.sequence
+ test_value_2 = {
+ "test2": "test_simple_cache_hit_miss_partial2",
+ }
+ self.cache.update(seq, key, test_value_2, full=False)
+
+ c = self.cache.get(key)
+ self.assertEqual(
+ {
+ "test": "test_simple_cache_hit_miss_partial",
+ "test2": "test_simple_cache_hit_miss_partial2",
+ },
+ c.value
+ )
|