From 3ee17585cd095e590096683395cfb9a017eac15e Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 16 Oct 2020 15:51:57 +0100 Subject: Make LruCache register its own metrics (#8561) rather than have everything that instantiates an LruCache manage metrics separately, have LruCache do it itself. --- synapse/util/caches/deferred_cache.py | 43 +++++++++++------------------------ 1 file changed, 13 insertions(+), 30 deletions(-) (limited to 'synapse/util/caches/deferred_cache.py') diff --git a/synapse/util/caches/deferred_cache.py b/synapse/util/caches/deferred_cache.py index f728cd2cf2..91fdc8142d 100644 --- a/synapse/util/caches/deferred_cache.py +++ b/synapse/util/caches/deferred_cache.py @@ -24,7 +24,6 @@ from prometheus_client import Gauge from twisted.internet import defer from synapse.util.async_helpers import ObservableDeferred -from synapse.util.caches import register_cache from synapse.util.caches.lrucache import LruCache from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry @@ -54,10 +53,7 @@ class DeferredCache(Generic[KT, VT]): __slots__ = ( "cache", - "name", - "keylen", "thread", - "metrics", "_pending_deferred_cache", ) @@ -89,37 +85,27 @@ class DeferredCache(Generic[KT, VT]): cache_type() ) # type: MutableMapping[KT, CacheEntry] + def metrics_cb(): + cache_pending_metric.labels(name).set(len(self._pending_deferred_cache)) + # cache is used for completed results and maps to the result itself, rather than # a Deferred. self.cache = LruCache( max_size=max_entries, keylen=keylen, + cache_name=name, cache_type=cache_type, size_callback=(lambda d: len(d)) if iterable else None, - evicted_callback=self._on_evicted, + metrics_collection_callback=metrics_cb, apply_cache_factor_from_config=apply_cache_factor_from_config, ) - self.name = name - self.keylen = keylen self.thread = None # type: Optional[threading.Thread] - self.metrics = register_cache( - "cache", - name, - self.cache, - collect_callback=self._metrics_collection_callback, - ) @property def max_entries(self): return self.cache.max_size - def _on_evicted(self, evicted_count): - self.metrics.inc_evictions(evicted_count) - - def _metrics_collection_callback(self): - cache_pending_metric.labels(self.name).set(len(self._pending_deferred_cache)) - def check_thread(self): expected_thread = self.thread if expected_thread is None: @@ -154,21 +140,18 @@ class DeferredCache(Generic[KT, VT]): if val is not _Sentinel.sentinel: val.callbacks.update(callbacks) if update_metrics: - self.metrics.inc_hits() + m = self.cache.metrics + assert m # we always have a name, so should always have metrics + m.inc_hits() return val.deferred - val = self.cache.get(key, _Sentinel.sentinel, callbacks=callbacks) - if val is not _Sentinel.sentinel: - self.metrics.inc_hits() - return val - - if update_metrics: - self.metrics.inc_misses() - - if default is _Sentinel.sentinel: + val = self.cache.get( + key, default, callbacks=callbacks, update_metrics=update_metrics + ) + if val is _Sentinel.sentinel: raise KeyError() else: - return default + return val def set( self, -- cgit 1.5.1 From 0ec0bc3886bd72bdf2f64d455a7d777f4573a4f1 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 16 Oct 2020 15:56:39 +0100 Subject: type annotations for LruCache --- synapse/api/auth.py | 4 +- synapse/push/push_rule_evaluator.py | 16 ++++---- synapse/util/caches/deferred_cache.py | 5 ++- synapse/util/caches/dictionary_cache.py | 22 ++++++---- synapse/util/caches/lrucache.py | 73 
+++++++++++++++++++++++++++------ 5 files changed, 89 insertions(+), 31 deletions(-) (limited to 'synapse/util/caches/deferred_cache.py') diff --git a/synapse/api/auth.py b/synapse/api/auth.py index eb6f418b13..bff87fabde 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -69,7 +69,9 @@ class Auth: self.store = hs.get_datastore() self.state = hs.get_state_handler() - self.token_cache = LruCache(10000, "token_cache") + self.token_cache = LruCache( + 10000, "token_cache" + ) # type: LruCache[str, Tuple[str, bool]] self._auth_blocking = AuthBlocking(self.hs) diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py index 4c95b149c5..854ffd625e 100644 --- a/synapse/push/push_rule_evaluator.py +++ b/synapse/push/push_rule_evaluator.py @@ -16,7 +16,7 @@ import logging import re -from typing import Any, Dict, List, Optional, Pattern, Union +from typing import Any, Dict, List, Optional, Pattern, Tuple, Union from synapse.events import EventBase from synapse.types import UserID @@ -173,19 +173,21 @@ class PushRuleEvaluatorForEvent: # Similar to _glob_matches, but do not treat display_name as a glob. r = regex_cache.get((display_name, False, True), None) if not r: - r = re.escape(display_name) - r = _re_word_boundary(r) - r = re.compile(r, flags=re.IGNORECASE) + r1 = re.escape(display_name) + r1 = _re_word_boundary(r1) + r = re.compile(r1, flags=re.IGNORECASE) regex_cache[(display_name, False, True)] = r - return r.search(body) + return bool(r.search(body)) def _get_value(self, dotted_key: str) -> Optional[str]: return self._value_cache.get(dotted_key, None) # Caches (string, is_glob, word_boundary) -> regex for push. See _glob_matches -regex_cache = LruCache(50000, "regex_push_cache") +regex_cache = LruCache( + 50000, "regex_push_cache" +) # type: LruCache[Tuple[str, bool, bool],Pattern] def _glob_matches(glob: str, value: str, word_boundary: bool = False) -> bool: @@ -203,7 +205,7 @@ def _glob_matches(glob: str, value: str, word_boundary: bool = False) -> bool: if not r: r = _glob_to_re(glob, word_boundary) regex_cache[(glob, True, word_boundary)] = r - return r.search(value) + return bool(r.search(value)) except re.error: logger.warning("Failed to parse glob to regex: %r", glob) return False diff --git a/synapse/util/caches/deferred_cache.py b/synapse/util/caches/deferred_cache.py index 91fdc8142d..4026e1f8fa 100644 --- a/synapse/util/caches/deferred_cache.py +++ b/synapse/util/caches/deferred_cache.py @@ -98,7 +98,7 @@ class DeferredCache(Generic[KT, VT]): size_callback=(lambda d: len(d)) if iterable else None, metrics_collection_callback=metrics_cb, apply_cache_factor_from_config=apply_cache_factor_from_config, - ) + ) # type: LruCache[KT, VT] self.thread = None # type: Optional[threading.Thread] @@ -240,11 +240,12 @@ class DeferredCache(Generic[KT, VT]): self.check_thread() if not isinstance(key, tuple): raise TypeError("The cache key must be a tuple not %r" % (type(key),)) + key = cast(KT, key) self.cache.del_multi(key) # if we have a pending lookup for this key, remove it from the # _pending_deferred_cache, as above - entry_dict = self._pending_deferred_cache.pop(cast(KT, key), None) + entry_dict = self._pending_deferred_cache.pop(key, None) if entry_dict is not None: for entry in iterate_tree_cache_entry(entry_dict): entry.invalidate() diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index 8b426c005b..588d2d49f2 100644 --- a/synapse/util/caches/dictionary_cache.py +++ 
b/synapse/util/caches/dictionary_cache.py @@ -12,10 +12,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import enum import logging import threading from collections import namedtuple +from typing import Any from synapse.util.caches.lrucache import LruCache @@ -38,23 +39,26 @@ class DictionaryEntry(namedtuple("DictionaryEntry", ("full", "known_absent", "va return len(self.value) +class _Sentinel(enum.Enum): + # defining a sentinel in this way allows mypy to correctly handle the + # type of a dictionary lookup. + sentinel = object() + + class DictionaryCache: """Caches key -> dictionary lookups, supporting caching partial dicts, i.e. fetching a subset of dictionary keys for a particular key. """ def __init__(self, name, max_entries=1000): - self.cache = LruCache(max_size=max_entries, cache_name=name, size_callback=len) + self.cache = LruCache( + max_size=max_entries, cache_name=name, size_callback=len + ) # type: LruCache[Any, DictionaryEntry] self.name = name self.sequence = 0 self.thread = None - class Sentinel: - __slots__ = [] - - self.sentinel = Sentinel() - def check_thread(self): expected_thread = self.thread if expected_thread is None: @@ -76,8 +80,8 @@ class DictionaryCache: Returns: DictionaryEntry """ - entry = self.cache.get(key, self.sentinel) - if entry is not self.sentinel: + entry = self.cache.get(key, _Sentinel.sentinel) + if entry is not _Sentinel.sentinel: if dict_keys is None: return DictionaryEntry( entry.full, entry.known_absent, dict(entry.value) diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index e4804f79e0..0eed53d3f4 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -15,12 +15,30 @@ import threading from functools import wraps -from typing import Callable, Optional, Type, Union +from typing import ( + Any, + Callable, + Generic, + Iterable, + Optional, + Type, + TypeVar, + Union, + cast, + overload, +) + +from typing_extensions import Literal from synapse.config import cache as cache_config from synapse.util.caches import CacheMetric, register_cache from synapse.util.caches.treecache import TreeCache +T = TypeVar("T") +FT = TypeVar("FT", bound=Callable[..., Any]) +KT = TypeVar("KT") +VT = TypeVar("VT") + def enumerate_leaves(node, depth): if depth == 0: @@ -42,7 +60,7 @@ class _Node: self.callbacks = callbacks -class LruCache: +class LruCache(Generic[KT, VT]): """ Least-recently-used cache, supporting prometheus metrics and invalidation callbacks. @@ -128,13 +146,13 @@ class LruCache: if metrics: metrics.inc_evictions(evicted_len) - def synchronized(f): + def synchronized(f: FT) -> FT: @wraps(f) def inner(*args, **kwargs): with lock: return f(*args, **kwargs) - return inner + return cast(FT, inner) cached_cache_len = [0] if size_callback is not None: @@ -188,8 +206,31 @@ class LruCache: node.callbacks.clear() return deleted_len + @overload + def cache_get( + key: KT, + default: Literal[None] = None, + callbacks: Iterable[Callable[[], None]] = ..., + update_metrics: bool = ..., + ) -> Optional[VT]: + ... + + @overload + def cache_get( + key: KT, + default: T, + callbacks: Iterable[Callable[[], None]] = ..., + update_metrics: bool = ..., + ) -> Union[T, VT]: + ... 
+ @synchronized - def cache_get(key, default=None, callbacks=[], update_metrics=True): + def cache_get( + key: KT, + default=None, + callbacks: Iterable[Callable[[], None]] = [], + update_metrics: bool = True, + ): node = cache.get(key, None) if node is not None: move_node_to_front(node) @@ -203,7 +244,7 @@ class LruCache: return default @synchronized - def cache_set(key, value, callbacks=[]): + def cache_set(key: KT, value: VT, callbacks: Iterable[Callable[[], None]] = []): node = cache.get(key, None) if node is not None: # We sometimes store large objects, e.g. dicts, which cause @@ -232,7 +273,7 @@ class LruCache: evict() @synchronized - def cache_set_default(key, value): + def cache_set_default(key: KT, value: VT) -> VT: node = cache.get(key, None) if node is not None: return node.value @@ -241,8 +282,16 @@ class LruCache: evict() return value + @overload + def cache_pop(key: KT, default: Literal[None] = None) -> Union[None, VT]: + ... + + @overload + def cache_pop(key: KT, default: T) -> Union[T, VT]: + ... + @synchronized - def cache_pop(key, default=None): + def cache_pop(key: KT, default=None): node = cache.get(key, None) if node: delete_node(node) @@ -252,18 +301,18 @@ class LruCache: return default @synchronized - def cache_del_multi(key): + def cache_del_multi(key: KT) -> None: """ This will only work if constructed with cache_type=TreeCache """ popped = cache.pop(key) if popped is None: return - for leaf in enumerate_leaves(popped, keylen - len(key)): + for leaf in enumerate_leaves(popped, keylen - len(cast(tuple, key))): delete_node(leaf) @synchronized - def cache_clear(): + def cache_clear() -> None: list_root.next_node = list_root list_root.prev_node = list_root for node in cache.values(): @@ -274,7 +323,7 @@ class LruCache: cached_cache_len[0] = 0 @synchronized - def cache_contains(key): + def cache_contains(key: KT) -> bool: return key in cache self.sentinel = object() -- cgit 1.5.1 From 903d11c43a5df9f704e5dad4d14506a6470524fc Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Mon, 19 Oct 2020 15:00:12 +0100 Subject: Add `DeferredCache.get_immediate` method (#8568) * Add `DeferredCache.get_immediate` method A bunch of things that are currently calling `DeferredCache.get` are only really interested in the result if it's completed. We can optimise and simplify this case. * Remove unused 'default' parameter to DeferredCache.get() * another get_immediate instance --- changelog.d/8568.misc | 1 + synapse/push/bulk_push_rule_evaluator.py | 2 +- synapse/storage/databases/main/pusher.py | 2 +- synapse/storage/databases/main/receipts.py | 11 +-------- synapse/storage/databases/main/roommember.py | 2 +- synapse/util/caches/deferred_cache.py | 35 ++++++++++++++++++++-------- tests/util/caches/test_deferred_cache.py | 27 +++++++++++++++++---- 7 files changed, 53 insertions(+), 27 deletions(-) create mode 100644 changelog.d/8568.misc (limited to 'synapse/util/caches/deferred_cache.py') diff --git a/changelog.d/8568.misc b/changelog.d/8568.misc new file mode 100644 index 0000000000..0ed7db92d3 --- /dev/null +++ b/changelog.d/8568.misc @@ -0,0 +1 @@ +Add `get_immediate` method to `DeferredCache`. 
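As a rough illustration of the new API (a sketch modelled on the `test_get_immediate` test added below, not code taken from the patch itself): `get_immediate()` only ever returns a *completed* cached value, falling back to the supplied default while the underlying Deferred is still pending, so call sites no longer need to unwrap an `ObservableDeferred` by hand.

    from twisted.internet import defer

    from synapse.util.caches.deferred_cache import DeferredCache

    cache = DeferredCache("example")  # type: DeferredCache[str, int]

    d = defer.Deferred()
    cache.set("key", d)

    # while the lookup is still in flight, the default is returned
    assert cache.get_immediate("key", None) is None

    d.callback(42)

    # once the Deferred has completed, the real value is returned
    assert cache.get_immediate("key", None) == 42

The diffs below apply the same simplification to the call sites in the push-rule evaluator and in the pusher, receipts and room-member stores.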
diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index c440f2545c..a701defcdd 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -496,6 +496,6 @@ class _Invalidation(namedtuple("_Invalidation", ("cache", "room_id"))): # dedupe when we add callbacks to lru cache nodes, otherwise the number # of callbacks would grow. def __call__(self): - rules = self.cache.get(self.room_id, None, update_metrics=False) + rules = self.cache.get_immediate(self.room_id, None, update_metrics=False) if rules: rules.invalidate_all() diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py index df8609b97b..7997242d90 100644 --- a/synapse/storage/databases/main/pusher.py +++ b/synapse/storage/databases/main/pusher.py @@ -303,7 +303,7 @@ class PusherStore(PusherWorkerStore): lock=False, ) - user_has_pusher = self.get_if_user_has_pusher.cache.get( + user_has_pusher = self.get_if_user_has_pusher.cache.get_immediate( (user_id,), None, update_metrics=False ) diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py index 5cdf16521c..ca7917c989 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py @@ -25,7 +25,6 @@ from synapse.storage.database import DatabasePool from synapse.storage.util.id_generators import StreamIdGenerator from synapse.types import JsonDict from synapse.util import json_encoder -from synapse.util.async_helpers import ObservableDeferred from synapse.util.caches.descriptors import cached, cachedList from synapse.util.caches.stream_change_cache import StreamChangeCache @@ -413,18 +412,10 @@ class ReceiptsWorkerStore(SQLBaseStore, metaclass=abc.ABCMeta): if receipt_type != "m.read": return - # Returns either an ObservableDeferred or the raw result - res = self.get_users_with_read_receipts_in_room.cache.get( + res = self.get_users_with_read_receipts_in_room.cache.get_immediate( room_id, None, update_metrics=False ) - # first handle the ObservableDeferred case - if isinstance(res, ObservableDeferred): - if res.has_called(): - res = res.get_result() - else: - res = None - if res and user_id in res: # We'd only be adding to the set, so no point invalidating if the # user is already there diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 20fcdaa529..9b08b49862 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -531,7 +531,7 @@ class RoomMemberWorkerStore(EventsWorkerStore): # If we do then we can reuse that result and simply update it with # any membership changes in `delta_ids` if context.prev_group and context.delta_ids: - prev_res = self._get_joined_users_from_context.cache.get( + prev_res = self._get_joined_users_from_context.cache.get_immediate( (room_id, context.prev_group), None ) if prev_res and isinstance(prev_res, dict): diff --git a/synapse/util/caches/deferred_cache.py b/synapse/util/caches/deferred_cache.py index 4026e1f8fa..faeef75506 100644 --- a/synapse/util/caches/deferred_cache.py +++ b/synapse/util/caches/deferred_cache.py @@ -17,7 +17,16 @@ import enum import threading -from typing import Callable, Generic, Iterable, MutableMapping, Optional, TypeVar, cast +from typing import ( + Callable, + Generic, + Iterable, + MutableMapping, + Optional, + TypeVar, + Union, + cast, +) from prometheus_client import Gauge @@ -33,7 +42,7 @@ 
cache_pending_metric = Gauge( ["name"], ) - +T = TypeVar("T") KT = TypeVar("KT") VT = TypeVar("VT") @@ -119,21 +128,21 @@ class DeferredCache(Generic[KT, VT]): def get( self, key: KT, - default=_Sentinel.sentinel, callback: Optional[Callable[[], None]] = None, update_metrics: bool = True, - ): + ) -> Union[ObservableDeferred, VT]: """Looks the key up in the caches. Args: key(tuple) - default: What is returned if key is not in the caches. If not - specified then function throws KeyError instead callback(fn): Gets called when the entry in the cache is invalidated update_metrics (bool): whether to update the cache hit rate metrics Returns: Either an ObservableDeferred or the result itself + + Raises: + KeyError if the key is not found in the cache """ callbacks = [callback] if callback else [] val = self._pending_deferred_cache.get(key, _Sentinel.sentinel) @@ -145,13 +154,19 @@ class DeferredCache(Generic[KT, VT]): m.inc_hits() return val.deferred - val = self.cache.get( - key, default, callbacks=callbacks, update_metrics=update_metrics + val2 = self.cache.get( + key, _Sentinel.sentinel, callbacks=callbacks, update_metrics=update_metrics ) - if val is _Sentinel.sentinel: + if val2 is _Sentinel.sentinel: raise KeyError() else: - return val + return val2 + + def get_immediate( + self, key: KT, default: T, update_metrics: bool = True + ) -> Union[VT, T]: + """If we have a *completed* cached value, return it.""" + return self.cache.get(key, default, update_metrics=update_metrics) def set( self, diff --git a/tests/util/caches/test_deferred_cache.py b/tests/util/caches/test_deferred_cache.py index 9717be56b6..8a08ab6661 100644 --- a/tests/util/caches/test_deferred_cache.py +++ b/tests/util/caches/test_deferred_cache.py @@ -38,6 +38,22 @@ class DeferredCacheTestCase(unittest.TestCase): self.assertEquals(cache.get("foo"), 123) + def test_get_immediate(self): + cache = DeferredCache("test") + d1 = defer.Deferred() + cache.set("key1", d1) + + # get_immediate should return default + v = cache.get_immediate("key1", 1) + self.assertEqual(v, 1) + + # now complete the set + d1.callback(2) + + # get_immediate should return result + v = cache.get_immediate("key1", 1) + self.assertEqual(v, 2) + def test_invalidate(self): cache = DeferredCache("test") cache.prefill(("foo",), 123) @@ -80,9 +96,11 @@ class DeferredCacheTestCase(unittest.TestCase): # now do the invalidation cache.invalidate_all() - # lookup should return none - self.assertIsNone(cache.get("key1", None)) - self.assertIsNone(cache.get("key2", None)) + # lookup should fail + with self.assertRaises(KeyError): + cache.get("key1") + with self.assertRaises(KeyError): + cache.get("key2") # both callbacks should have been callbacked self.assertTrue(callback_record[0], "Invalidation callback for key1 not called") @@ -90,7 +108,8 @@ class DeferredCacheTestCase(unittest.TestCase): # letting the other lookup complete should do nothing d1.callback("result1") - self.assertIsNone(cache.get("key1", None)) + with self.assertRaises(KeyError): + cache.get("key1", None) def test_eviction(self): cache = DeferredCache( -- cgit 1.5.1 From 1f4269700c2353263a605856f28ded28501368e1 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 16 Oct 2020 12:34:55 +0100 Subject: Push some deferred wrangling down into DeferredCache --- changelog.d/8572.misc | 1 + synapse/util/caches/deferred_cache.py | 57 +++++++++++++++++++++++++++----- synapse/util/caches/descriptors.py | 32 ++++-------------- tests/util/caches/test_deferred_cache.py | 18 +++++----- 
tests/util/caches/test_descriptors.py | 5 ++- 5 files changed, 67 insertions(+), 46 deletions(-) create mode 100644 changelog.d/8572.misc (limited to 'synapse/util/caches/deferred_cache.py') diff --git a/changelog.d/8572.misc b/changelog.d/8572.misc new file mode 100644 index 0000000000..ea2a6d340d --- /dev/null +++ b/changelog.d/8572.misc @@ -0,0 +1 @@ +Modify `DeferredCache.get()` to return `Deferred`s instead of `ObservableDeferred`s. diff --git a/synapse/util/caches/deferred_cache.py b/synapse/util/caches/deferred_cache.py index faeef75506..6c162e9f34 100644 --- a/synapse/util/caches/deferred_cache.py +++ b/synapse/util/caches/deferred_cache.py @@ -57,7 +57,7 @@ class DeferredCache(Generic[KT, VT]): """Wraps an LruCache, adding support for Deferred results. It expects that each entry added with set() will be a Deferred; likewise get() - may return an ObservableDeferred. + will return a Deferred. """ __slots__ = ( @@ -130,16 +130,22 @@ class DeferredCache(Generic[KT, VT]): key: KT, callback: Optional[Callable[[], None]] = None, update_metrics: bool = True, - ) -> Union[ObservableDeferred, VT]: + ) -> defer.Deferred: """Looks the key up in the caches. + For symmetry with set(), this method does *not* follow the synapse logcontext + rules: the logcontext will not be cleared on return, and the Deferred will run + its callbacks in the sentinel context. In other words: wrap the result with + make_deferred_yieldable() before `await`ing it. + Args: - key(tuple) - callback(fn): Gets called when the entry in the cache is invalidated + key: + callback: Gets called when the entry in the cache is invalidated update_metrics (bool): whether to update the cache hit rate metrics Returns: - Either an ObservableDeferred or the result itself + A Deferred which completes with the result. Note that this may later fail + if there is an ongoing set() operation which later completes with a failure. Raises: KeyError if the key is not found in the cache @@ -152,7 +158,7 @@ class DeferredCache(Generic[KT, VT]): m = self.cache.metrics assert m # we always have a name, so should always have metrics m.inc_hits() - return val.deferred + return val.deferred.observe() val2 = self.cache.get( key, _Sentinel.sentinel, callbacks=callbacks, update_metrics=update_metrics @@ -160,7 +166,7 @@ class DeferredCache(Generic[KT, VT]): if val2 is _Sentinel.sentinel: raise KeyError() else: - return val2 + return defer.succeed(val2) def get_immediate( self, key: KT, default: T, update_metrics: bool = True @@ -173,7 +179,36 @@ class DeferredCache(Generic[KT, VT]): key: KT, value: defer.Deferred, callback: Optional[Callable[[], None]] = None, - ) -> ObservableDeferred: + ) -> defer.Deferred: + """Adds a new entry to the cache (or updates an existing one). + + The given `value` *must* be a Deferred. + + First any existing entry for the same key is invalidated. Then a new entry + is added to the cache for the given key. + + Until the `value` completes, calls to `get()` for the key will also result in an + incomplete Deferred, which will ultimately complete with the same result as + `value`. + + If `value` completes successfully, subsequent calls to `get()` will then return + a completed deferred with the same result. If it *fails*, the cache is + invalidated and subequent calls to `get()` will raise a KeyError. 
+ + If another call to `set()` happens before `value` completes, then (a) any + invalidation callbacks registered in the interim will be called, (b) any + `get()`s in the interim will continue to complete with the result from the + *original* `value`, (c) any future calls to `get()` will complete with the + result from the *new* `value`. + + It is expected that `value` does *not* follow the synapse logcontext rules - ie, + if it is incomplete, it runs its callbacks in the sentinel context. + + Args: + key: Key to be set + value: a deferred which will complete with a result to add to the cache + callback: An optional callback to be called when the entry is invalidated + """ if not isinstance(value, defer.Deferred): raise TypeError("not a Deferred") @@ -187,6 +222,8 @@ class DeferredCache(Generic[KT, VT]): if existing_entry: existing_entry.invalidate() + # XXX: why don't we invalidate the entry in `self.cache` yet? + self._pending_deferred_cache[key] = entry def compare_and_pop(): @@ -230,7 +267,9 @@ class DeferredCache(Generic[KT, VT]): # _pending_deferred_cache to the real cache. # observer.addCallbacks(cb, eb) - return observable + + # we return a new Deferred which will be called before any subsequent observers. + return observable.observe() def prefill(self, key: KT, value: VT, callback: Callable[[], None] = None): callbacks = [callback] if callback else [] diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index 1f43886804..a4172345ef 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -23,7 +23,6 @@ from twisted.internet import defer from synapse.logging.context import make_deferred_yieldable, preserve_fn from synapse.util import unwrapFirstError -from synapse.util.async_helpers import ObservableDeferred from synapse.util.caches.deferred_cache import DeferredCache logger = logging.getLogger(__name__) @@ -156,7 +155,7 @@ class CacheDescriptor(_CacheDescriptorBase): keylen=self.num_args, tree=self.tree, iterable=self.iterable, - ) # type: DeferredCache[Tuple, Any] + ) # type: DeferredCache[CacheKey, Any] def get_cache_key_gen(args, kwargs): """Given some args/kwargs return a generator that resolves into @@ -208,26 +207,12 @@ class CacheDescriptor(_CacheDescriptorBase): kwargs["cache_context"] = _CacheContext.get_instance(cache, cache_key) try: - cached_result_d = cache.get(cache_key, callback=invalidate_callback) - - if isinstance(cached_result_d, ObservableDeferred): - observer = cached_result_d.observe() - else: - observer = defer.succeed(cached_result_d) - + ret = cache.get(cache_key, callback=invalidate_callback) except KeyError: ret = defer.maybeDeferred(preserve_fn(self.orig), obj, *args, **kwargs) + ret = cache.set(cache_key, ret, callback=invalidate_callback) - def onErr(f): - cache.invalidate(cache_key) - return f - - ret.addErrback(onErr) - - result_d = cache.set(cache_key, ret, callback=invalidate_callback) - observer = result_d.observe() - - return make_deferred_yieldable(observer) + return make_deferred_yieldable(ret) wrapped = cast(_CachedFunction, _wrapped) @@ -286,7 +271,7 @@ class CacheListDescriptor(_CacheDescriptorBase): def __get__(self, obj, objtype=None): cached_method = getattr(obj, self.cached_method_name) - cache = cached_method.cache + cache = cached_method.cache # type: DeferredCache[CacheKey, Any] num_args = cached_method.num_args @functools.wraps(self.orig) @@ -326,14 +311,11 @@ class CacheListDescriptor(_CacheDescriptorBase): for arg in list_args: try: res = 
cache.get(arg_to_cache_key(arg), callback=invalidate_callback) - if not isinstance(res, ObservableDeferred): - results[arg] = res - elif not res.has_succeeded(): - res = res.observe() + if not res.called: res.addCallback(update_results_dict, arg) cached_defers.append(res) else: - results[arg] = res.get_result() + results[arg] = res.result except KeyError: missing.add(arg) diff --git a/tests/util/caches/test_deferred_cache.py b/tests/util/caches/test_deferred_cache.py index 8a08ab6661..68d26128c1 100644 --- a/tests/util/caches/test_deferred_cache.py +++ b/tests/util/caches/test_deferred_cache.py @@ -13,15 +13,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -import unittest from functools import partial from twisted.internet import defer from synapse.util.caches.deferred_cache import DeferredCache +from tests.unittest import TestCase -class DeferredCacheTestCase(unittest.TestCase): + +class DeferredCacheTestCase(TestCase): def test_empty(self): cache = DeferredCache("test") failed = False @@ -36,7 +37,7 @@ class DeferredCacheTestCase(unittest.TestCase): cache = DeferredCache("test") cache.prefill("foo", 123) - self.assertEquals(cache.get("foo"), 123) + self.assertEquals(self.successResultOf(cache.get("foo")), 123) def test_get_immediate(self): cache = DeferredCache("test") @@ -82,16 +83,15 @@ class DeferredCacheTestCase(unittest.TestCase): d2 = defer.Deferred() cache.set("key2", d2, partial(record_callback, 1)) - # lookup should return observable deferreds - self.assertFalse(cache.get("key1").has_called()) - self.assertFalse(cache.get("key2").has_called()) + # lookup should return pending deferreds + self.assertFalse(cache.get("key1").called) + self.assertFalse(cache.get("key2").called) # let one of the lookups complete d2.callback("result2") - # for now at least, the cache will return real results rather than an - # observabledeferred - self.assertEqual(cache.get("key2"), "result2") + # now the cache will return a completed deferred + self.assertEqual(self.successResultOf(cache.get("key2")), "result2") # now do the invalidation cache.invalidate_all() diff --git a/tests/util/caches/test_descriptors.py b/tests/util/caches/test_descriptors.py index 3d738afa7f..fc2663c02d 100644 --- a/tests/util/caches/test_descriptors.py +++ b/tests/util/caches/test_descriptors.py @@ -27,7 +27,6 @@ from synapse.logging.context import ( current_context, make_deferred_yieldable, ) -from synapse.util.async_helpers import ObservableDeferred from synapse.util.caches import descriptors from synapse.util.caches.descriptors import cached @@ -419,9 +418,9 @@ class CacheDecoratorTestCase(unittest.HomeserverTestCase): a = A() - a.func.prefill(("foo",), ObservableDeferred(d)) + a.func.prefill(("foo",), 456) - self.assertEquals(a.func("foo").result, d.result) + self.assertEquals(a.func("foo").result, 456) self.assertEquals(callcount[0], 0) @defer.inlineCallbacks -- cgit 1.5.1 From 2b3af01791120de5d0b829395109b42870d9e465 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 16 Oct 2020 13:16:02 +0100 Subject: optimise DeferredCache.set --- changelog.d/8593.misc | 1 + synapse/util/caches/deferred_cache.py | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) create mode 100644 changelog.d/8593.misc (limited to 'synapse/util/caches/deferred_cache.py') diff --git a/changelog.d/8593.misc b/changelog.d/8593.misc new file mode 100644 index 0000000000..d266ba19a4 --- /dev/null +++ b/changelog.d/8593.misc @@ -0,0 +1 @@ +Minor optimisations in 
caching code. diff --git a/synapse/util/caches/deferred_cache.py b/synapse/util/caches/deferred_cache.py index 6c162e9f34..fc01026285 100644 --- a/synapse/util/caches/deferred_cache.py +++ b/synapse/util/caches/deferred_cache.py @@ -214,9 +214,6 @@ class DeferredCache(Generic[KT, VT]): callbacks = [callback] if callback else [] self.check_thread() - observable = ObservableDeferred(value, consumeErrors=True) - observer = observable.observe() - entry = CacheEntry(deferred=observable, callbacks=callbacks) existing_entry = self._pending_deferred_cache.pop(key, None) if existing_entry: @@ -224,6 +221,18 @@ class DeferredCache(Generic[KT, VT]): # XXX: why don't we invalidate the entry in `self.cache` yet? + # we can save a whole load of effort if the deferred is ready. + if value.called: + self.cache.set(key, value.result, callbacks) + return value + + # otherwise, we'll add an entry to the _pending_deferred_cache for now, + # and add callbacks to add it to the cache properly later. + + observable = ObservableDeferred(value, consumeErrors=True) + observer = observable.observe() + entry = CacheEntry(deferred=observable, callbacks=callbacks) + self._pending_deferred_cache[key] = entry def compare_and_pop(): -- cgit 1.5.1 From c13820bcee5f23119b65f9386b7c03bd4a33acbe Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 21 Oct 2020 18:54:53 +0100 Subject: fix failure case --- synapse/util/caches/deferred_cache.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'synapse/util/caches/deferred_cache.py') diff --git a/synapse/util/caches/deferred_cache.py b/synapse/util/caches/deferred_cache.py index fc01026285..601305487c 100644 --- a/synapse/util/caches/deferred_cache.py +++ b/synapse/util/caches/deferred_cache.py @@ -31,6 +31,7 @@ from typing import ( from prometheus_client import Gauge from twisted.internet import defer +from twisted.python import failure from synapse.util.async_helpers import ObservableDeferred from synapse.util.caches.lrucache import LruCache @@ -223,7 +224,9 @@ class DeferredCache(Generic[KT, VT]): # we can save a whole load of effort if the deferred is ready. if value.called: - self.cache.set(key, value.result, callbacks) + result = value.result + if not isinstance(result, failure.Failure): + self.cache.set(key, result, callbacks) return value # otherwise, we'll add an entry to the _pending_deferred_cache for now, -- cgit 1.5.1
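Taken together, the last three patches change the `DeferredCache` contract as described in the docstrings above: `get()` returns a plain `Deferred` and raises `KeyError` on a miss, `set()` short-circuits when handed an already-completed `Deferred`, and a failed result is never written into the underlying `LruCache`. The following sketch (based on the rewritten `CacheDescriptor` and the new docstrings, with `do_lookup` as a hypothetical stand-in for the real lookup) shows the caller pattern these changes are aimed at:

    from twisted.internet import defer

    from synapse.logging.context import make_deferred_yieldable, preserve_fn
    from synapse.util.caches.deferred_cache import DeferredCache

    cache = DeferredCache("lookups")  # type: DeferredCache[str, str]


    def do_lookup(key):
        # hypothetical stand-in for the real (expensive) lookup
        return "value for " + key


    def cached_lookup(key):
        try:
            # hit: a Deferred which may or may not have completed yet
            ret = cache.get(key)
        except KeyError:
            # miss: start the lookup and cache the pending Deferred; if it
            # fails, the entry is invalidated rather than cached
            ret = defer.maybeDeferred(preserve_fn(do_lookup), key)
            ret = cache.set(key, ret)

        # neither get() nor set() follows the synapse logcontext rules, so the
        # result is wrapped before being yielded/awaited
        return make_deferred_yieldable(ret)

And, continuing the same sketch, the behaviour that the `set()` fast path and the failure-case fix are assumed to give for Deferreds that have already completed:

    d_ok = defer.succeed("hello")
    cache.set("ok", d_ok)
    # completed success: stored straight into the LruCache, with no
    # ObservableDeferred or pending-cache entry created
    assert cache.get_immediate("ok", None) == "hello"

    d_err = defer.fail(RuntimeError("boom"))
    cache.set("bad", d_err)
    d_err.addErrback(lambda f: None)  # consume the failure so it is not logged
    # completed failure: nothing is cached for this key
    assert cache.get_immediate("bad", None) is None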