summary refs log tree commit diff
diff options
context:
space:
mode:
authorErik Johnston <erik@matrix.org>2019-11-22 18:19:55 +0000
committerErik Johnston <erik@matrix.org>2019-11-22 18:19:55 +0000
commit9cca5ee743c0f410d84ecfbfe4983f1d695cb503 (patch)
tree91f84a7b290a0f4b229d924f5ece4876e2b618b8
parentFix link to user_dir_populate.sql in the user directory docs (#6388) (diff)
downloadsynapse-9cca5ee743c0f410d84ecfbfe4983f1d695cb503.tar.xz
Track cache hit ratios
-rw-r--r--synapse/util/caches/descriptors.py97
1 files changed, 96 insertions, 1 deletions
diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py

index 84f5ae22c3..3a20efc566 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py
@@ -16,13 +16,16 @@ import functools import inspect import logging +import math import threading +from collections import deque from typing import Any, Tuple, Union, cast from weakref import WeakValueDictionary from six import itervalues -from prometheus_client import Gauge +from bloom_filter import BloomFilter +from prometheus_client import Gauge, Histogram from typing_extensions import Protocol from twisted.internet import defer @@ -38,6 +41,13 @@ from . import register_cache logger = logging.getLogger(__name__) +cache_size_counts = Histogram( + "synapse_util_caches_miss_rate", + "", + ["name"], + buckets=[0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2, "+Inf"], +) + CacheKey = Union[Tuple, Any] @@ -87,6 +97,8 @@ class Cache(object): "thread", "metrics", "_pending_deferred_cache", + "ratio_tracking", + "ratio_metric", ) def __init__(self, name, max_entries=1000, keylen=1, tree=False, iterable=False): @@ -104,6 +116,9 @@ class Cache(object): self.name = name self.keylen = keylen self.thread = None + + self.ratio_tracking = CacheHitRatioTracking(max_entries) + self.metrics = register_cache( "cache", name, @@ -111,6 +126,8 @@ class Cache(object): collect_callback=self._metrics_collection_callback, ) + self.ratio_metric = cache_size_counts.labels(name) + def _on_evicted(self, evicted_count): self.metrics.inc_evictions(evicted_count) @@ -148,6 +165,11 @@ class Cache(object): self.metrics.inc_hits() return val.deferred + ratio = self.ratio_tracking.add(str(hash(key))) + if ratio is None: + ratio = 10 + self.ratio_metric.observe(ratio) + val = self.cache.get(key, _CacheSentinel, callbacks=callbacks) if val is not _CacheSentinel: self.metrics.inc_hits() @@ -724,3 +746,76 @@ def cachedList(cached_method_name, list_name, num_args=None, inlineCallbacks=Fal num_args=num_args, inlineCallbacks=inlineCallbacks, ) + + +class CacheHitRatioTracking(object): + def __init__(self, target_size, buckets=20, error_rate=0.01): + self._bucket_size = 2 * target_size / buckets + self._error_rate = error_rate + + self._target_size = target_size + + self._buckets = deque( + ( + [ + BloomFilter( + max_elements=self._target_size, error_rate=self._error_rate + ), + 0, + ] + for _ in range(buckets) + ), + maxlen=buckets, + ) + + def add(self, key): + found_in_bucket = None + + for i, (bucket, _) in enumerate(self._buckets): + if key in bucket: + found_in_bucket = i + break + else: + self._buckets[i][1] += 1 + + self._buckets[0][0].add(key) + + while self._buckets[-1][1] > 2 * self._target_size: + self._buckets.pop() + + if self._buckets[0][1] > self._bucket_size: + self._buckets.appendleft( + [ + BloomFilter( + max_elements=self._target_size, error_rate=self._error_rate + ), + 0, + ] + ) + + if found_in_bucket is not None: + return self._buckets[found_in_bucket][1] / self._target_size + + +def _count_set_bits(n): + count = 0 + while n: + count += n & 1 + n >>= 1 + return count + + +def _num_bits_set(bloom): + count = 0 + for c in bloom.backend.array_: + if c == 0: + continue + count += _count_set_bits(c) + return count + + +def _get_estimate_bloom_size(bloom): + X = _num_bits_set(bloom) + m = bloom.num_bits_m + k = bloom.num_probes_k + return -(m / k) * math.log(1 - X / m)