-rw-r--r--  changelog.d/10829.misc                 1
-rw-r--r--  synapse/util/caches/__init__.py       31
-rw-r--r--  synapse/util/caches/expiringcache.py  10
-rw-r--r--  synapse/util/caches/lrucache.py        4
4 files changed, 31 insertions(+), 15 deletions(-)
diff --git a/changelog.d/10829.misc b/changelog.d/10829.misc
new file mode 100644
index 0000000000..ac5fd6b047
--- /dev/null
+++ b/changelog.d/10829.misc
@@ -0,0 +1 @@
+Track cache eviction rates more finely in Prometheus' monitoring.
\ No newline at end of file
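
For context, the heart of this change is that each cache's eviction gauge now carries a "reason" label, so size-pressure evictions and expiry evictions become separate time series. A minimal sketch of the resulting metric shape, using prometheus_client directly; the cache name "getEvent" and the values are purely illustrative:

    from prometheus_client import Gauge

    # Mirrors the relabelled gauge in synapse/util/caches/__init__.py below.
    cache_evicted = Gauge(
        "synapse_util_caches_cache:evicted_size", "", ["name", "reason"]
    )

    # One child series per (cache name, reason) pair.
    cache_evicted.labels("getEvent", "size").set(12)
    cache_evicted.labels("getEvent", "time").set(3)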
diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py
index cab1bf0c15..df4d61e4b6 100644
--- a/synapse/util/caches/__init__.py
+++ b/synapse/util/caches/__init__.py
@@ -12,8 +12,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import collections
 import logging
+import typing
+from enum import Enum, auto
 from sys import intern
 from typing import Callable, Dict, Optional, Sized
 
@@ -34,7 +36,7 @@ collectors_by_name: Dict[str, "CacheMetric"] = {}
 
 cache_size = Gauge("synapse_util_caches_cache:size", "", ["name"])
 cache_hits = Gauge("synapse_util_caches_cache:hits", "", ["name"])
-cache_evicted = Gauge("synapse_util_caches_cache:evicted_size", "", ["name"])
+cache_evicted = Gauge("synapse_util_caches_cache:evicted_size", "", ["name", "reason"])
 cache_total = Gauge("synapse_util_caches_cache:total", "", ["name"])
 cache_max_size = Gauge("synapse_util_caches_cache_max_size", "", ["name"])
 cache_memory_usage = Gauge(
@@ -46,11 +48,16 @@ cache_memory_usage = Gauge(
 response_cache_size = Gauge("synapse_util_caches_response_cache:size", "", ["name"])
 response_cache_hits = Gauge("synapse_util_caches_response_cache:hits", "", ["name"])
 response_cache_evicted = Gauge(
-    "synapse_util_caches_response_cache:evicted_size", "", ["name"]
+    "synapse_util_caches_response_cache:evicted_size", "", ["name", "reason"]
 )
 response_cache_total = Gauge("synapse_util_caches_response_cache:total", "", ["name"])
 
 
+class EvictionReason(Enum):
+    size = auto()
+    time = auto()
+
+
 @attr.s(slots=True)
 class CacheMetric:
 
@@ -61,7 +68,9 @@ class CacheMetric:
 
     hits = attr.ib(default=0)
     misses = attr.ib(default=0)
-    evicted_size = attr.ib(default=0)
+    eviction_size_by_reason: typing.Counter[EvictionReason] = attr.ib(
+        factory=collections.Counter
+    )
     memory_usage = attr.ib(default=None)
 
     def inc_hits(self) -> None:
@@ -70,8 +79,8 @@ class CacheMetric:
     def inc_misses(self) -> None:
         self.misses += 1
 
-    def inc_evictions(self, size: int = 1) -> None:
-        self.evicted_size += size
+    def inc_evictions(self, reason: EvictionReason, size: int = 1) -> None:
+        self.eviction_size_by_reason[reason] += size
 
     def inc_memory_usage(self, memory: int) -> None:
         if self.memory_usage is None:
@@ -94,14 +103,20 @@ class CacheMetric:
             if self._cache_type == "response_cache":
                 response_cache_size.labels(self._cache_name).set(len(self._cache))
                 response_cache_hits.labels(self._cache_name).set(self.hits)
-                response_cache_evicted.labels(self._cache_name).set(self.evicted_size)
+                for reason in EvictionReason:
+                    response_cache_evicted.labels(self._cache_name, reason.name).set(
+                        self.eviction_size_by_reason[reason]
+                    )
                 response_cache_total.labels(self._cache_name).set(
                     self.hits + self.misses
                 )
             else:
                 cache_size.labels(self._cache_name).set(len(self._cache))
                 cache_hits.labels(self._cache_name).set(self.hits)
-                cache_evicted.labels(self._cache_name).set(self.evicted_size)
+                for reason in EvictionReason:
+                    cache_evicted.labels(self._cache_name, reason.name).set(
+                        self.eviction_size_by_reason[reason]
+                    )
                 cache_total.labels(self._cache_name).set(self.hits + self.misses)
                 if getattr(self._cache, "max_size", None):
                     cache_max_size.labels(self._cache_name).set(self._cache.max_size)
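
In short, CacheMetric replaces its single evicted_size counter with a per-reason Counter, and collection emits one gauge sample per EvictionReason. A condensed, runnable sketch of that bookkeeping, with names taken from the hunk above and the surrounding CacheMetric/attrs plumbing elided:

    import collections
    import typing
    from enum import Enum, auto

    class EvictionReason(Enum):
        size = auto()
        time = auto()

    # Per-reason eviction sizes, as now kept on CacheMetric.
    eviction_size_by_reason: typing.Counter[EvictionReason] = collections.Counter()

    def inc_evictions(reason: EvictionReason, size: int = 1) -> None:
        eviction_size_by_reason[reason] += size

    inc_evictions(EvictionReason.size, 10)  # e.g. an entry pushed out of an LRU
    inc_evictions(EvictionReason.time)      # e.g. an expired entry
    assert eviction_size_by_reason[EvictionReason.size] == 10
    assert eviction_size_by_reason[EvictionReason.time] == 1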
diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py
index bde16b8577..c3f72aa06d 100644
--- a/synapse/util/caches/expiringcache.py
+++ b/synapse/util/caches/expiringcache.py
@@ -22,7 +22,7 @@ from typing_extensions import Literal
 from synapse.config import cache as cache_config
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.util import Clock
-from synapse.util.caches import register_cache
+from synapse.util.caches import EvictionReason, register_cache
 
 logger = logging.getLogger(__name__)
 
@@ -98,9 +98,9 @@ class ExpiringCache(Generic[KT, VT]):
         while self._max_size and len(self) > self._max_size:
             _key, value = self._cache.popitem(last=False)
             if self.iterable:
-                self.metrics.inc_evictions(len(value.value))
+                self.metrics.inc_evictions(EvictionReason.size, len(value.value))
             else:
-                self.metrics.inc_evictions()
+                self.metrics.inc_evictions(EvictionReason.size)
 
     def __getitem__(self, key: KT) -> VT:
         try:
@@ -175,9 +175,9 @@ class ExpiringCache(Generic[KT, VT]):
         for k in keys_to_delete:
             value = self._cache.pop(k)
             if self.iterable:
-                self.metrics.inc_evictions(len(value.value))
+                self.metrics.inc_evictions(EvictionReason.time, len(value.value))
             else:
-                self.metrics.inc_evictions()
+                self.metrics.inc_evictions(EvictionReason.time)
 
         logger.debug(
             "[%s] _prune_cache before: %d, after len: %d",
diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py
index 39dce9dd41..ea6e8dc8d1 100644
--- a/synapse/util/caches/lrucache.py
+++ b/synapse/util/caches/lrucache.py
@@ -40,7 +40,7 @@ from twisted.internet.interfaces import IReactorTime
 from synapse.config import cache as cache_config
 from synapse.metrics.background_process_metrics import wrap_as_background_process
 from synapse.util import Clock, caches
-from synapse.util.caches import CacheMetric, register_cache
+from synapse.util.caches import CacheMetric, EvictionReason, register_cache
 from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry
 from synapse.util.linked_list import ListNode
 
@@ -403,7 +403,7 @@ class LruCache(Generic[KT, VT]):
                 evicted_len = delete_node(node)
                 cache.pop(node.key, None)
                 if metrics:
-                    metrics.inc_evictions(evicted_len)
+                    metrics.inc_evictions(EvictionReason.size, evicted_len)
 
         def synchronized(f: FT) -> FT:
             @wraps(f)
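
Unlike ExpiringCache, LruCache only ever sheds entries to stay within its size budget, so every eviction it records uses EvictionReason.size. A minimal, self-contained sketch of that pattern with an OrderedDict and a stand-in counter (the real code goes through CacheMetric.inc_evictions with the evicted entry's length, as shown above):

    import collections
    from enum import Enum, auto

    class EvictionReason(Enum):  # mirrors synapse/util/caches/__init__.py
        size = auto()
        time = auto()

    evictions = collections.Counter()

    def evict_to_size(cache: "collections.OrderedDict", max_size: int) -> None:
        # Drop least-recently-used entries until the cache fits its budget.
        while len(cache) > max_size:
            cache.popitem(last=False)
            evictions[EvictionReason.size] += 1

    lru = collections.OrderedDict((k, str(k)) for k in range(5))
    evict_to_size(lru, max_size=3)
    assert evictions[EvictionReason.size] == 2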