diff --git a/changelog.d/9881.feature b/changelog.d/9881.feature
new file mode 100644
index 0000000000..088a517e02
--- /dev/null
+++ b/changelog.d/9881.feature
@@ -0,0 +1 @@
+Add experimental option to track memory usage of the caches.
diff --git a/mypy.ini b/mypy.ini
index a40f705b76..ea655a0d4d 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -171,3 +171,6 @@ ignore_missing_imports = True
[mypy-txacme.*]
ignore_missing_imports = True
+
+[mypy-pympler.*]
+ignore_missing_imports = True
diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py
index a3fe9a3f38..f730cdbd78 100644
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@@ -454,6 +454,7 @@ def start(config_options):
config.server.update_user_directory = False
synapse.events.USE_FROZEN_DICTS = config.use_frozen_dicts
+ synapse.util.caches.TRACK_MEMORY_USAGE = config.caches.track_memory_usage
if config.server.gc_seconds:
synapse.metrics.MIN_TIME_BETWEEN_GCS = config.server.gc_seconds
diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py
index 6a823da10d..b2501ee4d7 100644
--- a/synapse/app/homeserver.py
+++ b/synapse/app/homeserver.py
@@ -341,6 +341,7 @@ def setup(config_options):
sys.exit(0)
events.USE_FROZEN_DICTS = config.use_frozen_dicts
+ synapse.util.caches.TRACK_MEMORY_USAGE = config.caches.track_memory_usage
if config.server.gc_seconds:
synapse.metrics.MIN_TIME_BETWEEN_GCS = config.server.gc_seconds
diff --git a/synapse/config/cache.py b/synapse/config/cache.py
index 41b9b3f51f..91165ee1ce 100644
--- a/synapse/config/cache.py
+++ b/synapse/config/cache.py
@@ -17,6 +17,8 @@ import re
import threading
from typing import Callable, Dict
+from synapse.python_dependencies import DependencyException, check_requirements
+
from ._base import Config, ConfigError
# The prefix for all cache factor-related environment variables
@@ -189,6 +191,15 @@ class CacheConfig(Config):
)
self.cache_factors[cache] = factor
+ self.track_memory_usage = cache_config.get("track_memory_usage", False)
+ if self.track_memory_usage:
+ try:
+ check_requirements("cache_memory")
+ except DependencyException as e:
+ raise ConfigError(
+ e.message # noqa: B306, DependencyException.message is a property
+ )
+
# Resize all caches (if necessary) with the new factors we've loaded
self.resize_all_caches()
diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py
index 2de946f464..d58eeeaa74 100644
--- a/synapse/python_dependencies.py
+++ b/synapse/python_dependencies.py
@@ -116,6 +116,8 @@ CONDITIONAL_REQUIREMENTS = {
# hiredis is not a *strict* dependency, but it makes things much faster.
# (if it is not installed, we fall back to slow code.)
"redis": ["txredisapi>=1.4.7", "hiredis"],
+ # Required to use experimental `caches.track_memory_usage` config option.
+ "cache_memory": ["pympler"],
}
ALL_OPTIONAL_REQUIREMENTS = set() # type: Set[str]
diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py
index 46af7fa473..ca36f07c20 100644
--- a/synapse/util/caches/__init__.py
+++ b/synapse/util/caches/__init__.py
@@ -24,6 +24,11 @@ from synapse.config.cache import add_resizable_cache
logger = logging.getLogger(__name__)
+
+# Whether to track estimated memory usage of the LruCaches.
+TRACK_MEMORY_USAGE = False
+
+
caches_by_name = {} # type: Dict[str, Sized]
collectors_by_name = {} # type: Dict[str, CacheMetric]
@@ -32,6 +37,11 @@ cache_hits = Gauge("synapse_util_caches_cache:hits", "", ["name"])
cache_evicted = Gauge("synapse_util_caches_cache:evicted_size", "", ["name"])
cache_total = Gauge("synapse_util_caches_cache:total", "", ["name"])
cache_max_size = Gauge("synapse_util_caches_cache_max_size", "", ["name"])
+cache_memory_usage = Gauge(
+ "synapse_util_caches_cache_size_bytes",
+ "Estimated memory usage of the caches",
+ ["name"],
+)
response_cache_size = Gauge("synapse_util_caches_response_cache:size", "", ["name"])
response_cache_hits = Gauge("synapse_util_caches_response_cache:hits", "", ["name"])
@@ -52,6 +62,7 @@ class CacheMetric:
hits = attr.ib(default=0)
misses = attr.ib(default=0)
evicted_size = attr.ib(default=0)
+ memory_usage = attr.ib(default=None)
def inc_hits(self):
self.hits += 1
@@ -62,6 +73,19 @@ class CacheMetric:
def inc_evictions(self, size=1):
self.evicted_size += size
+ def inc_memory_usage(self, memory: int):
+ if self.memory_usage is None:
+ self.memory_usage = 0
+
+ self.memory_usage += memory
+
+ def dec_memory_usage(self, memory: int):
+ self.memory_usage -= memory
+
+ def clear_memory_usage(self):
+ if self.memory_usage is not None:
+ self.memory_usage = 0
+
def describe(self):
return []
@@ -81,6 +105,13 @@ class CacheMetric:
cache_total.labels(self._cache_name).set(self.hits + self.misses)
if getattr(self._cache, "max_size", None):
cache_max_size.labels(self._cache_name).set(self._cache.max_size)
+
+ if TRACK_MEMORY_USAGE:
+ # self.memory_usage can be None if nothing has been inserted
+ # into the cache yet.
+ cache_memory_usage.labels(self._cache_name).set(
+ self.memory_usage or 0
+ )
if self._collect_callback:
self._collect_callback()
except Exception as e:
diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py
index 10b0ec6b75..1be675e014 100644
--- a/synapse/util/caches/lrucache.py
+++ b/synapse/util/caches/lrucache.py
@@ -32,9 +32,36 @@ from typing import (
from typing_extensions import Literal
from synapse.config import cache as cache_config
+from synapse.util import caches
from synapse.util.caches import CacheMetric, register_cache
from synapse.util.caches.treecache import TreeCache
+try:
+ from pympler.asizeof import Asizer
+
+ def _get_size_of(val: Any, *, recurse=True) -> int:
+ """Get an estimate of the size in bytes of the object.
+
+ Args:
+ val: The object to size.
+ recurse: If true will include referenced values in the size,
+ otherwise only sizes the given object.
+ """
+ # Ignore singleton values when calculating memory usage.
+ if val in ((), None, ""):
+ return 0
+
+ sizer = Asizer()
+ sizer.exclude_refs((), None, "")
+ return sizer.asizeof(val, limit=100 if recurse else 0)
+
+
+except ImportError:
+
+ def _get_size_of(val: Any, *, recurse=True) -> int:
+ return 0
+
+
# Function type: the type used for invalidation callbacks
FT = TypeVar("FT", bound=Callable[..., Any])
@@ -56,7 +83,7 @@ def enumerate_leaves(node, depth):
class _Node:
- __slots__ = ["prev_node", "next_node", "key", "value", "callbacks"]
+ __slots__ = ["prev_node", "next_node", "key", "value", "callbacks", "memory"]
def __init__(
self,
@@ -84,6 +111,16 @@ class _Node:
self.add_callbacks(callbacks)
+ self.memory = 0
+ if caches.TRACK_MEMORY_USAGE:
+ self.memory = (
+ _get_size_of(key)
+ + _get_size_of(value)
+ + _get_size_of(self.callbacks, recurse=False)
+ + _get_size_of(self, recurse=False)
+ )
+ self.memory += _get_size_of(self.memory, recurse=False)
+
def add_callbacks(self, callbacks: Collection[Callable[[], None]]) -> None:
"""Add to stored list of callbacks, removing duplicates."""
@@ -233,6 +270,9 @@ class LruCache(Generic[KT, VT]):
if size_callback:
cached_cache_len[0] += size_callback(node.value)
+ if caches.TRACK_MEMORY_USAGE and metrics:
+ metrics.inc_memory_usage(node.memory)
+
def move_node_to_front(node):
prev_node = node.prev_node
next_node = node.next_node
@@ -258,6 +298,9 @@ class LruCache(Generic[KT, VT]):
node.run_and_clear_callbacks()
+ if caches.TRACK_MEMORY_USAGE and metrics:
+ metrics.dec_memory_usage(node.memory)
+
return deleted_len
@overload
@@ -373,6 +416,9 @@ class LruCache(Generic[KT, VT]):
if size_callback:
cached_cache_len[0] = 0
+ if caches.TRACK_MEMORY_USAGE and metrics:
+ metrics.clear_memory_usage()
+
@synchronized
def cache_contains(key: KT) -> bool:
return key in cache
|