diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py
index 46af7fa473..ca36f07c20 100644
--- a/synapse/util/caches/__init__.py
+++ b/synapse/util/caches/__init__.py
@@ -24,6 +24,11 @@ from synapse.config.cache import add_resizable_cache
logger = logging.getLogger(__name__)
+
+# Whether to track estimated memory usage of the LruCaches.
+TRACK_MEMORY_USAGE = False
+
+
caches_by_name = {} # type: Dict[str, Sized]
collectors_by_name = {} # type: Dict[str, CacheMetric]
@@ -32,6 +37,11 @@ cache_hits = Gauge("synapse_util_caches_cache:hits", "", ["name"])
cache_evicted = Gauge("synapse_util_caches_cache:evicted_size", "", ["name"])
cache_total = Gauge("synapse_util_caches_cache:total", "", ["name"])
cache_max_size = Gauge("synapse_util_caches_cache_max_size", "", ["name"])
+cache_memory_usage = Gauge(
+ "synapse_util_caches_cache_size_bytes",
+ "Estimated memory usage of the caches",
+ ["name"],
+)
response_cache_size = Gauge("synapse_util_caches_response_cache:size", "", ["name"])
response_cache_hits = Gauge("synapse_util_caches_response_cache:hits", "", ["name"])
@@ -52,6 +62,7 @@ class CacheMetric:
hits = attr.ib(default=0)
misses = attr.ib(default=0)
evicted_size = attr.ib(default=0)
+ memory_usage = attr.ib(default=None)
def inc_hits(self):
self.hits += 1
@@ -62,6 +73,19 @@ class CacheMetric:
def inc_evictions(self, size=1):
self.evicted_size += size
+ def inc_memory_usage(self, memory: int):
+ if self.memory_usage is None:
+ self.memory_usage = 0
+
+ self.memory_usage += memory
+
+ def dec_memory_usage(self, memory: int):
+ assert self.memory_usage is not None
+ self.memory_usage -= memory
+
+ def clear_memory_usage(self):
+ if self.memory_usage is not None:
+ self.memory_usage = 0
+
def describe(self):
return []
@@ -81,6 +105,13 @@ class CacheMetric:
cache_total.labels(self._cache_name).set(self.hits + self.misses)
if getattr(self._cache, "max_size", None):
cache_max_size.labels(self._cache_name).set(self._cache.max_size)
+
+ if TRACK_MEMORY_USAGE:
+ # self.memory_usage can be None if nothing has been inserted
+ # into the cache yet.
+ cache_memory_usage.labels(self._cache_name).set(
+ self.memory_usage or 0
+ )
if self._collect_callback:
self._collect_callback()
except Exception as e:
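
For reference, the accounting the new `inc_memory_usage`/`dec_memory_usage`/`clear_memory_usage` methods implement is just a lazily-initialised byte counter that gets exported as a gauge. A minimal standalone sketch of those semantics (`MemoryCounter` is a hypothetical name for illustration, not Synapse code):

```python
from typing import Optional

class MemoryCounter:
    def __init__(self) -> None:
        # None until the first insert, mirroring CacheMetric.memory_usage.
        self.memory_usage: Optional[int] = None

    def inc(self, memory: int) -> None:
        if self.memory_usage is None:
            self.memory_usage = 0
        self.memory_usage += memory

    def dec(self, memory: int) -> None:
        assert self.memory_usage is not None
        self.memory_usage -= memory

    def report(self) -> int:
        # What the gauge is set to: None (nothing cached yet) reads as 0.
        return self.memory_usage or 0

counter = MemoryCounter()
assert counter.report() == 0  # nothing inserted yet
counter.inc(128)
counter.inc(64)
counter.dec(128)
assert counter.report() == 64
```
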
diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py
index a21d34fcb4..1be675e014 100644
--- a/synapse/util/caches/lrucache.py
+++ b/synapse/util/caches/lrucache.py
@@ -17,8 +17,10 @@ from functools import wraps
from typing import (
Any,
Callable,
+ Collection,
Generic,
Iterable,
+ List,
Optional,
Type,
TypeVar,
@@ -30,9 +32,36 @@ from typing import (
from typing_extensions import Literal
from synapse.config import cache as cache_config
+from synapse.util import caches
from synapse.util.caches import CacheMetric, register_cache
from synapse.util.caches.treecache import TreeCache
+try:
+ from pympler.asizeof import Asizer
+
+ def _get_size_of(val: Any, *, recurse=True) -> int:
+ """Get an estimate of the size in bytes of the object.
+
+ Args:
+ val: The object to size.
+ recurse: If true, include the size of referenced values;
+ otherwise size only the given object itself.
+ """
+ # Ignore singleton values when calculating memory usage.
+ if val in ((), None, ""):
+ return 0
+
+ sizer = Asizer()
+ sizer.exclude_refs((), None, "")
+ return sizer.asizeof(val, limit=100 if recurse else 0)
+
+
+except ImportError:
+
+ def _get_size_of(val: Any, *, recurse=True) -> int:
+ # Without pympler we can't estimate sizes, so report everything
+ # as zero bytes rather than failing.
+ return 0
+
+
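
Assuming pympler is installed (`pip install pympler`), the `_get_size_of` helper above can be exercised like this. Exact byte counts vary by CPython version and platform, so the sketch only checks relative behaviour:

```python
value = {"room_id": "!abc:example.com", "depth": 42}

deep = _get_size_of(value)                    # dict plus referenced keys/values
shallow = _get_size_of(value, recurse=False)  # just the dict object itself

assert deep >= shallow > 0
assert _get_size_of(None) == 0  # the excluded "singleton" values size to zero
assert _get_size_of("") == 0
```
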
# Function type: the type used for invalidation callbacks
FT = TypeVar("FT", bound=Callable[..., Any])
@@ -54,16 +83,69 @@ def enumerate_leaves(node, depth):
class _Node:
- __slots__ = ["prev_node", "next_node", "key", "value", "callbacks"]
+ __slots__ = ["prev_node", "next_node", "key", "value", "callbacks", "memory"]
def __init__(
- self, prev_node, next_node, key, value, callbacks: Optional[set] = None
+ self,
+ prev_node,
+ next_node,
+ key,
+ value,
+ callbacks: Collection[Callable[[], None]] = (),
):
self.prev_node = prev_node
self.next_node = next_node
self.key = key
self.value = value
- self.callbacks = callbacks or set()
+
+ # Set of callbacks to run when the node gets deleted. We store these as
+ # a list rather than a set to keep memory usage down (and since we
+ # expect few entries per node, the performance of checking for
+ # duplicates in a list vs using a set is negligible).
+ #
+ # Note that we store this as an optional list to keep the memory
+ # footprint down. Storing `None` is free as it's a singleton, while an
+ # empty list is 56 bytes (and an empty set is 216 bytes, if we did the
+ # naive thing and used sets).
+ self.callbacks = None # type: Optional[List[Callable[[], None]]]
+
+ self.add_callbacks(callbacks)
+
+ self.memory = 0
+ if caches.TRACK_MEMORY_USAGE:
+ self.memory = (
+ _get_size_of(key)
+ + _get_size_of(value)
+ + _get_size_of(self.callbacks, recurse=False)
+ + _get_size_of(self, recurse=False)
+ )
+ self.memory += _get_size_of(self.memory, recurse=False)
+
+ def add_callbacks(self, callbacks: Collection[Callable[[], None]]) -> None:
+ """Add to stored list of callbacks, removing duplicates."""
+
+ if not callbacks:
+ return
+
+ if not self.callbacks:
+ self.callbacks = []
+
+ for callback in callbacks:
+ if callback not in self.callbacks:
+ self.callbacks.append(callback)
+
+ def run_and_clear_callbacks(self) -> None:
+ """Run all callbacks and clear the stored list of callbacks. Used when
+ the node is being deleted.
+ """
+
+ if not self.callbacks:
+ return
+
+ for callback in self.callbacks:
+ callback()
+
+ self.callbacks = None
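
The byte figures quoted in the comment in `_Node.__init__` above are easy to sanity-check with `sys.getsizeof`; the numbers below are typical for 64-bit CPython 3 and vary slightly by version:

```python
import sys

print(sys.getsizeof([]))     # ~56 bytes for an empty list
print(sys.getsizeof(set()))  # ~216 bytes for an empty set
print(sys.getsizeof(None))   # ~16 bytes, and shared by every user of None
```
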
class LruCache(Generic[KT, VT]):
@@ -177,10 +259,10 @@ class LruCache(Generic[KT, VT]):
self.len = synchronized(cache_len)
- def add_node(key, value, callbacks: Optional[set] = None):
+ def add_node(key, value, callbacks: Collection[Callable[[], None]] = ()):
prev_node = list_root
next_node = prev_node.next_node
- node = _Node(prev_node, next_node, key, value, callbacks or set())
+ node = _Node(prev_node, next_node, key, value, callbacks)
prev_node.next_node = node
next_node.prev_node = node
cache[key] = node
@@ -188,6 +270,9 @@ class LruCache(Generic[KT, VT]):
if size_callback:
cached_cache_len[0] += size_callback(node.value)
+ if caches.TRACK_MEMORY_USAGE and metrics:
+ metrics.inc_memory_usage(node.memory)
+
def move_node_to_front(node):
prev_node = node.prev_node
next_node = node.next_node
@@ -211,16 +296,18 @@ class LruCache(Generic[KT, VT]):
deleted_len = size_callback(node.value)
cached_cache_len[0] -= deleted_len
- for cb in node.callbacks:
- cb()
- node.callbacks.clear()
+ node.run_and_clear_callbacks()
+
+ if caches.TRACK_MEMORY_USAGE and metrics:
+ metrics.dec_memory_usage(node.memory)
+
return deleted_len
@overload
def cache_get(
key: KT,
default: Literal[None] = None,
- callbacks: Iterable[Callable[[], None]] = ...,
+ callbacks: Collection[Callable[[], None]] = ...,
update_metrics: bool = ...,
) -> Optional[VT]:
...
@@ -229,7 +316,7 @@ class LruCache(Generic[KT, VT]):
def cache_get(
key: KT,
default: T,
- callbacks: Iterable[Callable[[], None]] = ...,
+ callbacks: Collection[Callable[[], None]] = ...,
update_metrics: bool = ...,
) -> Union[T, VT]:
...
@@ -238,13 +325,13 @@ class LruCache(Generic[KT, VT]):
def cache_get(
key: KT,
default: Optional[T] = None,
- callbacks: Iterable[Callable[[], None]] = (),
+ callbacks: Collection[Callable[[], None]] = (),
update_metrics: bool = True,
):
node = cache.get(key, None)
if node is not None:
move_node_to_front(node)
- node.callbacks.update(callbacks)
+ node.add_callbacks(callbacks)
if update_metrics and metrics:
metrics.inc_hits()
return node.value
@@ -260,10 +347,8 @@ class LruCache(Generic[KT, VT]):
- # We sometimes store large objects, e.g. dicts, which cause
- # the inequality check to take a long time. So let's only do
- # the check if we have some callbacks to call.
- if node.callbacks and value != node.value:
- for cb in node.callbacks:
- cb()
- node.callbacks.clear()
+ # Only run the callbacks if the value has actually changed. Note
+ # that we sometimes store large objects, e.g. dicts, for which
+ # the inequality check itself can take a long time;
+ # run_and_clear_callbacks is a no-op when no callbacks are set.
+ if value != node.value:
+ node.run_and_clear_callbacks()
# We don't bother to protect this by value != node.value as
# generally size_callback will be cheap compared with equality
@@ -273,7 +358,7 @@ class LruCache(Generic[KT, VT]):
cached_cache_len[0] -= size_callback(node.value)
cached_cache_len[0] += size_callback(value)
- node.callbacks.update(callbacks)
+ node.add_callbacks(callbacks)
move_node_to_front(node)
node.value = value
@@ -326,12 +411,14 @@ class LruCache(Generic[KT, VT]):
list_root.next_node = list_root
list_root.prev_node = list_root
for node in cache.values():
- for cb in node.callbacks:
- cb()
+ node.run_and_clear_callbacks()
cache.clear()
if size_callback:
cached_cache_len[0] = 0
+ if caches.TRACK_MEMORY_USAGE and metrics:
+ metrics.clear_memory_usage()
+
@synchronized
def cache_contains(key: KT) -> bool:
return key in cache
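
An end-to-end sketch of how the pieces fit together, assuming a Synapse checkout is importable. In the real server `TRACK_MEMORY_USAGE` would be flipped from configuration rather than set by hand, and `cache_name="example"` is just an illustrative label:

```python
from synapse.util import caches
from synapse.util.caches.lrucache import LruCache

caches.TRACK_MEMORY_USAGE = True  # must be set before nodes are created

cache = LruCache(max_size=10, cache_name="example")  # type: LruCache[str, str]
cache["key"] = "some value"  # inc_memory_usage(node.memory) on insert

# Evicting, deleting, or clearing subtracts the estimate again, so the
# exported gauge tracks the live contents of the cache.
cache.clear()
```
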
diff --git a/synapse/util/stringutils.py b/synapse/util/stringutils.py
index cd82777f80..4f25cd1d26 100644
--- a/synapse/util/stringutils.py
+++ b/synapse/util/stringutils.py
@@ -220,3 +220,23 @@ def strtobool(val: str) -> bool:
return False
else:
raise ValueError("invalid truth value %r" % (val,))
+
+
+_BASE62 = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+
+
+def base62_encode(num: int, minwidth: int = 1) -> str:
+ """Encode a number using base62
+
+ Args:
+ num: number to be encoded
+ minwidth: width to pad to, if the number is small
+ """
+ res = ""
+ while num:
+ num, rem = divmod(num, 62)
+ res = _BASE62[rem] + res
+
+ # pad to minimum width
+ pad = "0" * (minwidth - len(res))
+ return pad + res
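
A few hand-checked examples of the new helper (these follow directly from the alphabet and the divmod loop above):

```python
assert base62_encode(0) == "0"             # zero renders via the padding
assert base62_encode(61) == "z"            # last symbol in the alphabet
assert base62_encode(62) == "10"           # first two-"digit" value
assert base62_encode(123456789) == "8M0kX"
assert base62_encode(7, minwidth=4) == "0007"
```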