From 1fb9a2d0bf2506ca6e5343cb340a441585ca1c07 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 5 May 2021 16:53:45 +0100 Subject: Limit how often GC happens by time. (#9902) Synapse can be quite memory intensive, and unless care is taken to tune the GC thresholds it can end up thrashing, causing noticable performance problems for large servers. We fix this by limiting how often we GC a given generation, regardless of current counts/thresholds. This does not help with the reverse problem where the thresholds are set too high, but that should only happen in situations where they've been manually configured. Adds a `gc_min_seconds_between` config option to override the defaults. Fixes #9890. --- synapse/app/generic_worker.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'synapse/app/generic_worker.py') diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index 1a15ceee81..a3fe9a3f38 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -455,6 +455,9 @@ def start(config_options): synapse.events.USE_FROZEN_DICTS = config.use_frozen_dicts + if config.server.gc_seconds: + synapse.metrics.MIN_TIME_BETWEEN_GCS = config.server.gc_seconds + hs = GenericWorkerServer( config.server_name, config=config, -- cgit 1.5.1 From ef889c98a6cde0cfa95f7fdaf7f99ec3c1e9bb7f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 5 May 2021 16:54:36 +0100 Subject: Optionally track memory usage of each LruCache (#9881) This will double count slightly in the presence of interned strings. It's off by default as it can consume a lot of resources. --- changelog.d/9881.feature | 1 + mypy.ini | 3 +++ synapse/app/generic_worker.py | 1 + synapse/app/homeserver.py | 1 + synapse/config/cache.py | 11 ++++++++++ synapse/python_dependencies.py | 2 ++ synapse/util/caches/__init__.py | 31 ++++++++++++++++++++++++++ synapse/util/caches/lrucache.py | 48 ++++++++++++++++++++++++++++++++++++++++- 8 files changed, 97 insertions(+), 1 deletion(-) create mode 100644 changelog.d/9881.feature (limited to 'synapse/app/generic_worker.py') diff --git a/changelog.d/9881.feature b/changelog.d/9881.feature new file mode 100644 index 0000000000..088a517e02 --- /dev/null +++ b/changelog.d/9881.feature @@ -0,0 +1 @@ +Add experimental option to track memory usage of the caches. diff --git a/mypy.ini b/mypy.ini index a40f705b76..ea655a0d4d 100644 --- a/mypy.ini +++ b/mypy.ini @@ -171,3 +171,6 @@ ignore_missing_imports = True [mypy-txacme.*] ignore_missing_imports = True + +[mypy-pympler.*] +ignore_missing_imports = True diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index a3fe9a3f38..f730cdbd78 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -454,6 +454,7 @@ def start(config_options): config.server.update_user_directory = False synapse.events.USE_FROZEN_DICTS = config.use_frozen_dicts + synapse.util.caches.TRACK_MEMORY_USAGE = config.caches.track_memory_usage if config.server.gc_seconds: synapse.metrics.MIN_TIME_BETWEEN_GCS = config.server.gc_seconds diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 6a823da10d..b2501ee4d7 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -341,6 +341,7 @@ def setup(config_options): sys.exit(0) events.USE_FROZEN_DICTS = config.use_frozen_dicts + synapse.util.caches.TRACK_MEMORY_USAGE = config.caches.track_memory_usage if config.server.gc_seconds: synapse.metrics.MIN_TIME_BETWEEN_GCS = config.server.gc_seconds diff --git a/synapse/config/cache.py b/synapse/config/cache.py index 41b9b3f51f..91165ee1ce 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -17,6 +17,8 @@ import re import threading from typing import Callable, Dict +from synapse.python_dependencies import DependencyException, check_requirements + from ._base import Config, ConfigError # The prefix for all cache factor-related environment variables @@ -189,6 +191,15 @@ class CacheConfig(Config): ) self.cache_factors[cache] = factor + self.track_memory_usage = cache_config.get("track_memory_usage", False) + if self.track_memory_usage: + try: + check_requirements("cache_memory") + except DependencyException as e: + raise ConfigError( + e.message # noqa: B306, DependencyException.message is a property + ) + # Resize all caches (if necessary) with the new factors we've loaded self.resize_all_caches() diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 2de946f464..d58eeeaa74 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -116,6 +116,8 @@ CONDITIONAL_REQUIREMENTS = { # hiredis is not a *strict* dependency, but it makes things much faster. # (if it is not installed, we fall back to slow code.) "redis": ["txredisapi>=1.4.7", "hiredis"], + # Required to use experimental `caches.track_memory_usage` config option. + "cache_memory": ["pympler"], } ALL_OPTIONAL_REQUIREMENTS = set() # type: Set[str] diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index 46af7fa473..ca36f07c20 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -24,6 +24,11 @@ from synapse.config.cache import add_resizable_cache logger = logging.getLogger(__name__) + +# Whether to track estimated memory usage of the LruCaches. +TRACK_MEMORY_USAGE = False + + caches_by_name = {} # type: Dict[str, Sized] collectors_by_name = {} # type: Dict[str, CacheMetric] @@ -32,6 +37,11 @@ cache_hits = Gauge("synapse_util_caches_cache:hits", "", ["name"]) cache_evicted = Gauge("synapse_util_caches_cache:evicted_size", "", ["name"]) cache_total = Gauge("synapse_util_caches_cache:total", "", ["name"]) cache_max_size = Gauge("synapse_util_caches_cache_max_size", "", ["name"]) +cache_memory_usage = Gauge( + "synapse_util_caches_cache_size_bytes", + "Estimated memory usage of the caches", + ["name"], +) response_cache_size = Gauge("synapse_util_caches_response_cache:size", "", ["name"]) response_cache_hits = Gauge("synapse_util_caches_response_cache:hits", "", ["name"]) @@ -52,6 +62,7 @@ class CacheMetric: hits = attr.ib(default=0) misses = attr.ib(default=0) evicted_size = attr.ib(default=0) + memory_usage = attr.ib(default=None) def inc_hits(self): self.hits += 1 @@ -62,6 +73,19 @@ class CacheMetric: def inc_evictions(self, size=1): self.evicted_size += size + def inc_memory_usage(self, memory: int): + if self.memory_usage is None: + self.memory_usage = 0 + + self.memory_usage += memory + + def dec_memory_usage(self, memory: int): + self.memory_usage -= memory + + def clear_memory_usage(self): + if self.memory_usage is not None: + self.memory_usage = 0 + def describe(self): return [] @@ -81,6 +105,13 @@ class CacheMetric: cache_total.labels(self._cache_name).set(self.hits + self.misses) if getattr(self._cache, "max_size", None): cache_max_size.labels(self._cache_name).set(self._cache.max_size) + + if TRACK_MEMORY_USAGE: + # self.memory_usage can be None if nothing has been inserted + # into the cache yet. + cache_memory_usage.labels(self._cache_name).set( + self.memory_usage or 0 + ) if self._collect_callback: self._collect_callback() except Exception as e: diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index 10b0ec6b75..1be675e014 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -32,9 +32,36 @@ from typing import ( from typing_extensions import Literal from synapse.config import cache as cache_config +from synapse.util import caches from synapse.util.caches import CacheMetric, register_cache from synapse.util.caches.treecache import TreeCache +try: + from pympler.asizeof import Asizer + + def _get_size_of(val: Any, *, recurse=True) -> int: + """Get an estimate of the size in bytes of the object. + + Args: + val: The object to size. + recurse: If true will include referenced values in the size, + otherwise only sizes the given object. + """ + # Ignore singleton values when calculating memory usage. + if val in ((), None, ""): + return 0 + + sizer = Asizer() + sizer.exclude_refs((), None, "") + return sizer.asizeof(val, limit=100 if recurse else 0) + + +except ImportError: + + def _get_size_of(val: Any, *, recurse=True) -> int: + return 0 + + # Function type: the type used for invalidation callbacks FT = TypeVar("FT", bound=Callable[..., Any]) @@ -56,7 +83,7 @@ def enumerate_leaves(node, depth): class _Node: - __slots__ = ["prev_node", "next_node", "key", "value", "callbacks"] + __slots__ = ["prev_node", "next_node", "key", "value", "callbacks", "memory"] def __init__( self, @@ -84,6 +111,16 @@ class _Node: self.add_callbacks(callbacks) + self.memory = 0 + if caches.TRACK_MEMORY_USAGE: + self.memory = ( + _get_size_of(key) + + _get_size_of(value) + + _get_size_of(self.callbacks, recurse=False) + + _get_size_of(self, recurse=False) + ) + self.memory += _get_size_of(self.memory, recurse=False) + def add_callbacks(self, callbacks: Collection[Callable[[], None]]) -> None: """Add to stored list of callbacks, removing duplicates.""" @@ -233,6 +270,9 @@ class LruCache(Generic[KT, VT]): if size_callback: cached_cache_len[0] += size_callback(node.value) + if caches.TRACK_MEMORY_USAGE and metrics: + metrics.inc_memory_usage(node.memory) + def move_node_to_front(node): prev_node = node.prev_node next_node = node.next_node @@ -258,6 +298,9 @@ class LruCache(Generic[KT, VT]): node.run_and_clear_callbacks() + if caches.TRACK_MEMORY_USAGE and metrics: + metrics.dec_memory_usage(node.memory) + return deleted_len @overload @@ -373,6 +416,9 @@ class LruCache(Generic[KT, VT]): if size_callback: cached_cache_len[0] = 0 + if caches.TRACK_MEMORY_USAGE and metrics: + metrics.clear_memory_usage() + @synchronized def cache_contains(key: KT) -> bool: return key in cache -- cgit 1.5.1