diff --git a/changelog.d/3956.bugfix b/changelog.d/3956.bugfix
new file mode 100644
index 0000000000..b0828c9fc6
--- /dev/null
+++ b/changelog.d/3956.bugfix
@@ -0,0 +1 @@
+Fix exceptions from metrics handler
\ No newline at end of file
diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py
index 7b065b195e..f37d5bec08 100644
--- a/synapse/util/caches/__init__.py
+++ b/synapse/util/caches/__init__.py
@@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import logging
import os
import six
@@ -20,6 +21,8 @@ from six.moves import intern
from prometheus_client.core import REGISTRY, Gauge, GaugeMetricFamily
+logger = logging.getLogger(__name__)
+
CACHE_SIZE_FACTOR = float(os.environ.get("SYNAPSE_CACHE_FACTOR", 0.5))
@@ -76,16 +79,20 @@ def register_cache(cache_type, cache_name, cache):
return []
def collect(self):
- if cache_type == "response_cache":
- response_cache_size.labels(cache_name).set(len(cache))
- response_cache_hits.labels(cache_name).set(self.hits)
- response_cache_evicted.labels(cache_name).set(self.evicted_size)
- response_cache_total.labels(cache_name).set(self.hits + self.misses)
- else:
- cache_size.labels(cache_name).set(len(cache))
- cache_hits.labels(cache_name).set(self.hits)
- cache_evicted.labels(cache_name).set(self.evicted_size)
- cache_total.labels(cache_name).set(self.hits + self.misses)
+ try:
+ if cache_type == "response_cache":
+ response_cache_size.labels(cache_name).set(len(cache))
+ response_cache_hits.labels(cache_name).set(self.hits)
+ response_cache_evicted.labels(cache_name).set(self.evicted_size)
+ response_cache_total.labels(cache_name).set(self.hits + self.misses)
+ else:
+ cache_size.labels(cache_name).set(len(cache))
+ cache_hits.labels(cache_name).set(self.hits)
+ cache_evicted.labels(cache_name).set(self.evicted_size)
+ cache_total.labels(cache_name).set(self.hits + self.misses)
+ except Exception as e:
+ logger.warn("Error calculating metrics for %s: %s", cache_name, e)
+ raise
yield GaugeMetricFamily("__unused", "")
diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py
index 921a9c5b29..9af4ec4aa8 100644
--- a/synapse/util/caches/expiringcache.py
+++ b/synapse/util/caches/expiringcache.py
@@ -16,6 +16,8 @@
import logging
from collections import OrderedDict
+from six import itervalues
+
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.util.caches import register_cache
@@ -54,8 +56,6 @@ class ExpiringCache(object):
self.iterable = iterable
- self._size_estimate = 0
-
self.metrics = register_cache("expiring", cache_name, self)
if not self._expiry_ms:
@@ -74,16 +74,11 @@ class ExpiringCache(object):
now = self._clock.time_msec()
self._cache[key] = _CacheEntry(now, value)
- if self.iterable:
- self._size_estimate += len(value)
-
# Evict if there are now too many items
while self._max_len and len(self) > self._max_len:
_key, value = self._cache.popitem(last=False)
if self.iterable:
- removed_len = len(value.value)
- self.metrics.inc_evictions(removed_len)
- self._size_estimate -= removed_len
+ self.metrics.inc_evictions(len(value.value))
else:
self.metrics.inc_evictions()
@@ -134,7 +129,9 @@ class ExpiringCache(object):
for k in keys_to_delete:
value = self._cache.pop(k)
if self.iterable:
- self._size_estimate -= len(value.value)
+ self.metrics.inc_evictions(len(value.value))
+ else:
+ self.metrics.inc_evictions()
logger.debug(
"[%s] _prune_cache before: %d, after len: %d",
@@ -143,7 +140,7 @@ class ExpiringCache(object):
def __len__(self):
if self.iterable:
- return self._size_estimate
+ return sum(len(entry.value) for entry in itervalues(self._cache))
else:
return len(self._cache)
|