From b0cdf097f4393fc288d6d31bb9b37bf921a35a1c Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 4 Mar 2015 16:04:46 +0000 Subject: Sprinkle some CacheCounterMetrics around the synapse.storage layer --- synapse/storage/_base.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'synapse/storage') diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 3ea7382760..804655e34d 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -20,6 +20,7 @@ from synapse.events.utils import prune_event from synapse.util.logutils import log_function from synapse.util.logcontext import PreserveLoggingContext, LoggingContext from synapse.util.lrucache import LruCache +import synapse.metrics from twisted.internet import defer @@ -31,6 +32,8 @@ import time logger = logging.getLogger(__name__) +metrics = synapse.metrics.get_metrics_for("synapse.storage") + sql_logger = logging.getLogger("synapse.storage.SQL") transaction_logger = logging.getLogger("synapse.storage.txn") @@ -56,6 +59,8 @@ def cached(max_entries=1000): def wrap(orig): cache = OrderedDict() + counter = metrics.register_cachecounter(orig.__name__) + def prefill(key, value): while len(cache) > max_entries: cache.popitem(last=False) @@ -65,8 +70,10 @@ def cached(max_entries=1000): @defer.inlineCallbacks def wrapped(self, key): if key in cache: + counter.inc_hits() defer.returnValue(cache[key]) + counter.inc_misses() ret = yield orig(self, key) prefill(key, ret) defer.returnValue(ret) @@ -176,6 +183,9 @@ class SQLBaseStore(object): self._get_event_counters = PerformanceCounters() self._get_event_cache = LruCache(hs.config.event_cache_size) + self._get_event_cache_counter = metrics.register_cachecounter( + "get_event" + ) def start_profiling(self): self._previous_loop_ts = self._clock.time_msec() @@ -644,8 +654,12 @@ class SQLBaseStore(object): try: # Separate cache entries for each way to invoke _get_event_txn - return cache[(check_redacted, get_prev_content, allow_rejected)] + ret = cache[(check_redacted, get_prev_content, allow_rejected)] + + self._get_event_cache_counter.inc_hits() + return ret except KeyError: + self._get_event_cache_counter.inc_misses() pass finally: start_time = update_counter("event_cache", start_time) -- cgit 1.5.1 From 8664599af77ba0ed6268b3112174dc8e0c91101b Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 4 Mar 2015 17:34:23 +0000 Subject: Rename CacheCounterMetric to just CacheMetric; add a CallbackMetric component to give the size of the cache --- synapse/metrics/__init__.py | 6 +++--- synapse/metrics/metric.py | 13 +++++++++---- synapse/storage/_base.py | 6 +++--- tests/metrics/test_metric.py | 24 +++++++++++++++--------- 4 files changed, 30 insertions(+), 19 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index d967b04eee..442fd70cdf 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -15,7 +15,7 @@ import logging -from .metric import CounterMetric, CallbackMetric, CacheCounterMetric +from .metric import CounterMetric, CallbackMetric, CacheMetric logger = logging.getLogger(__name__) @@ -57,10 +57,10 @@ class Metrics(object): return metric - def register_cachecounter(self, name, *args, **kwargs): + def register_cache(self, name, *args, **kwargs): full_name = "%s.%s" % (self.name_prefix, name) - metric = CacheCounterMetric(full_name, *args, **kwargs) + metric = CacheMetric(full_name, *args, **kwargs) self._register(metric) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index 8a497fc154..7e47f86155 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -76,19 +76,24 @@ class CallbackMetric(BaseMetric): # TODO(paul): work out something we can do with keys and vectors return ["%s %d" % (self.name, self.callback())] -class CacheCounterMetric(object): +class CacheMetric(object): """A combination of two CounterMetrics, one to count cache hits and one to - count misses. + count misses, and a callback metric to yield the current size. This metric generates standard metric name pairs, so that monitoring rules can easily be applied to measure hit ratio.""" - def __init__(self, name, keys=[]): + def __init__(self, name, size_callback, keys=[]): self.name = name self.hits = CounterMetric(name + ":hits", keys=keys) self.misses = CounterMetric(name + ":misses", keys=keys) + self.size = CallbackMetric(name + ":size", + callback=size_callback, + keys=keys, + ) + def inc_hits(self, *values): self.hits.inc(*values) @@ -96,4 +101,4 @@ class CacheCounterMetric(object): self.misses.inc(*values) def render(self): - return self.hits.render() + self.misses.render() + return self.hits.render() + self.misses.render() + self.size.render() diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 804655e34d..d3c2bc7bfb 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -59,7 +59,7 @@ def cached(max_entries=1000): def wrap(orig): cache = OrderedDict() - counter = metrics.register_cachecounter(orig.__name__) + counter = metrics.register_cache(orig.__name__, lambda: len(cache)) def prefill(key, value): while len(cache) > max_entries: @@ -183,8 +183,8 @@ class SQLBaseStore(object): self._get_event_counters = PerformanceCounters() self._get_event_cache = LruCache(hs.config.event_cache_size) - self._get_event_cache_counter = metrics.register_cachecounter( - "get_event" + self._get_event_cache_counter = metrics.register_cache("get_event", + size_callback=lambda: len(self._get_event_cache), ) def start_profiling(self): diff --git a/tests/metrics/test_metric.py b/tests/metrics/test_metric.py index b7bb375ce0..32fd178ed4 100644 --- a/tests/metrics/test_metric.py +++ b/tests/metrics/test_metric.py @@ -16,7 +16,7 @@ from tests import unittest from synapse.metrics.metric import ( - CounterMetric, CallbackMetric, CacheCounterMetric + CounterMetric, CallbackMetric, CacheMetric ) @@ -81,26 +81,32 @@ class CallbackMetricTestCase(unittest.TestCase): ]) -class CacheCounterMetricTestCase(unittest.TestCase): +class CacheMetricTestCase(unittest.TestCase): - def test_cachecounter(self): - counter = CacheCounterMetric("cache") + def test_cache(self): + d = dict() - self.assertEquals(counter.render(), [ + metric = CacheMetric("cache", lambda: len(d)) + + self.assertEquals(metric.render(), [ "cache:hits 0", "cache:misses 0", + "cache:size 0", ]) - counter.inc_misses() + metric.inc_misses() + d["key"] = "value" - self.assertEquals(counter.render(), [ + self.assertEquals(metric.render(), [ "cache:hits 0", "cache:misses 1", + "cache:size 1", ]) - counter.inc_hits() + metric.inc_hits() - self.assertEquals(counter.render(), [ + self.assertEquals(metric.render(), [ "cache:hits 1", "cache:misses 1", + "cache:size 1", ]) -- cgit 1.5.1 From e1a7e3564fa1e20b52c7294a8b0f28e5ecbaf672 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 4 Mar 2015 18:16:54 +0000 Subject: Delete a couple of TODO markers of monitoring stats now done --- synapse/storage/_base.py | 1 - synapse/util/lrucache.py | 1 - 2 files changed, 2 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index d3c2bc7bfb..d6cf88c350 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -40,7 +40,6 @@ transaction_logger = logging.getLogger("synapse.storage.txn") # TODO(paul): # * more generic key management -# * export monitoring stats # * consider other eviction strategies - LRU? def cached(max_entries=1000): """ A method decorator that applies a memoizing cache around the function. diff --git a/synapse/util/lrucache.py b/synapse/util/lrucache.py index f115f50e50..65d5792907 100644 --- a/synapse/util/lrucache.py +++ b/synapse/util/lrucache.py @@ -16,7 +16,6 @@ class LruCache(object): """Least-recently-used cache.""" - # TODO(mjark) Add hit/miss counters # TODO(mjark) Add mutex for linked list for thread safety. def __init__(self, max_size): cache = {} -- cgit 1.5.1 From 25187ab67426e206de30ebf5c8b5f34c995d3faf Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 4 Mar 2015 19:45:16 +0000 Subject: Collect per-SQL-verb timer stats on query execution time --- synapse/storage/_base.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index d6cf88c350..92e90ac579 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -32,12 +32,15 @@ import time logger = logging.getLogger(__name__) -metrics = synapse.metrics.get_metrics_for("synapse.storage") - sql_logger = logging.getLogger("synapse.storage.SQL") transaction_logger = logging.getLogger("synapse.storage.txn") +metrics = synapse.metrics.get_metrics_for("synapse.storage") + +sql_query_timer = metrics.register_timer("queries", keys=["verb"]) + + # TODO(paul): # * more generic key management # * consider other eviction strategies - LRU? @@ -89,7 +92,8 @@ def cached(max_entries=1000): class LoggingTransaction(object): """An object that almost-transparently proxies for the 'txn' object - passed to the constructor. Adds logging to the .execute() method.""" + passed to the constructor. Adds logging and metrics to the .execute() + method.""" __slots__ = ["txn", "name"] def __init__(self, txn, name): @@ -105,6 +109,7 @@ class LoggingTransaction(object): def execute(self, sql, *args, **kwargs): # TODO(paul): Maybe use 'info' and 'debug' for values? sql_logger.debug("[SQL] {%s} %s", self.name, sql) + try: if args and args[0]: values = args[0] @@ -126,8 +131,9 @@ class LoggingTransaction(object): logger.exception("[SQL FAIL] {%s}", self.name) raise finally: - end = time.time() * 1000 - sql_logger.debug("[SQL time] {%s} %f", self.name, end - start) + msecs = (time.time() * 1000) - start + sql_logger.debug("[SQL time] {%s} %f", self.name, msecs) + sql_query_timer.inc_time(msecs, sql.split()[0]) class PerformanceCounters(object): -- cgit 1.5.1 From fa319a57860af460add565d7801711ec53ab6799 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Thu, 5 Mar 2015 15:58:03 +0000 Subject: Add TimerMetrics to shadow the PerformanceCounters in synapse.storage; with the view to eventually replacing them entirely --- synapse/storage/_base.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 92e90ac579..d8c5a60c71 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -39,6 +39,8 @@ transaction_logger = logging.getLogger("synapse.storage.txn") metrics = synapse.metrics.get_metrics_for("synapse.storage") sql_query_timer = metrics.register_timer("queries", keys=["verb"]) +sql_txn_timer = metrics.register_timer("transactions", keys=["desc"]) +sql_getevents_timer = metrics.register_timer("get_events", keys=["desc"]) # TODO(paul): @@ -184,11 +186,16 @@ class SQLBaseStore(object): self._previous_txn_total_time = 0 self._current_txn_total_time = 0 self._previous_loop_ts = 0 + + # TODO(paul): These can eventually be removed once the metrics code + # is running in mainline, and we have some nice monitoring frontends + # to watch it self._txn_perf_counters = PerformanceCounters() self._get_event_counters = PerformanceCounters() self._get_event_cache = LruCache(hs.config.event_cache_size) - self._get_event_cache_counter = metrics.register_cache("get_event", + self._get_event_cache_counter = metrics.register_cache( + "get_event_cache", size_callback=lambda: len(self._get_event_cache), ) @@ -254,6 +261,8 @@ class SQLBaseStore(object): self._current_txn_total_time += end - start self._txn_perf_counters.update(desc, start, end) + sql_txn_timer.inc_time(self._current_txn_total_time, desc) + with PreserveLoggingContext(): result = yield self._db_pool.runInteraction( inner_func, *args, **kwargs @@ -653,7 +662,11 @@ class SQLBaseStore(object): get_prev_content=False, allow_rejected=False): start_time = time.time() * 1000 - update_counter = self._get_event_counters.update + + def update_counter(desc, last_time): + curr_time = self._get_event_counters.update(desc, last_time) + sql_getevents_timer.inc_time(curr_time - last_time, desc) + return curr_time cache = self._get_event_cache.setdefault(event_id, {}) @@ -704,7 +717,11 @@ class SQLBaseStore(object): check_redacted=True, get_prev_content=False): start_time = time.time() * 1000 - update_counter = self._get_event_counters.update + + def update_counter(desc, last_time): + curr_time = self._get_event_counters.update(desc, last_time) + sql_getevents_timer.inc_time(curr_time - last_time, desc) + return curr_time d = json.loads(js) start_time = update_counter("decode_json", start_time) -- cgit 1.5.1 From f9478e475bf645038b4f1f163240d7fd0ec02af0 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Fri, 6 Mar 2015 15:28:06 +0000 Subject: Rename Metrics' "keys" to "labels" --- synapse/federation/transaction_queue.py | 4 ++-- synapse/http/client.py | 4 ++-- synapse/http/matrixfederationclient.py | 4 ++-- synapse/http/server.py | 4 ++-- synapse/metrics/metric.py | 24 ++++++++++++------------ synapse/storage/_base.py | 6 +++--- tests/metrics/test_metric.py | 6 +++--- 7 files changed, 26 insertions(+), 26 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index b9d3f89324..ae62c69fc3 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -71,11 +71,11 @@ class TransactionQueue(object): metrics.register_callback("pending_pdus", lambda: {(dest,): len(pdus[dest]) for dest in pdus.keys()}, - keys=["dest"], + labels=["dest"], ) metrics.register_callback("pending_edus", lambda: {(dest,): len(edus[dest]) for dest in edus.keys()}, - keys=["dest"], + labels=["dest"], ) def can_send_to(self, destination): diff --git a/synapse/http/client.py b/synapse/http/client.py index e40e82e80b..ad2c9c05ec 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -35,10 +35,10 @@ logger = logging.getLogger(__name__) metrics = synapse.metrics.get_metrics_for(__name__) outgoing_requests_counter = metrics.register_counter("outgoing_requests", - keys=["method"], + labels=["method"], ) incoming_responses_counter = metrics.register_counter("incoming_responses", - keys=["method","code"], + labels=["method","code"], ) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 0091527693..6b6d79a044 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -44,10 +44,10 @@ logger = logging.getLogger(__name__) metrics = synapse.metrics.get_metrics_for(__name__) outgoing_requests_counter = metrics.register_counter("outgoing_requests", - keys=["method"], + labels=["method"], ) incoming_responses_counter = metrics.register_counter("incoming_responses", - keys=["method","code"], + labels=["method","code"], ) diff --git a/synapse/http/server.py b/synapse/http/server.py index ac893bb40c..35bd3a00ba 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -38,10 +38,10 @@ logger = logging.getLogger(__name__) metrics = synapse.metrics.get_metrics_for(__name__) incoming_requests_counter = metrics.register_counter("incoming_requests", - keys=["method"], + labels=["method"], ) outgoing_responses_counter = metrics.register_counter("outgoing_responses", - keys=["method","code"], + labels=["method","code"], ) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index 4a6ab9cd74..8ba13075f7 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -25,22 +25,22 @@ def map_concat(func, items): class BaseMetric(object): - def __init__(self, name, keys=[]): + def __init__(self, name, labels=[]): self.name = name - self.keys = keys # OK not to clone as we never write it + self.labels = labels # OK not to clone as we never write it def dimension(self): - return len(self.keys) + return len(self.labels) def is_scalar(self): - return not len(self.keys) + return not len(self.labels) def _render_key(self, values): if self.is_scalar(): return "" # TODO: some kind of value escape return "{%s}" % ( - ",".join(["%s=%s" % kv for kv in zip(self.keys, values)]) + ",".join(["%s=%s" % kv for kv in zip(self.labels, values)]) ) def render(self): @@ -62,7 +62,7 @@ class CounterMetric(BaseMetric): def inc(self, *values): if len(values) != self.dimension(): - raise ValueError("Expected as many values to inc() as keys (%d)" % + raise ValueError("Expected as many values to inc() as labels (%d)" % (self.dimension()) ) @@ -85,8 +85,8 @@ class CallbackMetric(BaseMetric): it is rendered. Typically this is used to implement gauges that yield the size or other state of some in-memory object by actively querying it.""" - def __init__(self, name, callback, keys=[]): - super(CallbackMetric, self).__init__(name, keys=keys) + def __init__(self, name, callback, labels=[]): + super(CallbackMetric, self).__init__(name, labels=labels) self.callback = callback @@ -139,15 +139,15 @@ class CacheMetric(object): This metric generates standard metric name pairs, so that monitoring rules can easily be applied to measure hit ratio.""" - def __init__(self, name, size_callback, keys=[]): + def __init__(self, name, size_callback, labels=[]): self.name = name - self.hits = CounterMetric(name + ":hits", keys=keys) - self.misses = CounterMetric(name + ":misses", keys=keys) + self.hits = CounterMetric(name + ":hits", labels=labels) + self.misses = CounterMetric(name + ":misses", labels=labels) self.size = CallbackMetric(name + ":size", callback=size_callback, - keys=keys, + labels=labels, ) def inc_hits(self, *values): diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index d8c5a60c71..a38b603584 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -38,9 +38,9 @@ transaction_logger = logging.getLogger("synapse.storage.txn") metrics = synapse.metrics.get_metrics_for("synapse.storage") -sql_query_timer = metrics.register_timer("queries", keys=["verb"]) -sql_txn_timer = metrics.register_timer("transactions", keys=["desc"]) -sql_getevents_timer = metrics.register_timer("get_events", keys=["desc"]) +sql_query_timer = metrics.register_timer("queries", labels=["verb"]) +sql_txn_timer = metrics.register_timer("transactions", labels=["desc"]) +sql_getevents_timer = metrics.register_timer("get_events", labels=["desc"]) # TODO(paul): diff --git a/tests/metrics/test_metric.py b/tests/metrics/test_metric.py index b25520821d..fefe1a5867 100644 --- a/tests/metrics/test_metric.py +++ b/tests/metrics/test_metric.py @@ -43,7 +43,7 @@ class CounterMetricTestCase(unittest.TestCase): ]) def test_vector(self): - counter = CounterMetric("vector", keys=["method"]) + counter = CounterMetric("vector", labels=["method"]) # Empty counter doesn't yet know what values it has self.assertEquals(counter.render(), []) @@ -83,7 +83,7 @@ class CallbackMetricTestCase(unittest.TestCase): def test_vector(self): vals = dict() - metric = CallbackMetric("values", lambda: vals, keys=["type"]) + metric = CallbackMetric("values", lambda: vals, labels=["type"]) self.assertEquals(metric.render(), []) @@ -115,7 +115,7 @@ class TimerMetricTestCase(unittest.TestCase): ]) def test_vector(self): - metric = TimerMetric("queries", keys=["verb"]) + metric = TimerMetric("queries", labels=["verb"]) self.assertEquals(metric.render(), []) -- cgit 1.5.1 From b0cf86731957876ca877c35bf30c6f695f1a544c Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Fri, 6 Mar 2015 16:18:21 +0000 Subject: Use _ instead of . as a metric namespacing separator, for Prometheus --- synapse/federation/transaction_queue.py | 4 ++-- synapse/handlers/presence.py | 2 +- synapse/http/client.py | 4 ++-- synapse/http/matrixfederationclient.py | 4 ++-- synapse/http/server.py | 4 ++-- synapse/metrics/__init__.py | 14 +++++++++++--- synapse/notifier.py | 2 +- synapse/storage/_base.py | 18 +++++++++++++----- 8 files changed, 34 insertions(+), 18 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index ae62c69fc3..ca5bcf21cf 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -69,11 +69,11 @@ class TransactionQueue(object): # HACK to get unique tx id self._next_txn_id = int(self._clock.time_msec()) - metrics.register_callback("pending_pdus", + metrics.register_callback("pendingPdus", lambda: {(dest,): len(pdus[dest]) for dest in pdus.keys()}, labels=["dest"], ) - metrics.register_callback("pending_edus", + metrics.register_callback("pendingEdus", lambda: {(dest,): len(edus[dest]) for dest in edus.keys()}, labels=["dest"], ) diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 698946a48b..c6d6aef53b 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -136,7 +136,7 @@ class PresenceHandler(BaseHandler): self._user_cachemap = {} self._user_cachemap_latest_serial = 0 - metrics.register_callback("user_cachemap:size", + metrics.register_callback("userCachemap:size", lambda: len(self._user_cachemap) ) diff --git a/synapse/http/client.py b/synapse/http/client.py index ad2c9c05ec..01737a7188 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -34,10 +34,10 @@ logger = logging.getLogger(__name__) metrics = synapse.metrics.get_metrics_for(__name__) -outgoing_requests_counter = metrics.register_counter("outgoing_requests", +outgoing_requests_counter = metrics.register_counter("requests", labels=["method"], ) -incoming_responses_counter = metrics.register_counter("incoming_responses", +incoming_responses_counter = metrics.register_counter("responses", labels=["method","code"], ) diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 6b6d79a044..11883d3852 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -43,10 +43,10 @@ logger = logging.getLogger(__name__) metrics = synapse.metrics.get_metrics_for(__name__) -outgoing_requests_counter = metrics.register_counter("outgoing_requests", +outgoing_requests_counter = metrics.register_counter("requests", labels=["method"], ) -incoming_responses_counter = metrics.register_counter("incoming_responses", +incoming_responses_counter = metrics.register_counter("responses", labels=["method","code"], ) diff --git a/synapse/http/server.py b/synapse/http/server.py index 35bd3a00ba..23708c08c9 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -37,10 +37,10 @@ logger = logging.getLogger(__name__) metrics = synapse.metrics.get_metrics_for(__name__) -incoming_requests_counter = metrics.register_counter("incoming_requests", +incoming_requests_counter = metrics.register_counter("requests", labels=["method"], ) -outgoing_responses_counter = metrics.register_counter("outgoing_responses", +outgoing_responses_counter = metrics.register_counter("responses", labels=["method","code"], ) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 443d67f41c..47e475acd2 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -41,7 +41,12 @@ class Metrics(object): self.name_prefix = name def _register(self, metric_class, name, *args, **kwargs): - full_name = "%s.%s" % (self.name_prefix, name) + if "_" in name: + raise ValueError("Metric names %s is invalid as it cannot contain an underscore" + % (name) + ) + + full_name = "%s_%s" % (self.name_prefix, name) metric = metric_class(full_name, *args, **kwargs) @@ -78,10 +83,13 @@ class Metrics(object): return wrapped -def get_metrics_for(name): +def get_metrics_for(pkg_name): """ Returns a Metrics instance for conveniently creating metrics namespaced with the given name prefix. """ - return Metrics(name) + + # Convert a "package.name" to "package_name" because Prometheus doesn't + # let us use . in metric names + return Metrics(pkg_name.replace(".", "_")) def render_all(): diff --git a/synapse/notifier.py b/synapse/notifier.py index 1f7cad624e..75e8152d03 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -122,7 +122,7 @@ class Notifier(object): all_listeners |= x return len(all_listeners) - metrics.register_callback("all_listeners", count_listeners) + metrics.register_callback("listeners", count_listeners) metrics.register_callback("rooms", lambda: count(bool, self.room_to_listeners.values()) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index a38b603584..35d118c586 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -40,7 +40,14 @@ metrics = synapse.metrics.get_metrics_for("synapse.storage") sql_query_timer = metrics.register_timer("queries", labels=["verb"]) sql_txn_timer = metrics.register_timer("transactions", labels=["desc"]) -sql_getevents_timer = metrics.register_timer("get_events", labels=["desc"]) +sql_getevents_timer = metrics.register_timer("getEvents", labels=["desc"]) + +caches_by_name = {} +cache_counter = metrics.register_cache( + "cache", + lambda: {(name,): len(caches_by_name[name]) for name in caches_by_name.keys()}, + labels=["name"], +) # TODO(paul): @@ -62,8 +69,9 @@ def cached(max_entries=1000): """ def wrap(orig): cache = OrderedDict() + name = orig.__name__ - counter = metrics.register_cache(orig.__name__, lambda: len(cache)) + caches_by_name[name] = cache def prefill(key, value): while len(cache) > max_entries: @@ -74,10 +82,10 @@ def cached(max_entries=1000): @defer.inlineCallbacks def wrapped(self, key): if key in cache: - counter.inc_hits() + cache_counter.inc_hits(name) defer.returnValue(cache[key]) - counter.inc_misses() + cache_counter.inc_misses(name) ret = yield orig(self, key) prefill(key, ret) defer.returnValue(ret) @@ -195,7 +203,7 @@ class SQLBaseStore(object): self._get_event_cache = LruCache(hs.config.event_cache_size) self._get_event_cache_counter = metrics.register_cache( - "get_event_cache", + "getEventCache", size_callback=lambda: len(self._get_event_cache), ) -- cgit 1.5.1 From 642f725fd74df0a921912c5284a93a81dc0d448d Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Mon, 9 Mar 2015 20:39:17 +0000 Subject: Pretend the 'getEvent' cache is just another cache in the set of all the others for metric --- synapse/storage/_base.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 35d118c586..2708d3c5b6 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -202,10 +202,9 @@ class SQLBaseStore(object): self._get_event_counters = PerformanceCounters() self._get_event_cache = LruCache(hs.config.event_cache_size) - self._get_event_cache_counter = metrics.register_cache( - "getEventCache", - size_callback=lambda: len(self._get_event_cache), - ) + + # Pretend the getEventCache is just another named cache + caches_by_name["*getEvent*"] = self._get_event_cache def start_profiling(self): self._previous_loop_ts = self._clock.time_msec() @@ -682,10 +681,10 @@ class SQLBaseStore(object): # Separate cache entries for each way to invoke _get_event_txn ret = cache[(check_redacted, get_prev_content, allow_rejected)] - self._get_event_cache_counter.inc_hits() + cache_counter.inc_hits("*getEvent*") return ret except KeyError: - self._get_event_cache_counter.inc_misses() + cache_counter.inc_misses("*getEvent*") pass finally: start_time = update_counter("event_cache", start_time) -- cgit 1.5.1 From f1fbe3e09f5573ac7ea9159635b02cc579e19720 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Tue, 10 Mar 2015 15:21:03 +0000 Subject: Rename TimerMetric to DistributionMetric; as it could count more than just time --- synapse/metrics/__init__.py | 8 +++++--- synapse/metrics/metric.py | 24 +++++++++++++----------- synapse/storage/_base.py | 14 +++++++------- tests/metrics/test_metric.py | 24 ++++++++++++------------ 4 files changed, 37 insertions(+), 33 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 1acaa3fd09..c161c17e9f 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -19,7 +19,9 @@ from __future__ import absolute_import import logging from resource import getrusage, getpagesize, RUSAGE_SELF -from .metric import CounterMetric, CallbackMetric, TimerMetric, CacheMetric +from .metric import ( + CounterMetric, CallbackMetric, DistributionMetric, CacheMetric +) logger = logging.getLogger(__name__) @@ -59,8 +61,8 @@ class Metrics(object): def register_callback(self, *args, **kwargs): return self._register(CallbackMetric, *args, **kwargs) - def register_timer(self, *args, **kwargs): - return self._register(TimerMetric, *args, **kwargs) + def register_distribution(self, *args, **kwargs): + return self._register(DistributionMetric, *args, **kwargs) def register_cache(self, *args, **kwargs): return self._register(CacheMetric, *args, **kwargs) diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py index 6b7d3358bc..45d2752a20 100644 --- a/synapse/metrics/metric.py +++ b/synapse/metrics/metric.py @@ -101,35 +101,37 @@ class CallbackMetric(BaseMetric): for k in sorted(value.keys())] -class TimerMetric(CounterMetric): - """A combination of an event counter and a time accumulator, which counts - both the number of events and how long each one takes. +class DistributionMetric(CounterMetric): + """A combination of an event counter and an accumulator, which counts + both the number of events and accumulates the total value. Typically this + could be used to keep track of method-running times, or other distributions + of values that occur in discrete occurances. TODO(paul): Try to export some heatmap-style stats? """ def __init__(self, *args, **kwargs): - super(TimerMetric, self).__init__(*args, **kwargs) + super(DistributionMetric, self).__init__(*args, **kwargs) - self.times = {} + self.totals = {} # Scalar metrics are never empty if self.is_scalar(): - self.times[()] = 0 + self.totals[()] = 0 - def inc_time(self, msec, *values): + def inc_by(self, inc, *values): self.inc(*values) - if values not in self.times: - self.times[values] = msec + if values not in self.totals: + self.totals[values] = inc else: - self.times[values] += msec + self.totals[values] += inc def render_item(self, k): keystr = self._render_key(k) return ["%s:count%s %d" % (self.name, keystr, self.counts[k]), - "%s:msec%s %d" % (self.name, keystr, self.times[k])] + "%s:total%s %d" % (self.name, keystr, self.totals[k])] class CacheMetric(object): diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 2708d3c5b6..104e8e3cf6 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -38,9 +38,9 @@ transaction_logger = logging.getLogger("synapse.storage.txn") metrics = synapse.metrics.get_metrics_for("synapse.storage") -sql_query_timer = metrics.register_timer("queries", labels=["verb"]) -sql_txn_timer = metrics.register_timer("transactions", labels=["desc"]) -sql_getevents_timer = metrics.register_timer("getEvents", labels=["desc"]) +sql_query_timer = metrics.register_distribution("queries", labels=["verb"]) +sql_txn_timer = metrics.register_distribution("transactions", labels=["desc"]) +sql_getevents_timer = metrics.register_distribution("getEvents", labels=["desc"]) caches_by_name = {} cache_counter = metrics.register_cache( @@ -143,7 +143,7 @@ class LoggingTransaction(object): finally: msecs = (time.time() * 1000) - start sql_logger.debug("[SQL time] {%s} %f", self.name, msecs) - sql_query_timer.inc_time(msecs, sql.split()[0]) + sql_query_timer.inc_by(msecs, sql.split()[0]) class PerformanceCounters(object): @@ -268,7 +268,7 @@ class SQLBaseStore(object): self._current_txn_total_time += end - start self._txn_perf_counters.update(desc, start, end) - sql_txn_timer.inc_time(self._current_txn_total_time, desc) + sql_txn_timer.inc_by(self._current_txn_total_time, desc) with PreserveLoggingContext(): result = yield self._db_pool.runInteraction( @@ -672,7 +672,7 @@ class SQLBaseStore(object): def update_counter(desc, last_time): curr_time = self._get_event_counters.update(desc, last_time) - sql_getevents_timer.inc_time(curr_time - last_time, desc) + sql_getevents_timer.inc_by(curr_time - last_time, desc) return curr_time cache = self._get_event_cache.setdefault(event_id, {}) @@ -727,7 +727,7 @@ class SQLBaseStore(object): def update_counter(desc, last_time): curr_time = self._get_event_counters.update(desc, last_time) - sql_getevents_timer.inc_time(curr_time - last_time, desc) + sql_getevents_timer.inc_by(curr_time - last_time, desc) return curr_time d = json.loads(js) diff --git a/tests/metrics/test_metric.py b/tests/metrics/test_metric.py index 193908b44e..1ca3e45a26 100644 --- a/tests/metrics/test_metric.py +++ b/tests/metrics/test_metric.py @@ -16,7 +16,7 @@ from tests import unittest from synapse.metrics.metric import ( - CounterMetric, CallbackMetric, TimerMetric, CacheMetric + CounterMetric, CallbackMetric, DistributionMetric, CacheMetric ) @@ -97,37 +97,37 @@ class CallbackMetricTestCase(unittest.TestCase): ]) -class TimerMetricTestCase(unittest.TestCase): +class DistributionMetricTestCase(unittest.TestCase): def test_scalar(self): - metric = TimerMetric("thing") + metric = DistributionMetric("thing") self.assertEquals(metric.render(), [ 'thing:count 0', - 'thing:msec 0', + 'thing:total 0', ]) - metric.inc_time(500) + metric.inc_by(500) self.assertEquals(metric.render(), [ 'thing:count 1', - 'thing:msec 500', + 'thing:total 500', ]) def test_vector(self): - metric = TimerMetric("queries", labels=["verb"]) + metric = DistributionMetric("queries", labels=["verb"]) self.assertEquals(metric.render(), []) - metric.inc_time(300, "SELECT") - metric.inc_time(200, "SELECT") - metric.inc_time(800, "INSERT") + metric.inc_by(300, "SELECT") + metric.inc_by(200, "SELECT") + metric.inc_by(800, "INSERT") self.assertEquals(metric.render(), [ 'queries:count{verb="INSERT"} 1', - 'queries:msec{verb="INSERT"} 800', + 'queries:total{verb="INSERT"} 800', 'queries:count{verb="SELECT"} 2', - 'queries:msec{verb="SELECT"} 500', + 'queries:total{verb="SELECT"} 500', ]) -- cgit 1.5.1 From 63cb7ece62962a0004bc45cf866b87e107bc0bc9 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Tue, 10 Mar 2015 15:24:02 +0000 Subject: Rename the timer metrics exported by synapse.storage to append _time, so the meaning of ':total' is clearer --- synapse/storage/_base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 104e8e3cf6..f742fe15df 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -38,9 +38,9 @@ transaction_logger = logging.getLogger("synapse.storage.txn") metrics = synapse.metrics.get_metrics_for("synapse.storage") -sql_query_timer = metrics.register_distribution("queries", labels=["verb"]) -sql_txn_timer = metrics.register_distribution("transactions", labels=["desc"]) -sql_getevents_timer = metrics.register_distribution("getEvents", labels=["desc"]) +sql_query_timer = metrics.register_distribution("query_time", labels=["verb"]) +sql_txn_timer = metrics.register_distribution("transaction_time", labels=["desc"]) +sql_getevents_timer = metrics.register_distribution("getEvents_time", labels=["desc"]) caches_by_name = {} cache_counter = metrics.register_cache( -- cgit 1.5.1 From e75fa8bbbf1215a89e0239f96789317b96383734 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 11 Mar 2015 18:57:35 +0000 Subject: Bugfix to sql_txn_timer increment - add only the per-TXN duration, not the total time ever spent since boot --- synapse/storage/_base.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'synapse/storage') diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index f742fe15df..40f2fc6d76 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -260,15 +260,13 @@ class SQLBaseStore(object): raise finally: end = time.time() * 1000 - transaction_logger.debug( - "[TXN END] {%s} %f", - name, end - start - ) + duration = end - start - self._current_txn_total_time += end - start - self._txn_perf_counters.update(desc, start, end) + transaction_logger.debug("[TXN END] {%s} %f", name, duration) - sql_txn_timer.inc_by(self._current_txn_total_time, desc) + self._current_txn_total_time += duration + self._txn_perf_counters.update(desc, start, end) + sql_txn_timer.inc_by(duration, desc) with PreserveLoggingContext(): result = yield self._db_pool.runInteraction( -- cgit 1.5.1