diff --git a/CHANGES.rst b/CHANGES.rst
index 24e4e7a384..a7ed49e105 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -2,7 +2,10 @@ Unreleased
==========
synctl no longer starts the main synapse when using ``-a`` option with workers.
-A new worker file should be added with ``worker_app: synapse.app.homeserver``
+A new worker file should be added with ``worker_app: synapse.app.homeserver``.
+
+This release also begins the process of renaming a number of the metrics
+reported to prometheus. See `docs/metrics-howto.rst <docs/metrics-howto.rst#block-and-response-metrics-renamed-for-0-27-0>`_.
Changes in synapse v0.26.0 (2018-01-05)
diff --git a/docs/metrics-howto.rst b/docs/metrics-howto.rst
index 143cd0f42f..8acc479bc3 100644
--- a/docs/metrics-howto.rst
+++ b/docs/metrics-howto.rst
@@ -16,7 +16,7 @@ How to monitor Synapse metrics using Prometheus
metrics_port: 9092
Also ensure that ``enable_metrics`` is set to ``True``.
-
+
Restart synapse.
3. Add a prometheus target for synapse.
@@ -28,11 +28,58 @@ How to monitor Synapse metrics using Prometheus
static_configs:
- targets: ["my.server.here:9092"]
- If your prometheus is older than 1.5.2, you will need to replace
+ If your prometheus is older than 1.5.2, you will need to replace
``static_configs`` in the above with ``target_groups``.
-
+
Restart prometheus.
+
+Block and response metrics renamed for 0.27.0
+---------------------------------------------
+
+Synapse 0.27.0 begins the process of rationalising the duplicate ``*:count``
+metrics reported for the resource tracking for code blocks and HTTP requests.
+
+At the same time, the corresponding ``*:total`` metrics are being renamed, as
+the ``:total`` suffix no longer makes sense in the absence of a corresponding
+``:count`` metric.
+
+To enable a graceful migration path, this release just adds new names for the
+metrics being renamed. A future release will remove the old ones.
+
+The following table shows the new metrics, and the old metrics which they are
+replacing.
+
+==================================================== ===================================================
+New name Old name
+==================================================== ===================================================
+synapse_util_metrics_block_count synapse_util_metrics_block_timer:count
+synapse_util_metrics_block_count synapse_util_metrics_block_ru_utime:count
+synapse_util_metrics_block_count synapse_util_metrics_block_ru_stime:count
+synapse_util_metrics_block_count synapse_util_metrics_block_db_txn_count:count
+synapse_util_metrics_block_count synapse_util_metrics_block_db_txn_duration:count
+
+synapse_util_metrics_block_time_seconds synapse_util_metrics_block_timer:total
+synapse_util_metrics_block_ru_utime_seconds synapse_util_metrics_block_ru_utime:total
+synapse_util_metrics_block_ru_stime_seconds synapse_util_metrics_block_ru_stime:total
+synapse_util_metrics_block_db_txn_count synapse_util_metrics_block_db_txn_count:total
+synapse_util_metrics_block_db_txn_duration_seconds synapse_util_metrics_block_db_txn_duration:total
+
+synapse_http_server_response_count synapse_http_server_requests
+synapse_http_server_response_count synapse_http_server_response_time:count
+synapse_http_server_response_count synapse_http_server_response_ru_utime:count
+synapse_http_server_response_count synapse_http_server_response_ru_stime:count
+synapse_http_server_response_count synapse_http_server_response_db_txn_count:count
+synapse_http_server_response_count synapse_http_server_response_db_txn_duration:count
+
+synapse_http_server_response_time_seconds synapse_http_server_response_time:total
+synapse_http_server_response_ru_utime_seconds synapse_http_server_response_ru_utime:total
+synapse_http_server_response_ru_stime_seconds synapse_http_server_response_ru_stime:total
+synapse_http_server_response_db_txn_count synapse_http_server_response_db_txn_count:total
+synapse_http_server_response_db_txn_duration_seconds synapse_http_server_response_db_txn_duration:total
+==================================================== ===================================================
+
+
Standard Metric Names
---------------------
@@ -42,7 +89,7 @@ have been changed to seconds, from miliseconds.
================================== =============================
New name Old name
----------------------------------- -----------------------------
+================================== =============================
process_cpu_user_seconds_total process_resource_utime / 1000
process_cpu_system_seconds_total process_resource_stime / 1000
process_open_fds (no 'type' label) process_fds
@@ -52,8 +99,8 @@ The python-specific counts of garbage collector performance have been renamed.
=========================== ======================
New name Old name
---------------------------- ----------------------
-python_gc_time reactor_gc_time
+=========================== ======================
+python_gc_time reactor_gc_time
python_gc_unreachable_total reactor_gc_unreachable
python_gc_counts reactor_gc_counts
=========================== ======================
@@ -62,7 +109,7 @@ The twisted-specific reactor metrics have been renamed.
==================================== =====================
New name Old name
------------------------------------- ---------------------
+==================================== =====================
python_twisted_reactor_pending_calls reactor_pending_calls
python_twisted_reactor_tick_time reactor_tick_time
==================================== =====================
diff --git a/synapse/http/server.py b/synapse/http/server.py
index 6e8f4c9c5f..269b65ca41 100644
--- a/synapse/http/server.py
+++ b/synapse/http/server.py
@@ -42,34 +42,62 @@ logger = logging.getLogger(__name__)
metrics = synapse.metrics.get_metrics_for(__name__)
-incoming_requests_counter = metrics.register_counter(
- "requests",
+# total number of responses served, split by method/servlet/tag
+response_count = metrics.register_counter(
+ "response_count",
labels=["method", "servlet", "tag"],
+ alternative_names=(
+ # the following are all deprecated aliases for the same metric
+ metrics.name_prefix + x for x in (
+ "_requests",
+ "_response_time:count",
+ "_response_ru_utime:count",
+ "_response_ru_stime:count",
+ "_response_db_txn_count:count",
+ "_response_db_txn_duration:count",
+ )
+ )
)
+
outgoing_responses_counter = metrics.register_counter(
"responses",
labels=["method", "code"],
)
-response_timer = metrics.register_distribution(
- "response_time",
- labels=["method", "servlet", "tag"]
+response_timer = metrics.register_counter(
+ "response_time_seconds",
+ labels=["method", "servlet", "tag"],
+ alternative_names=(
+ metrics.name_prefix + "_response_time:total",
+ ),
)
-response_ru_utime = metrics.register_distribution(
- "response_ru_utime", labels=["method", "servlet", "tag"]
+response_ru_utime = metrics.register_counter(
+ "response_ru_utime_seconds", labels=["method", "servlet", "tag"],
+ alternative_names=(
+ metrics.name_prefix + "_response_ru_utime:total",
+ ),
)
-response_ru_stime = metrics.register_distribution(
- "response_ru_stime", labels=["method", "servlet", "tag"]
+response_ru_stime = metrics.register_counter(
+ "response_ru_stime_seconds", labels=["method", "servlet", "tag"],
+ alternative_names=(
+ metrics.name_prefix + "_response_ru_stime:total",
+ ),
)
-response_db_txn_count = metrics.register_distribution(
- "response_db_txn_count", labels=["method", "servlet", "tag"]
+response_db_txn_count = metrics.register_counter(
+ "response_db_txn_count", labels=["method", "servlet", "tag"],
+ alternative_names=(
+ metrics.name_prefix + "_response_db_txn_count:total",
+ ),
)
-response_db_txn_duration = metrics.register_distribution(
- "response_db_txn_duration", labels=["method", "servlet", "tag"]
+response_db_txn_duration = metrics.register_counter(
+ "response_db_txn_duration_seconds", labels=["method", "servlet", "tag"],
+ alternative_names=(
+ metrics.name_prefix + "_response_db_txn_duration:total",
+ ),
)
@@ -330,7 +358,7 @@ class RequestMetrics(object):
)
return
- incoming_requests_counter.inc(request.method, self.name, tag)
+ response_count.inc(request.method, self.name, tag)
response_timer.inc_by(
clock.time_msec() - self.start, request.method,
diff --git a/synapse/metrics/metric.py b/synapse/metrics/metric.py
index 1d054dd557..f480aae614 100644
--- a/synapse/metrics/metric.py
+++ b/synapse/metrics/metric.py
@@ -17,16 +17,33 @@
from itertools import chain
-# TODO(paul): I can't believe Python doesn't have one of these
-def map_concat(func, items):
- # flatten a list-of-lists
- return list(chain.from_iterable(map(func, items)))
+def flatten(items):
+ """Flatten a list of lists
+
+ Args:
+ items: iterable[iterable[X]]
+
+ Returns:
+ list[X]: flattened list
+ """
+ return list(chain.from_iterable(items))
class BaseMetric(object):
+ """Base class for metrics which report a single value per label set
+ """
- def __init__(self, name, labels=[]):
- self.name = name
+ def __init__(self, name, labels=[], alternative_names=[]):
+ """
+ Args:
+ name (str): principal name for this metric
+ labels (list(str)): names of the labels which will be reported
+ for this metric
+ alternative_names (iterable(str)): list of alternative names for
+ this metric. This can be useful to provide a migration path
+ when renaming metrics.
+ """
+ self._names = [name] + list(alternative_names)
self.labels = labels # OK not to clone as we never write it
def dimension(self):
@@ -36,7 +53,7 @@ class BaseMetric(object):
return not len(self.labels)
def _render_labelvalue(self, value):
- # TODO: some kind of value escape
+ # TODO: escape backslashes, quotes and newlines
return '"%s"' % (value)
def _render_key(self, values):
@@ -47,6 +64,36 @@ class BaseMetric(object):
for k, v in zip(self.labels, values)])
)
+ def _render_for_labels(self, label_values, value):
+ """Render this metric for a single set of labels
+
+ Args:
+ label_values (list[str]): values for each of the labels
+ value: value of the metric at with these labels
+
+ Returns:
+ iterable[str]: rendered metric
+ """
+ rendered_labels = self._render_key(label_values)
+ return (
+ "%s%s %.12g" % (name, rendered_labels, value)
+ for name in self._names
+ )
+
+ def render(self):
+ """Render this metric
+
+ Each metric is rendered as:
+
+ name{label1="val1",label2="val2"} value
+
+ https://prometheus.io/docs/instrumenting/exposition_formats/#text-format-details
+
+ Returns:
+ iterable[str]: rendered metrics
+ """
+ raise NotImplementedError()
+
class CounterMetric(BaseMetric):
"""The simplest kind of metric; one that stores a monotonically-increasing
@@ -62,6 +109,10 @@ class CounterMetric(BaseMetric):
def __init__(self, *args, **kwargs):
super(CounterMetric, self).__init__(*args, **kwargs)
+ # dict[list[str]]: value for each set of label values. the keys are the
+ # label values, in the same order as the labels in self.labels.
+ #
+ # (if the metric is a scalar, the (single) key is the empty list).
self.counts = {}
# Scalar metrics are never empty
@@ -84,11 +135,11 @@ class CounterMetric(BaseMetric):
def inc(self, *values):
self.inc_by(1, *values)
- def render_item(self, k):
- return ["%s%s %.12g" % (self.name, self._render_key(k), self.counts[k])]
-
def render(self):
- return map_concat(self.render_item, sorted(self.counts.keys()))
+ return flatten(
+ self._render_for_labels(k, self.counts[k])
+ for k in sorted(self.counts.keys())
+ )
class CallbackMetric(BaseMetric):
@@ -105,10 +156,12 @@ class CallbackMetric(BaseMetric):
value = self.callback()
if self.is_scalar():
- return ["%s %.12g" % (self.name, value)]
+ return list(self._render_for_labels([], value))
- return ["%s%s %.12g" % (self.name, self._render_key(k), value[k])
- for k in sorted(value.keys())]
+ return flatten(
+ self._render_for_labels(k, value[k])
+ for k in sorted(value.keys())
+ )
class DistributionMetric(object):
diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py
index 4ea930d3e8..8d22ff3068 100644
--- a/synapse/util/metrics.py
+++ b/synapse/util/metrics.py
@@ -27,25 +27,56 @@ logger = logging.getLogger(__name__)
metrics = synapse.metrics.get_metrics_for(__name__)
-block_timer = metrics.register_distribution(
- "block_timer",
- labels=["block_name"]
+# total number of times we have hit this block
+response_count = metrics.register_counter(
+ "block_count",
+ labels=["block_name"],
+ alternative_names=(
+ # the following are all deprecated aliases for the same metric
+ metrics.name_prefix + x for x in (
+ "_block_timer:count",
+ "_block_ru_utime:count",
+ "_block_ru_stime:count",
+ "_block_db_txn_count:count",
+ "_block_db_txn_duration:count",
+ )
+ )
+)
+
+block_timer = metrics.register_counter(
+ "block_time_seconds",
+ labels=["block_name"],
+ alternative_names=(
+ metrics.name_prefix + "_block_timer:total",
+ ),
)
-block_ru_utime = metrics.register_distribution(
- "block_ru_utime", labels=["block_name"]
+block_ru_utime = metrics.register_counter(
+ "block_ru_utime_seconds", labels=["block_name"],
+ alternative_names=(
+ metrics.name_prefix + "_block_ru_utime:total",
+ ),
)
-block_ru_stime = metrics.register_distribution(
- "block_ru_stime", labels=["block_name"]
+block_ru_stime = metrics.register_counter(
+ "block_ru_stime_seconds", labels=["block_name"],
+ alternative_names=(
+ metrics.name_prefix + "_block_ru_stime:total",
+ ),
)
-block_db_txn_count = metrics.register_distribution(
- "block_db_txn_count", labels=["block_name"]
+block_db_txn_count = metrics.register_counter(
+ "block_db_txn_count", labels=["block_name"],
+ alternative_names=(
+ metrics.name_prefix + "_block_db_txn_count:total",
+ ),
)
-block_db_txn_duration = metrics.register_distribution(
- "block_db_txn_duration", labels=["block_name"]
+block_db_txn_duration = metrics.register_counter(
+ "block_db_txn_duration_seconds", labels=["block_name"],
+ alternative_names=(
+ metrics.name_prefix + "_block_db_txn_count:total",
+ ),
)
|