From b932b4ea256fcbfa112f006ccb2e1c10ef195c17 Mon Sep 17 00:00:00 2001 From: Adrian Tschira Date: Mon, 30 Apr 2018 14:36:39 +0200 Subject: use repr, not str Signed-off-by: Adrian Tschira --- synapse/http/site.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'synapse/http') diff --git a/synapse/http/site.py b/synapse/http/site.py index 202a990508..75e32924e9 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -56,7 +56,7 @@ class SynapseRequest(Request): def __repr__(self): # We overwrite this so that we don't log ``access_token`` - return '<%s at 0x%x method=%s uri=%s clientproto=%s site=%s>' % ( + return '<%s at 0x%x method=%r uri=%r clientproto=%r site=%r>' % ( self.__class__.__name__, id(self), self.method, -- cgit 1.5.1 From d9fe2b2d9dbb539da32f969b8bd752159fd5eb6f Mon Sep 17 00:00:00 2001 From: Adrian Tschira Date: Sun, 15 Apr 2018 21:43:35 +0200 Subject: Replace some more comparisons with six plus a bonus b"" string I missed last time Signed-off-by: Adrian Tschira --- synapse/events/utils.py | 4 +++- synapse/events/validator.py | 6 ++++-- synapse/groups/groups_server.py | 4 +++- synapse/http/matrixfederationclient.py | 8 +++++--- synapse/push/push_rule_evaluator.py | 4 +++- synapse/rest/client/v1/presence.py | 8 +++++--- 6 files changed, 23 insertions(+), 11 deletions(-) (limited to 'synapse/http') diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 824f4a42e3..29ae086786 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -20,6 +20,8 @@ from frozendict import frozendict import re +from six import string_types + # Split strings on "." but not "\." This uses a negative lookbehind assertion for '\' # (? Date: Mon, 21 May 2018 19:47:37 -0500 Subject: replacing portions --- synapse/api/auth.py | 2 +- synapse/federation/federation_client.py | 17 ++- synapse/federation/federation_server.py | 16 ++- synapse/federation/send_queue.py | 8 +- synapse/federation/transaction_queue.py | 47 +++----- synapse/handlers/appservice.py | 19 +-- synapse/handlers/presence.py | 58 ++++----- synapse/http/client.py | 20 +--- synapse/http/matrixfederationclient.py | 14 +-- synapse/metrics/__init__.py | 182 ++++++++++------------------- synapse/notifier.py | 23 ++-- synapse/push/httppusher.py | 13 +-- synapse/push/push_rule_evaluator.py | 2 +- synapse/python_dependencies.py | 1 + synapse/replication/tcp/protocol.py | 88 ++++++-------- synapse/util/caches/__init__.py | 57 ++++++--- synapse/util/caches/descriptors.py | 2 +- synapse/util/caches/dictionary_cache.py | 2 +- synapse/util/caches/expiringcache.py | 4 +- synapse/util/caches/response_cache.py | 11 +- synapse/util/caches/stream_change_cache.py | 2 +- synapse/util/metrics.py | 91 ++++----------- tests/__init__.py | 3 + 23 files changed, 268 insertions(+), 414 deletions(-) (limited to 'synapse/http') diff --git a/synapse/api/auth.py b/synapse/api/auth.py index f17fda6315..b052cf532b 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -57,7 +57,7 @@ class Auth(object): self.TOKEN_NOT_FOUND_HTTP_STATUS = 401 self.token_cache = LruCache(CACHE_SIZE_FACTOR * 10000) - register_cache("token_cache", self.token_cache) + register_cache("cache", "token_cache", self.token_cache) @defer.inlineCallbacks def check_from_context(self, event, context, do_sig_check=True): diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 6163f7c466..2761ffae07 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -32,20 +32,17 @@ from synapse.federation.federation_base import ( FederationBase, event_from_pdu_json, ) -import synapse.metrics from synapse.util import logcontext, unwrapFirstError from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.logcontext import make_deferred_yieldable, run_in_background from synapse.util.logutils import log_function from synapse.util.retryutils import NotRetryingDestination -logger = logging.getLogger(__name__) - +from prometheus_client import Counter -# synapse.federation.federation_client is a silly name -metrics = synapse.metrics.get_metrics_for("synapse.federation.client") +logger = logging.getLogger(__name__) -sent_queries_counter = metrics.register_counter("sent_queries", labels=["type"]) +sent_queries_counter = Counter("synapse_federation_client_sent_queries", "", ["type"]) PDU_RETRY_TIME_MS = 1 * 60 * 1000 @@ -108,7 +105,7 @@ class FederationClient(FederationBase): a Deferred which will eventually yield a JSON object from the response """ - sent_queries_counter.inc(query_type) + sent_queries_counter.labels(query_type).inc() return self.transport_layer.make_query( destination, query_type, args, retry_on_dns_fail=retry_on_dns_fail, @@ -127,7 +124,7 @@ class FederationClient(FederationBase): a Deferred which will eventually yield a JSON object from the response """ - sent_queries_counter.inc("client_device_keys") + sent_queries_counter.labels("client_device_keys").inc() return self.transport_layer.query_client_keys( destination, content, timeout ) @@ -137,7 +134,7 @@ class FederationClient(FederationBase): """Query the device keys for a list of user ids hosted on a remote server. """ - sent_queries_counter.inc("user_devices") + sent_queries_counter.labels("user_devices").inc() return self.transport_layer.query_user_devices( destination, user_id, timeout ) @@ -154,7 +151,7 @@ class FederationClient(FederationBase): a Deferred which will eventually yield a JSON object from the response """ - sent_queries_counter.inc("client_one_time_keys") + sent_queries_counter.labels("client_one_time_keys").inc() return self.transport_layer.claim_client_keys( destination, content, timeout ) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 247ddc89d5..8211273006 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -27,12 +27,13 @@ from synapse.federation.federation_base import ( from synapse.federation.persistence import TransactionActions from synapse.federation.units import Edu, Transaction -import synapse.metrics from synapse.types import get_domain_from_id from synapse.util import async from synapse.util.caches.response_cache import ResponseCache from synapse.util.logutils import log_function +from prometheus_client import Counter + from six import iteritems # when processing incoming transactions, we try to handle multiple rooms in @@ -41,14 +42,11 @@ TRANSACTION_CONCURRENCY_LIMIT = 10 logger = logging.getLogger(__name__) -# synapse.federation.federation_server is a silly name -metrics = synapse.metrics.get_metrics_for("synapse.federation.server") - -received_pdus_counter = metrics.register_counter("received_pdus") +received_pdus_counter = Counter("synapse_federation_server_received_pdus", "") -received_edus_counter = metrics.register_counter("received_edus") +received_edus_counter = Counter("synapse_federation_server_received_edus", "") -received_queries_counter = metrics.register_counter("received_queries", labels=["type"]) +received_queries_counter = Counter("synapse_federation_server_received_queries", "", ["type"]) class FederationServer(FederationBase): @@ -131,7 +129,7 @@ class FederationServer(FederationBase): logger.debug("[%s] Transaction is new", transaction.transaction_id) - received_pdus_counter.inc_by(len(transaction.pdus)) + received_pdus_counter.inc(len(transaction.pdus)) pdus_by_room = {} @@ -292,7 +290,7 @@ class FederationServer(FederationBase): @defer.inlineCallbacks def on_query_request(self, query_type, args): - received_queries_counter.inc(query_type) + received_queries_counter.labels(query_type).inc() resp = yield self.registry.on_query(query_type, args) defer.returnValue((200, resp)) diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py index 0f0c687b37..e6e1888f3a 100644 --- a/synapse/federation/send_queue.py +++ b/synapse/federation/send_queue.py @@ -33,7 +33,7 @@ from .units import Edu from synapse.storage.presence import UserPresenceState from synapse.util.metrics import Measure -import synapse.metrics +from synapse.metrics import LaterGauge from blist import sorteddict from collections import namedtuple @@ -45,9 +45,6 @@ from six import itervalues, iteritems logger = logging.getLogger(__name__) -metrics = synapse.metrics.get_metrics_for(__name__) - - class FederationRemoteSendQueue(object): """A drop in replacement for TransactionQueue""" @@ -77,8 +74,7 @@ class FederationRemoteSendQueue(object): # lambda binds to the queue rather than to the name of the queue which # changes. ARGH. def register(name, queue): - metrics.register_callback( - queue_name + "_size", + LaterGauge("synapse_federation_send_queue_%s_size" % (queue_name,), "", lambda: len(queue), ) diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index ded2b1871a..778924a13c 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -26,23 +26,18 @@ from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter from synapse.util.metrics import measure_func from synapse.handlers.presence import format_user_presence_state, get_interested_remotes import synapse.metrics +from synapse.metrics import LaterGauge +from synapse.metrics import ( + sent_edus_counter, sent_transactions_counter, events_processed_counter) + +from prometheus_client import Counter import logging logger = logging.getLogger(__name__) -metrics = synapse.metrics.get_metrics_for(__name__) - -client_metrics = synapse.metrics.get_metrics_for("synapse.federation.client") -sent_pdus_destination_dist = client_metrics.register_distribution( - "sent_pdu_destinations" -) -sent_edus_counter = client_metrics.register_counter("sent_edus") - -sent_transactions_counter = client_metrics.register_counter("sent_transactions") - -events_processed_counter = client_metrics.register_counter("events_processed") +sent_pdus_destination_dist = Counter("synapse_federation_client_sent_pdu_destinations", "") class TransactionQueue(object): @@ -69,8 +64,7 @@ class TransactionQueue(object): # done self.pending_transactions = {} - metrics.register_callback( - "pending_destinations", + LaterGauge("pending_destinations", "", [], lambda: len(self.pending_transactions), ) @@ -94,12 +88,12 @@ class TransactionQueue(object): # Map of destination -> (edu_type, key) -> Edu self.pending_edus_keyed_by_dest = edus_keyed = {} - metrics.register_callback( - "pending_pdus", + LaterGauge( + "pending_pdus", "", [], lambda: sum(map(len, pdus.values())), ) - metrics.register_callback( - "pending_edus", + LaterGauge( + "pending_edus", "", [], lambda: ( sum(map(len, edus.values())) + sum(map(len, presence.values())) @@ -241,18 +235,15 @@ class TransactionQueue(object): now = self.clock.time_msec() ts = yield self.store.get_received_ts(events[-1].event_id) - synapse.metrics.event_processing_lag.set( - now - ts, "federation_sender", - ) - synapse.metrics.event_processing_last_ts.set( - ts, "federation_sender", - ) + synapse.metrics.event_processing_lag.labels( + "federation_sender").set(now - ts) + synapse.metrics.event_processing_last_ts.labels( + "federation_sender").set(ts) - events_processed_counter.inc_by(len(events)) + events_processed_counter.inc(len(events)) - synapse.metrics.event_processing_positions.set( - next_token, "federation_sender", - ) + synapse.metrics.event_processing_positions.labels( + "federation_sender").set(next_token) finally: self._is_processing = False @@ -275,7 +266,7 @@ class TransactionQueue(object): if not destinations: return - sent_pdus_destination_dist.inc_by(len(destinations)) + sent_pdus_destination_dist.inc(len(destinations)) for destination in destinations: self.pending_pdus_by_dest.setdefault(destination, []).append( diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index b596f098fd..a7345331af 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -21,14 +21,13 @@ from synapse.util.metrics import Measure from synapse.util.logcontext import ( make_deferred_yieldable, run_in_background, ) +from prometheus_client import Counter import logging logger = logging.getLogger(__name__) -metrics = synapse.metrics.get_metrics_for(__name__) - -events_processed_counter = metrics.register_counter("events_processed") +events_processed_counter = Counter("synapse_handlers_appservice_events_processed", "") def log_failure(failure): @@ -128,18 +127,12 @@ class ApplicationServicesHandler(object): now = self.clock.time_msec() ts = yield self.store.get_received_ts(events[-1].event_id) - synapse.metrics.event_processing_positions.set( - upper_bound, "appservice_sender", - ) + synapse.metrics.event_processing_positions.labels("appservice_sender").set(upper_bound) - events_processed_counter.inc_by(len(events)) + events_processed_counter.inc(len(events)) - synapse.metrics.event_processing_lag.set( - now - ts, "appservice_sender", - ) - synapse.metrics.event_processing_last_ts.set( - ts, "appservice_sender", - ) + synapse.metrics.event_processing_lag.labels("appservice_sender").set(now - ts) + synapse.metrics.event_processing_last_ts.labels("appservice_sender").set(ts) finally: self.is_processing = False diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 585f3e4da2..06d937ef3a 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -36,27 +36,27 @@ from synapse.util.logutils import log_function from synapse.util.metrics import Measure from synapse.util.wheel_timer import WheelTimer from synapse.types import UserID, get_domain_from_id -import synapse.metrics +from synapse.metrics import LaterGauge import logging +from prometheus_client import Counter + logger = logging.getLogger(__name__) -metrics = synapse.metrics.get_metrics_for(__name__) -notified_presence_counter = metrics.register_counter("notified_presence") -federation_presence_out_counter = metrics.register_counter("federation_presence_out") -presence_updates_counter = metrics.register_counter("presence_updates") -timers_fired_counter = metrics.register_counter("timers_fired") -federation_presence_counter = metrics.register_counter("federation_presence") -bump_active_time_counter = metrics.register_counter("bump_active_time") +notified_presence_counter = Counter("synapse_handler_presence_notified_presence", "") +federation_presence_out_counter = Counter("synapse_handler_presence_federation_presence_out", "") +presence_updates_counter = Counter("synapse_handler_presence_presence_updates", "") +timers_fired_counter = Counter("synapse_handler_presence_timers_fired", "") +federation_presence_counter = Counter("synapse_handler_presence_federation_presence", "") +bump_active_time_counter = Counter("synapse_handler_presence_bump_active_time", "") -get_updates_counter = metrics.register_counter("get_updates", labels=["type"]) +get_updates_counter = Counter("synapse_handler_presence_get_updates", "", ["type"]) -notify_reason_counter = metrics.register_counter("notify_reason", labels=["reason"]) -state_transition_counter = metrics.register_counter( - "state_transition", labels=["from", "to"] +notify_reason_counter = Counter("synapse_handler_presence_notify_reason", "", ["reason"]) +state_transition_counter = Counter("synapse_handler_presence_state_transition", "", ["from", "to"] ) @@ -137,9 +137,9 @@ class PresenceHandler(object): for state in active_presence } - metrics.register_callback( - "user_to_current_state_size", lambda: len(self.user_to_current_state) - ) + LaterGauge( + "user_to_current_state_size", "", [], lambda: len(self.user_to_current_state) + ).register() now = self.clock.time_msec() for state in active_presence: @@ -208,7 +208,7 @@ class PresenceHandler(object): 60 * 1000, ) - metrics.register_callback("wheel_timer_size", lambda: len(self.wheel_timer)) + LaterGauge("wheel_timer_size", "", [], lambda: len(self.wheel_timer)).register() @defer.inlineCallbacks def _on_shutdown(self): @@ -311,10 +311,10 @@ class PresenceHandler(object): # TODO: We should probably ensure there are no races hereafter - presence_updates_counter.inc_by(len(new_states)) + presence_updates_counter.inc(len(new_states)) if to_notify: - notified_presence_counter.inc_by(len(to_notify)) + notified_presence_counter.inc(len(to_notify)) yield self._persist_and_notify(to_notify.values()) self.unpersisted_users_changes |= set(s.user_id for s in new_states) @@ -325,7 +325,7 @@ class PresenceHandler(object): if user_id not in to_notify } if to_federation_ping: - federation_presence_out_counter.inc_by(len(to_federation_ping)) + federation_presence_out_counter.inc(len(to_federation_ping)) self._push_to_remotes(to_federation_ping.values()) @@ -363,7 +363,7 @@ class PresenceHandler(object): for user_id in users_to_check ] - timers_fired_counter.inc_by(len(states)) + timers_fired_counter.inc(len(states)) changes = handle_timeouts( states, @@ -707,7 +707,7 @@ class PresenceHandler(object): updates.append(prev_state.copy_and_replace(**new_fields)) if updates: - federation_presence_counter.inc_by(len(updates)) + federation_presence_counter.inc(len(updates)) yield self._update_states(updates) @defer.inlineCallbacks @@ -982,28 +982,28 @@ def should_notify(old_state, new_state): return False if old_state.status_msg != new_state.status_msg: - notify_reason_counter.inc("status_msg_change") + notify_reason_counter.labels("status_msg_change").inc() return True if old_state.state != new_state.state: - notify_reason_counter.inc("state_change") - state_transition_counter.inc(old_state.state, new_state.state) + notify_reason_counter.labels("state_change").inc() + state_transition_counter.labels(old_state.state, new_state.state).inc() return True if old_state.state == PresenceState.ONLINE: if new_state.currently_active != old_state.currently_active: - notify_reason_counter.inc("current_active_change") + notify_reason_counter.labels("current_active_change").inc() return True if new_state.last_active_ts - old_state.last_active_ts > LAST_ACTIVE_GRANULARITY: # Only notify about last active bumps if we're not currently acive if not new_state.currently_active: - notify_reason_counter.inc("last_active_change_online") + notify_reason_counter.labels("last_active_change_online").inc() return True elif new_state.last_active_ts - old_state.last_active_ts > LAST_ACTIVE_GRANULARITY: # Always notify for a transition where last active gets bumped. - notify_reason_counter.inc("last_active_change_not_online") + notify_reason_counter.labels("last_active_change_not_online").inc() return True return False @@ -1077,14 +1077,14 @@ class PresenceEventSource(object): if changed is not None and len(changed) < 500: # For small deltas, its quicker to get all changes and then # work out if we share a room or they're in our presence list - get_updates_counter.inc("stream") + get_updates_counter.labels("stream").inc() for other_user_id in changed: if other_user_id in users_interested_in: user_ids_changed.add(other_user_id) else: # Too many possible updates. Find all users we can see and check # if any of them have changed. - get_updates_counter.inc("full") + get_updates_counter.labels("full").inc() if from_key: user_ids_changed = stream_change_cache.get_entities_changed( diff --git a/synapse/http/client.py b/synapse/http/client.py index 70a19d9b74..61a1d2e2b3 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -23,7 +23,6 @@ from synapse.http import cancelled_to_request_timed_out_error from synapse.util.async import add_timeout_to_deferred from synapse.util.caches import CACHE_SIZE_FACTOR from synapse.util.logcontext import make_deferred_yieldable -import synapse.metrics from synapse.http.endpoint import SpiderEndpoint from canonicaljson import encode_canonical_json @@ -42,6 +41,7 @@ from twisted.web._newclient import ResponseDone from six import StringIO +from prometheus_client import Counter import simplejson as json import logging import urllib @@ -49,16 +49,8 @@ import urllib logger = logging.getLogger(__name__) -metrics = synapse.metrics.get_metrics_for(__name__) - -outgoing_requests_counter = metrics.register_counter( - "requests", - labels=["method"], -) -incoming_responses_counter = metrics.register_counter( - "responses", - labels=["method", "code"], -) +outgoing_requests_counter = Counter("synapse_http_client_requests", "", ["method"]) +incoming_responses_counter = Counter("synapse_http_client_responses", "", ["method", "code"]) class SimpleHttpClient(object): @@ -95,7 +87,7 @@ class SimpleHttpClient(object): def request(self, method, uri, *args, **kwargs): # A small wrapper around self.agent.request() so we can easily attach # counters to it - outgoing_requests_counter.inc(method) + outgoing_requests_counter.labels(method).inc() logger.info("Sending request %s %s", method, uri) @@ -109,14 +101,14 @@ class SimpleHttpClient(object): ) response = yield make_deferred_yieldable(request_deferred) - incoming_responses_counter.inc(method, response.code) + incoming_responses_counter.labels(method, response.code).inc() logger.info( "Received response to %s %s: %s", method, uri, response.code ) defer.returnValue(response) except Exception as e: - incoming_responses_counter.inc(method, "ERR") + incoming_responses_counter.labels(method, "ERR").inc() logger.info( "Error sending request to %s %s: %s %s", method, uri, type(e).__name__, e.message diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 4b2b85464d..259d3884e2 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -43,19 +43,13 @@ import sys import urllib from six.moves.urllib import parse as urlparse +from prometheus_client import Counter + logger = logging.getLogger(__name__) outbound_logger = logging.getLogger("synapse.http.outbound") -metrics = synapse.metrics.get_metrics_for(__name__) - -outgoing_requests_counter = metrics.register_counter( - "requests", - labels=["method"], -) -incoming_responses_counter = metrics.register_counter( - "responses", - labels=["method", "code"], -) +outgoing_requests_counter = Counter("synapse_http_matrixfederationclient_requests", "", ["method"]) +incoming_responses_counter = Counter("synapse_http_matrixfederationclient_responses", "", ["method", "code"]) MAX_LONG_RETRIES = 10 diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index e3b831db67..973ba6506f 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -18,14 +18,13 @@ import functools import time import gc import platform +import attr -from twisted.internet import reactor +from prometheus_client import Gauge, Histogram, Counter +from prometheus_client.core import ( + GaugeMetricFamily, CounterMetricFamily, REGISTRY) -from .metric import ( - CounterMetric, CallbackMetric, DistributionMetric, CacheMetric, - MemoryUsageMetric, GaugeMetric, -) -from .process_collector import register_process_collector +from twisted.internet import reactor logger = logging.getLogger(__name__) @@ -34,149 +33,94 @@ logger = logging.getLogger(__name__) running_on_pypy = platform.python_implementation() == 'PyPy' all_metrics = [] all_collectors = [] +all_gauges = {} +@attr.s(hash=True) +class LaterGauge(object): -class Metrics(object): - """ A single Metrics object gives a (mutable) slice view of the all_metrics - dict, allowing callers to easily register new metrics that are namespaced - nicely.""" - - def __init__(self, name): - self.name_prefix = name - - def make_subspace(self, name): - return Metrics("%s_%s" % (self.name_prefix, name)) - - def register_collector(self, func): - all_collectors.append(func) - - def _register(self, metric_class, name, *args, **kwargs): - full_name = "%s_%s" % (self.name_prefix, name) - - metric = metric_class(full_name, *args, **kwargs) - - all_metrics.append(metric) - return metric - - def register_counter(self, *args, **kwargs): - """ - Returns: - CounterMetric - """ - return self._register(CounterMetric, *args, **kwargs) - - def register_gauge(self, *args, **kwargs): - """ - Returns: - GaugeMetric - """ - return self._register(GaugeMetric, *args, **kwargs) + name = attr.ib() + desc = attr.ib() + labels = attr.ib(hash=False) + caller = attr.ib() - def register_callback(self, *args, **kwargs): - """ - Returns: - CallbackMetric - """ - return self._register(CallbackMetric, *args, **kwargs) + def collect(self): - def register_distribution(self, *args, **kwargs): - """ - Returns: - DistributionMetric - """ - return self._register(DistributionMetric, *args, **kwargs) - - def register_cache(self, *args, **kwargs): - """ - Returns: - CacheMetric - """ - return self._register(CacheMetric, *args, **kwargs) + g = GaugeMetricFamily(self.name, self.desc, self.labels) + try: + calls = self.caller() + except Exception as e: + print(e) + logger.err() + yield g -def register_memory_metrics(hs): - try: - import psutil - process = psutil.Process() - process.memory_info().rss - except (ImportError, AttributeError): - logger.warn( - "psutil is not installed or incorrect version." - " Disabling memory metrics." - ) - return - metric = MemoryUsageMetric(hs, psutil) - all_metrics.append(metric) + if isinstance(calls, dict): + for k, v in calls.items(): + g.add_metric(k, v) + else: + g.add_metric([], calls) + yield g -def get_metrics_for(pkg_name): - """ Returns a Metrics instance for conveniently creating metrics - namespaced with the given name prefix. """ + def register(self): + if self.name in all_gauges.keys(): + REGISTRY.unregister(all_gauges.pop(self.name)) - # Convert a "package.name" to "package_name" because Prometheus doesn't - # let us use . in metric names - return Metrics(pkg_name.replace(".", "_")) + REGISTRY.register(self) + all_gauges[self.name] = self -def render_all(): - strs = [] +# +# Python GC metrics +# - for collector in all_collectors: - collector() +gc_unreachable = Gauge("python_gc_unreachable_total", "Unreachable GC objects", ["gen"]) +gc_time = Histogram("python_gc_time", "Time taken to GC (ms)", ["gen"], buckets=[1, 2, 5, 10, 25, 50, 100, 250, 500, 1000]) - for metric in all_metrics: - try: - strs += metric.render() - except Exception: - strs += ["# FAILED to render"] - logger.exception("Failed to render metric") +class GCCounts(object): + def collect(self): + gc_counts = gc.get_count() - strs.append("") # to generate a final CRLF + cm = GaugeMetricFamily("python_gc_counts", "GC cycle counts", labels=["gen"]) + for n, m in enumerate(gc.get_count()): + cm.add_metric([str(n)], m) - return "\n".join(strs) + yield cm +REGISTRY.register(GCCounts()) -register_process_collector(get_metrics_for("process")) +# +# Twisted reactor metrics +# +tick_time = Histogram("python_twisted_reactor_tick_time", "Tick time of the Twisted reactor (ms)", buckets=[1, 2, 5, 10, 50, 100, 250, 500, 1000, 2000]) +pending_calls_metric = Histogram("python_twisted_reactor_pending_calls", "Pending calls", buckets=[1, 2, 5, 10, 25, 50, 100, 250, 500, 1000]) -python_metrics = get_metrics_for("python") +# +# Federation Metrics +# -gc_time = python_metrics.register_distribution("gc_time", labels=["gen"]) -gc_unreachable = python_metrics.register_counter("gc_unreachable_total", labels=["gen"]) -python_metrics.register_callback( - "gc_counts", lambda: {(i,): v for i, v in enumerate(gc.get_count())}, labels=["gen"] -) +sent_edus_counter = Counter("synapse_federation_client_sent_edus", "") -reactor_metrics = get_metrics_for("python.twisted.reactor") -tick_time = reactor_metrics.register_distribution("tick_time") -pending_calls_metric = reactor_metrics.register_distribution("pending_calls") +sent_transactions_counter = Counter("synapse_federation_client_sent_transactions", "") -synapse_metrics = get_metrics_for("synapse") +events_processed_counter = Counter("synapse_federation_client_events_processed", "") # Used to track where various components have processed in the event stream, # e.g. federation sending, appservice sending, etc. -event_processing_positions = synapse_metrics.register_gauge( - "event_processing_positions", labels=["name"], -) +event_processing_positions = Gauge("synapse_event_processing_positions", "", ["name"]) # Used to track the current max events stream position -event_persisted_position = synapse_metrics.register_gauge( - "event_persisted_position", -) +event_persisted_position = Gauge("synapse_event_persisted_position", "") # Used to track the received_ts of the last event processed by various # components -event_processing_last_ts = synapse_metrics.register_gauge( - "event_processing_last_ts", labels=["name"], -) +event_processing_last_ts = Gauge("synapse_event_processing_last_ts", "", ["name"]) # Used to track the lag processing events. This is the time difference # between the last processed event's received_ts and the time it was # finished being processed. -event_processing_lag = synapse_metrics.register_gauge( - "event_processing_lag", labels=["name"], -) - +event_processing_lag = Gauge("synapse_event_processing_lag", "", ["name"]) def runUntilCurrentTimer(func): @@ -206,8 +150,8 @@ def runUntilCurrentTimer(func): # since about 25% of time is actually spent running things triggered by # I/O events, but that is harder to capture without rewriting half the # reactor. - tick_time.inc_by(end - start) - pending_calls_metric.inc_by(num_pending) + tick_time.observe(end - start) + pending_calls_metric.observe(num_pending) if running_on_pypy: return ret @@ -224,8 +168,8 @@ def runUntilCurrentTimer(func): unreachable = gc.collect(i) end = time.time() * 1000 - gc_time.inc_by(end - start, i) - gc_unreachable.inc_by(unreachable, i) + gc_time.labels(i).observe(end - start) + gc_unreachable.labels(i).set(unreachable) return ret diff --git a/synapse/notifier.py b/synapse/notifier.py index 8355c7d621..123e6f1840 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -28,22 +28,19 @@ from synapse.util.logcontext import PreserveLoggingContext, run_in_background from synapse.util.metrics import Measure from synapse.types import StreamToken from synapse.visibility import filter_events_for_client -import synapse.metrics +from synapse.metrics import LaterGauge from collections import namedtuple +from prometheus_client import Counter import logging logger = logging.getLogger(__name__) -metrics = synapse.metrics.get_metrics_for(__name__) +notified_events_counter = Counter("synapse_notifier_notified_events", "") -notified_events_counter = metrics.register_counter("notified_events") - -users_woken_by_stream_counter = metrics.register_counter( - "users_woken_by_stream", labels=["stream"] -) +users_woken_by_stream_counter = Counter("synapse_notifier_users_woken_by_stream", "", ["stream"]) # TODO(paul): Should be shared somewhere @@ -108,7 +105,7 @@ class _NotifierUserStream(object): self.last_notified_ms = time_now_ms noify_deferred = self.notify_deferred - users_woken_by_stream_counter.inc(stream_key) + users_woken_by_stream_counter.labels(stream_key).inc() with PreserveLoggingContext(): self.notify_deferred = ObservableDeferred(defer.Deferred()) @@ -197,14 +194,14 @@ class Notifier(object): all_user_streams.add(x) return sum(stream.count_listeners() for stream in all_user_streams) - metrics.register_callback("listeners", count_listeners) + LaterGauge("listeners", "", [], count_listeners) - metrics.register_callback( - "rooms", + LaterGauge( + "rooms", "", [], lambda: count(bool, self.room_to_user_streams.values()), ) - metrics.register_callback( - "users", + LaterGauge( + "users", "", [], lambda: len(self.user_to_user_stream), ) diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index b077e1a446..e22088ad6f 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -20,22 +20,17 @@ from twisted.internet.error import AlreadyCalled, AlreadyCancelled from . import push_rule_evaluator from . import push_tools -import synapse from synapse.push import PusherConfigException from synapse.util.logcontext import LoggingContext from synapse.util.metrics import Measure -logger = logging.getLogger(__name__) +from prometheus_client import Counter -metrics = synapse.metrics.get_metrics_for(__name__) +logger = logging.getLogger(__name__) -http_push_processed_counter = metrics.register_counter( - "http_pushes_processed", -) +http_push_processed_counter = Counter("http_pushes_processed", "") -http_push_failed_counter = metrics.register_counter( - "http_pushes_failed", -) +http_push_failed_counter = Counter("http_pushes_failed", "") class HttpPusher(object): diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py index 3601f2d365..c3e6c5c258 100644 --- a/synapse/push/push_rule_evaluator.py +++ b/synapse/push/push_rule_evaluator.py @@ -150,7 +150,7 @@ class PushRuleEvaluatorForEvent(object): # Caches (glob, word_boundary) -> regex for push. See _glob_matches regex_cache = LruCache(50000 * CACHE_SIZE_FACTOR) -register_cache("regex_push_cache", regex_cache) +register_cache("cache", "regex_push_cache", regex_cache) def _glob_matches(glob, value, word_boundary=False): diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py index 216db4d164..478c497722 100644 --- a/synapse/python_dependencies.py +++ b/synapse/python_dependencies.py @@ -56,6 +56,7 @@ REQUIREMENTS = { "msgpack-python>=0.3.0": ["msgpack"], "phonenumbers>=8.2.0": ["phonenumbers"], "six": ["six"], + "prometheus_client": ["prometheus_client"], } CONDITIONAL_REQUIREMENTS = { "web_client": { diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index d7d38464b2..5848f57c5e 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -60,19 +60,19 @@ from .commands import ( ) from .streams import STREAMS_MAP +from synapse.metrics import LaterGauge from synapse.util.stringutils import random_string -from synapse.metrics.metric import CounterMetric + +from prometheus_client import Counter + +from collections import defaultdict import logging -import synapse.metrics import struct import fcntl - -metrics = synapse.metrics.get_metrics_for(__name__) - -connection_close_counter = metrics.register_counter( - "close_reason", labels=["reason_type"], +connection_close_counter = Counter( + "synapse_replication_tcp_protocol_close_reason", "", ["reason_type"], ) @@ -136,12 +136,8 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver): # The LoopingCall for sending pings. self._send_ping_loop = None - self.inbound_commands_counter = CounterMetric( - "inbound_commands", labels=["command"], - ) - self.outbound_commands_counter = CounterMetric( - "outbound_commands", labels=["command"], - ) + self.inbound_commands_counter = defaultdict(int) + self.outbound_commands_counter = defaultdict(int) def connectionMade(self): logger.info("[%s] Connection established", self.id()) @@ -201,7 +197,7 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver): self.last_received_command = self.clock.time_msec() - self.inbound_commands_counter.inc(cmd_name) + self.inbound_commands_counter[cmd_name] = self.inbound_commands_counter[cmd_name] + 1 cmd_cls = COMMAND_MAP[cmd_name] try: @@ -251,8 +247,7 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver): self._queue_command(cmd) return - self.outbound_commands_counter.inc(cmd.NAME) - + self.outbound_commands_counter[cmd.NAME] = self.outbound_commands_counter[cmd.NAME] + 1 string = "%s %s" % (cmd.NAME, cmd.to_line(),) if "\n" in string: raise Exception("Unexpected newline in command: %r", string) @@ -317,9 +312,9 @@ class BaseReplicationStreamProtocol(LineOnlyReceiver): def connectionLost(self, reason): logger.info("[%s] Replication connection closed: %r", self.id(), reason) if isinstance(reason, Failure): - connection_close_counter.inc(reason.type.__name__) + connection_close_counter.labels(reason.type.__name__).inc() else: - connection_close_counter.inc(reason.__class__.__name__) + connection_close_counter.labels(reason.__class__.__name__).inc() try: # Remove us from list of connections to be monitored @@ -566,14 +561,12 @@ class ClientReplicationStreamProtocol(BaseReplicationStreamProtocol): # The following simply registers metrics for the replication connections -metrics.register_callback( - "pending_commands", +pending_commands = LaterGauge( + "pending_commands", "", ["name", "conn_id"], lambda: { (p.name, p.conn_id): len(p.pending_commands) for p in connected_connections - }, - labels=["name", "conn_id"], -) + }) def transport_buffer_size(protocol): @@ -583,14 +576,12 @@ def transport_buffer_size(protocol): return 0 -metrics.register_callback( - "transport_send_buffer", +transport_send_buffer = LaterGauge( + "synapse_replication_tcp_transport_send_buffer", "", ["name", "conn_id"], lambda: { (p.name, p.conn_id): transport_buffer_size(p) for p in connected_connections - }, - labels=["name", "conn_id"], -) + }) def transport_kernel_read_buffer_size(protocol, read=True): @@ -608,48 +599,37 @@ def transport_kernel_read_buffer_size(protocol, read=True): return 0 -metrics.register_callback( - "transport_kernel_send_buffer", +tcp_transport_kernel_send_buffer = LaterGauge( + "synapse_replication_tcp_transport_kernel_send_buffer", "", ["name", "conn_id"], lambda: { (p.name, p.conn_id): transport_kernel_read_buffer_size(p, False) for p in connected_connections - }, - labels=["name", "conn_id"], -) + }) -metrics.register_callback( - "transport_kernel_read_buffer", +tcp_transport_kernel_read_buffer = LaterGauge( + "synapse_replication_tcp_transport_kernel_read_buffer", "", ["name", "conn_id"], lambda: { (p.name, p.conn_id): transport_kernel_read_buffer_size(p, True) for p in connected_connections - }, - labels=["name", "conn_id"], -) + }) -metrics.register_callback( - "inbound_commands", +tcp_inbound_commands = LaterGauge( + "synapse_replication_tcp_inbound_commands", "", ["command", "name", "conn_id"], lambda: { (k[0], p.name, p.conn_id): count for p in connected_connections - for k, count in p.inbound_commands_counter.counts.iteritems() - }, - labels=["command", "name", "conn_id"], -) + for k, count in p.inbound_commands_counter.items() + }) -metrics.register_callback( - "outbound_commands", +tcp_outbound_commands = LaterGauge( + "synapse_replication_tcp_outbound_commands", "", ["command", "name", "conn_id"], lambda: { (k[0], p.name, p.conn_id): count for p in connected_connections - for k, count in p.outbound_commands_counter.counts.iteritems() - }, - labels=["command", "name", "conn_id"], -) + for k, count in p.outbound_commands_counter.items() + }) # number of updates received for each RDATA stream -inbound_rdata_count = metrics.register_counter( - "inbound_rdata_count", - labels=["stream_name"], -) +inbound_rdata_count = Counter("synapse_replication_tcp_inbound_rdata_count", "", ["stream_name"]) diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index 4adae96681..438dcddf55 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -13,29 +13,52 @@ # See the License for the specific language governing permissions and # limitations under the License. -import synapse.metrics +from prometheus_client.core import GaugeMetricFamily, REGISTRY + import os CACHE_SIZE_FACTOR = float(os.environ.get("SYNAPSE_CACHE_FACTOR", 0.5)) -metrics = synapse.metrics.get_metrics_for("synapse.util.caches") - caches_by_name = {} -# cache_counter = metrics.register_cache( -# "cache", -# lambda: {(name,): len(caches_by_name[name]) for name in caches_by_name.keys()}, -# labels=["name"], -# ) - - -def register_cache(name, cache): - caches_by_name[name] = cache - return metrics.register_cache( - "cache", - lambda: len(cache), - name, - ) +collectors_by_name = {} + +def register_cache(name, cache_name, cache): + + # Check if the metric is already registered. Unregister it, if so. + metric_name = "synapse_util_caches_%s:%s" % (name, cache_name,) + if metric_name in collectors_by_name.keys(): + REGISTRY.unregister(collectors_by_name[metric_name]) + + class CacheMetric(object): + + hits = 0 + misses = 0 + evicted_size = 0 + + def inc_hits(self): + self.hits += 1 + + def inc_misses(self): + self.misses += 1 + + def inc_evictions(self, size=1): + self.evicted_size += size + + def collect(self): + cache_size = len(cache) + + gm = GaugeMetricFamily(metric_name, "", labels=["size", "hits", "misses", "total"]) + gm.add_metric(["size"], cache_size) + gm.add_metric(["hits"], self.hits) + gm.add_metric(["misses"], self.misses) + gm.add_metric(["total"], self.hits + self.misses) + yield gm + metric = CacheMetric() + REGISTRY.register(metric) + caches_by_name[cache_name] = cache + collectors_by_name[metric_name] = metric + return metric KNOWN_KEYS = { key: key for key in diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index 68285a7594..a4188eb099 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -80,7 +80,7 @@ class Cache(object): self.name = name self.keylen = keylen self.thread = None - self.metrics = register_cache(name, self.cache) + self.metrics = register_cache("descriptor", name, self.cache) def _on_evicted(self, evicted_count): self.metrics.inc_evictions(evicted_count) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index 1709e8b429..bdc21e348f 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -55,7 +55,7 @@ class DictionaryCache(object): __slots__ = [] self.sentinel = Sentinel() - self.metrics = register_cache(name, self.cache) + self.metrics = register_cache("dictionary", name, self.cache) def check_thread(self): expected_thread = self.thread diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py index 0aa103eecb..ff04c91955 100644 --- a/synapse/util/caches/expiringcache.py +++ b/synapse/util/caches/expiringcache.py @@ -52,12 +52,12 @@ class ExpiringCache(object): self._cache = OrderedDict() - self.metrics = register_cache(cache_name, self) - self.iterable = iterable self._size_estimate = 0 + self.metrics = register_cache("expiring", cache_name, self) + def start(self): if not self._expiry_ms: # Don't bother starting the loop if things never expire diff --git a/synapse/util/caches/response_cache.py b/synapse/util/caches/response_cache.py index 7f79333e96..a8491b42d5 100644 --- a/synapse/util/caches/response_cache.py +++ b/synapse/util/caches/response_cache.py @@ -17,7 +17,7 @@ import logging from twisted.internet import defer from synapse.util.async import ObservableDeferred -from synapse.util.caches import metrics as cache_metrics +from synapse.util.caches import register_cache from synapse.util.logcontext import make_deferred_yieldable, run_in_background logger = logging.getLogger(__name__) @@ -38,15 +38,16 @@ class ResponseCache(object): self.timeout_sec = timeout_ms / 1000. self._name = name - self._metrics = cache_metrics.register_cache( - "response_cache", - size_callback=lambda: self.size(), - cache_name=name, + self._metrics = register_cache( + "response_cache", name, self ) def size(self): return len(self.pending_result_cache) + def __len__(self): + return self.size() + def get(self, key): """Look up the given key. diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py index 941d873ab8..a7fe0397fa 100644 --- a/synapse/util/caches/stream_change_cache.py +++ b/synapse/util/caches/stream_change_cache.py @@ -38,7 +38,7 @@ class StreamChangeCache(object): self._cache = sorteddict() self._earliest_known_stream_pos = current_stream_pos self.name = name - self.metrics = register_cache(self.name, self._cache) + self.metrics = register_cache("cache", self.name, self._cache) for entity, stream_pos in prefilled_cache.items(): self.entity_has_changed(entity, stream_pos) diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index e4b5687a4b..a964286d85 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -15,8 +15,8 @@ from twisted.internet import defer +from prometheus_client import Counter from synapse.util.logcontext import LoggingContext -import synapse.metrics from functools import wraps import logging @@ -24,66 +24,21 @@ import logging logger = logging.getLogger(__name__) +block_counter = Counter("synapse_util_metrics_block_count", "", ["block_name"]) -metrics = synapse.metrics.get_metrics_for(__name__) - -# total number of times we have hit this block -block_counter = metrics.register_counter( - "block_count", - labels=["block_name"], - alternative_names=( - # the following are all deprecated aliases for the same metric - metrics.name_prefix + x for x in ( - "_block_timer:count", - "_block_ru_utime:count", - "_block_ru_stime:count", - "_block_db_txn_count:count", - "_block_db_txn_duration:count", - ) - ) -) - -block_timer = metrics.register_counter( - "block_time_seconds", - labels=["block_name"], - alternative_names=( - metrics.name_prefix + "_block_timer:total", - ), -) - -block_ru_utime = metrics.register_counter( - "block_ru_utime_seconds", labels=["block_name"], - alternative_names=( - metrics.name_prefix + "_block_ru_utime:total", - ), -) - -block_ru_stime = metrics.register_counter( - "block_ru_stime_seconds", labels=["block_name"], - alternative_names=( - metrics.name_prefix + "_block_ru_stime:total", - ), -) - -block_db_txn_count = metrics.register_counter( - "block_db_txn_count", labels=["block_name"], - alternative_names=( - metrics.name_prefix + "_block_db_txn_count:total", - ), -) +block_timer = Counter("synapse_util_metrics_block_time_seconds", "", ["block_name"]) + +block_ru_utime = Counter("synapse_util_metrics_block_ru_utime_seconds", "", ["block_name"]) + +block_ru_stime = Counter("synapse_util_metrics_block_ru_stime_seconds", "", ["block_name"]) + +block_db_txn_count = Counter("synapse_util_metrics_block_db_txn_count", "", ["block_name"]) # seconds spent waiting for db txns, excluding scheduling time, in this block -block_db_txn_duration = metrics.register_counter( - "block_db_txn_duration_seconds", labels=["block_name"], - alternative_names=( - metrics.name_prefix + "_block_db_txn_duration:total", - ), -) +block_db_txn_duration = Counter("synapse_util_metrics_block_db_txn_duration_seconds", "", ["block_name"]) # seconds spent waiting for a db connection, in this block -block_db_sched_duration = metrics.register_counter( - "block_db_sched_duration_seconds", labels=["block_name"], -) +block_db_sched_duration = Counter("synapse_util_metrics_block_db_sched_duration_seconds", "", ["block_name"]) def measure_func(name): @@ -132,8 +87,8 @@ class Measure(object): duration = self.clock.time_msec() - self.start - block_counter.inc(self.name) - block_timer.inc_by(duration, self.name) + block_counter.labels(self.name).inc() + block_timer.labels(self.name).inc(duration) context = LoggingContext.current_context() @@ -150,19 +105,13 @@ class Measure(object): ru_utime, ru_stime = context.get_resource_usage() - block_ru_utime.inc_by(ru_utime - self.ru_utime, self.name) - block_ru_stime.inc_by(ru_stime - self.ru_stime, self.name) - block_db_txn_count.inc_by( - context.db_txn_count - self.db_txn_count, self.name - ) - block_db_txn_duration.inc_by( - (context.db_txn_duration_ms - self.db_txn_duration_ms) / 1000., - self.name - ) - block_db_sched_duration.inc_by( - (context.db_sched_duration_ms - self.db_sched_duration_ms) / 1000., - self.name - ) + block_ru_utime.labels(self.name).inc(ru_utime - self.ru_utime) + block_ru_stime.labels(self.name).inc(ru_stime - self.ru_stime) + block_db_txn_count.labels(self.name).inc(context.db_txn_count - self.db_txn_count) + block_db_txn_duration.labels(self.name).inc( + (context.db_txn_duration_ms - self.db_txn_duration_ms) / 1000.) + block_db_sched_duration.labels(self.name).inc( + (context.db_sched_duration_ms - self.db_sched_duration_ms) / 1000.) if self.created_context: self.start_context.__exit__(exc_type, exc_val, exc_tb) diff --git a/tests/__init__.py b/tests/__init__.py index bfebb0f644..aab20e8e02 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -12,3 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +from twisted.trial import util +util.DEFAULT_TIMEOUT_DURATION = 10 -- cgit 1.5.1 From fcc525b0b705703fead3d703c0df62a264ff8ce8 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Mon, 21 May 2018 19:48:57 -0500 Subject: rest of the changes --- synapse/http/request_metrics.py | 106 ++++------------- synapse/http/server.py | 4 +- synapse/push/bulk_push_rule_evaluator.py | 28 ++--- synapse/replication/tcp/resource.py | 30 +++-- synapse/storage/_base.py | 17 ++- synapse/storage/events.py | 28 ++--- tests/metrics/__init__.py | 0 tests/metrics/test_metric.py | 192 ------------------------------- 8 files changed, 68 insertions(+), 337 deletions(-) delete mode 100644 tests/metrics/__init__.py delete mode 100644 tests/metrics/test_metric.py (limited to 'synapse/http') diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py index 8c850bf23f..34a730d5bc 100644 --- a/synapse/http/request_metrics.py +++ b/synapse/http/request_metrics.py @@ -16,86 +16,38 @@ import logging -import synapse.metrics +from prometheus_client.core import Counter, Histogram + from synapse.util.logcontext import LoggingContext logger = logging.getLogger(__name__) -metrics = synapse.metrics.get_metrics_for("synapse.http.server") # total number of responses served, split by method/servlet/tag -response_count = metrics.register_counter( - "response_count", - labels=["method", "servlet", "tag"], - alternative_names=( - # the following are all deprecated aliases for the same metric - metrics.name_prefix + x for x in ( - "_requests", - "_response_time:count", - "_response_ru_utime:count", - "_response_ru_stime:count", - "_response_db_txn_count:count", - "_response_db_txn_duration:count", - ) - ) -) +response_count = Counter("synapse_http_server_response_count", "", ["method", "servlet", "tag"]) -requests_counter = metrics.register_counter( - "requests_received", - labels=["method", "servlet", ], -) +requests_counter = Counter("synapse_http_server_requests_received", "", ["method", "servlet"]) -outgoing_responses_counter = metrics.register_counter( - "responses", - labels=["method", "code"], -) +outgoing_responses_counter = Counter("synapse_http_server_responses", "", ["method", "code"]) -response_timer = metrics.register_counter( - "response_time_seconds", - labels=["method", "servlet", "tag"], - alternative_names=( - metrics.name_prefix + "_response_time:total", - ), -) +response_timer = Histogram("synapse_http_server_response_time_seconds", "", ["method", "servlet", "tag"]) -response_ru_utime = metrics.register_counter( - "response_ru_utime_seconds", labels=["method", "servlet", "tag"], - alternative_names=( - metrics.name_prefix + "_response_ru_utime:total", - ), -) +response_ru_utime = Counter("synapse_http_server_response_ru_utime_seconds", "", ["method", "servlet", "tag"]) -response_ru_stime = metrics.register_counter( - "response_ru_stime_seconds", labels=["method", "servlet", "tag"], - alternative_names=( - metrics.name_prefix + "_response_ru_stime:total", - ), -) +response_ru_stime = Counter("synapse_http_server_response_ru_stime_seconds", "", ["method", "servlet", "tag"]) -response_db_txn_count = metrics.register_counter( - "response_db_txn_count", labels=["method", "servlet", "tag"], - alternative_names=( - metrics.name_prefix + "_response_db_txn_count:total", - ), -) +response_db_txn_count = Counter("synapse_http_server_response_db_txn_count", "", ["method", "servlet", "tag"]) # seconds spent waiting for db txns, excluding scheduling time, when processing # this request -response_db_txn_duration = metrics.register_counter( - "response_db_txn_duration_seconds", labels=["method", "servlet", "tag"], - alternative_names=( - metrics.name_prefix + "_response_db_txn_duration:total", - ), -) +response_db_txn_duration = Counter("synapse_http_server_response_db_txn_duration_seconds", "", ["method", "servlet", "tag"]) # seconds spent waiting for a db connection, when processing this request -response_db_sched_duration = metrics.register_counter( - "response_db_sched_duration_seconds", labels=["method", "servlet", "tag"] +response_db_sched_duration = Counter("synapse_http_request_response_db_sched_duration_seconds", "", ["method", "servlet", "tag"] ) # size in bytes of the response written -response_size = metrics.register_counter( - "response_size", labels=["method", "servlet", "tag"] +response_size = Counter("synapse_http_request_response_size", "", ["method", "servlet", "tag"] ) @@ -119,31 +71,19 @@ class RequestMetrics(object): ) return - outgoing_responses_counter.inc(request.method, str(request.code)) + outgoing_responses_counter.labels(request.method, str(request.code)).inc() - response_count.inc(request.method, self.name, tag) + response_count.labels(request.method, self.name, tag).inc() - response_timer.inc_by( - time_msec - self.start, request.method, - self.name, tag - ) + response_timer.labels(request.method, self.name, tag).observe(time_msec - self.start) ru_utime, ru_stime = context.get_resource_usage() - response_ru_utime.inc_by( - ru_utime, request.method, self.name, tag - ) - response_ru_stime.inc_by( - ru_stime, request.method, self.name, tag - ) - response_db_txn_count.inc_by( - context.db_txn_count, request.method, self.name, tag - ) - response_db_txn_duration.inc_by( - context.db_txn_duration_ms / 1000., request.method, self.name, tag - ) - response_db_sched_duration.inc_by( - context.db_sched_duration_ms / 1000., request.method, self.name, tag - ) - - response_size.inc_by(request.sentLength, request.method, self.name, tag) + response_ru_utime.labels(request.method, self.name, tag).inc(ru_utime) + response_ru_stime.labels(request.method, self.name, tag).inc(ru_stime) + response_db_txn_count.labels(request.method, self.name, tag).inc(context.db_txn_count) + response_db_txn_duration.labels(request.method, self.name, tag).inc(context.db_txn_duration_ms / 1000.) + response_db_sched_duration.labels(request.method, self.name, tag).inc( + context.db_sched_duration_ms / 1000.) + + response_size.labels(request.method, self.name, tag).inc(request.sentLength) diff --git a/synapse/http/server.py b/synapse/http/server.py index b6e2ae14a2..f72d986288 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -138,8 +138,8 @@ def wrap_request_handler_with_logging(h): # dispatching to the handler, so that the handler # can update the servlet name in the request # metrics - requests_counter.inc(request.method, - request.request_metrics.name) + requests_counter.labels(request.method, + request.request_metrics.name).inc() yield d return wrapped_request_handler diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 7c680659b6..6fcca5e260 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -22,35 +22,29 @@ from .push_rule_evaluator import PushRuleEvaluatorForEvent from synapse.event_auth import get_user_power_level from synapse.api.constants import EventTypes, Membership -from synapse.metrics import get_metrics_for -from synapse.util.caches import metrics as cache_metrics +from synapse.util.caches import register_cache from synapse.util.caches.descriptors import cached from synapse.util.async import Linearizer from synapse.state import POWER_KEY from collections import namedtuple - +from prometheus_client import Counter logger = logging.getLogger(__name__) rules_by_room = {} -push_metrics = get_metrics_for(__name__) -push_rules_invalidation_counter = push_metrics.register_counter( - "push_rules_invalidation_counter" -) -push_rules_state_size_counter = push_metrics.register_counter( - "push_rules_state_size_counter" -) +push_rules_invalidation_counter = Counter("synapse_push_bulk_push_role_evaluator_push_rules_invalidation_counter", "") +push_rules_state_size_counter = Counter("synapse_push_bulk_push_role_evaluator_push_rules_state_size_counter", "") # Measures whether we use the fast path of using state deltas, or if we have to # recalculate from scratch -push_rules_delta_state_cache_metric = cache_metrics.register_cache( +push_rules_delta_state_cache_metric = register_cache( "cache", - size_callback=lambda: 0, # Meaningless size, as this isn't a cache that stores values - cache_name="push_rules_delta_state_cache_metric", + "push_rules_delta_state_cache_metric", + cache=[], # Meaningless size, as this isn't a cache that stores values ) @@ -64,10 +58,10 @@ class BulkPushRuleEvaluator(object): self.store = hs.get_datastore() self.auth = hs.get_auth() - self.room_push_rule_cache_metrics = cache_metrics.register_cache( + self.room_push_rule_cache_metrics = register_cache( "cache", - size_callback=lambda: 0, # There's not good value for this - cache_name="room_push_rule_cache", + "room_push_rule_cache", + cache=[], # Meaningless size, as this isn't a cache that stores values ) @defer.inlineCallbacks @@ -309,7 +303,7 @@ class RulesForRoom(object): current_state_ids = context.current_state_ids push_rules_delta_state_cache_metric.inc_misses() - push_rules_state_size_counter.inc_by(len(current_state_ids)) + push_rules_state_size_counter.inc(len(current_state_ids)) logger.debug( "Looking for member changes in %r %r", state_group, current_state_ids diff --git a/synapse/replication/tcp/resource.py b/synapse/replication/tcp/resource.py index a41af4fd6c..0e6b1957c6 100644 --- a/synapse/replication/tcp/resource.py +++ b/synapse/replication/tcp/resource.py @@ -22,20 +22,19 @@ from .streams import STREAMS_MAP, FederationStream from .protocol import ServerReplicationStreamProtocol from synapse.util.metrics import Measure, measure_func +from synapse.metrics import LaterGauge import logging -import synapse.metrics +from prometheus_client import Counter -metrics = synapse.metrics.get_metrics_for(__name__) -stream_updates_counter = metrics.register_counter( - "stream_updates", labels=["stream_name"] +stream_updates_counter = Counter("synapse_replication_tcp_resource_stream_updates", "", ["stream_name"] ) -user_sync_counter = metrics.register_counter("user_sync") -federation_ack_counter = metrics.register_counter("federation_ack") -remove_pusher_counter = metrics.register_counter("remove_pusher") -invalidate_cache_counter = metrics.register_counter("invalidate_cache") -user_ip_cache_counter = metrics.register_counter("user_ip_cache") +user_sync_counter = Counter("synapse_replication_tcp_resource_user_sync", "") +federation_ack_counter = Counter("synapse_replication_tcp_resource_federation_ack", "") +remove_pusher_counter = Counter("synapse_replication_tcp_resource_remove_pusher", "") +invalidate_cache_counter = Counter("synapse_replication_tcp_resource_invalidate_cache", "") +user_ip_cache_counter = Counter("synapse_replication_tcp_resource_user_ip_cache", "") logger = logging.getLogger(__name__) @@ -73,7 +72,8 @@ class ReplicationStreamer(object): # Current connections. self.connections = [] - metrics.register_callback("total_connections", lambda: len(self.connections)) + l = LaterGauge("synapse_replication_tcp_resource_total_connections", "", [], lambda: len(self.connections)) + l.register() # List of streams that clients can subscribe to. # We only support federation stream if federation sending hase been @@ -85,17 +85,15 @@ class ReplicationStreamer(object): self.streams_by_name = {stream.NAME: stream for stream in self.streams} - metrics.register_callback( - "connections_per_stream", + LaterGauge( + "synapse_replication_tcp_resource_connections_per_stream", "", ["stream_name"], lambda: { (stream_name,): len([ conn for conn in self.connections if stream_name in conn.replication_streams ]) for stream_name in self.streams_by_name - }, - labels=["stream_name"], - ) + }).register() self.federation_sender = None if not hs.config.send_federation: @@ -175,7 +173,7 @@ class ReplicationStreamer(object): logger.info( "Streaming: %s -> %s", stream.NAME, updates[-1][0] ) - stream_updates_counter.inc_by(len(updates), stream.NAME) + stream_updates_counter.labels(stream.NAME).inc(len(updates)) # Some streams return multiple rows with the same stream IDs, # we need to make sure they get sent out in batches. We do diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 2262776ab2..d1b625dc30 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -18,8 +18,8 @@ from synapse.api.errors import StoreError from synapse.util.logcontext import LoggingContext, PreserveLoggingContext from synapse.util.caches.descriptors import Cache from synapse.storage.engines import PostgresEngine -import synapse.metrics +from prometheus_client import Histogram from twisted.internet import defer @@ -34,13 +34,10 @@ sql_logger = logging.getLogger("synapse.storage.SQL") transaction_logger = logging.getLogger("synapse.storage.txn") perf_logger = logging.getLogger("synapse.storage.TIME") +sql_scheduling_timer = Histogram("synapse_storage_schedule_time", "") -metrics = synapse.metrics.get_metrics_for("synapse.storage") - -sql_scheduling_timer = metrics.register_distribution("schedule_time") - -sql_query_timer = metrics.register_distribution("query_time", labels=["verb"]) -sql_txn_timer = metrics.register_distribution("transaction_time", labels=["desc"]) +sql_query_timer = Histogram("synapse_storage_query_time", "", ["verb"]) +sql_txn_timer = Histogram("synapse_storage_transaction_time", "", ["desc"]) class LoggingTransaction(object): @@ -117,7 +114,7 @@ class LoggingTransaction(object): finally: msecs = (time.time() * 1000) - start sql_logger.debug("[SQL time] {%s} %f", self.name, msecs) - sql_query_timer.inc_by(msecs, sql.split()[0]) + sql_query_timer.labels(sql.split()[0]).observe(msecs) class PerformanceCounters(object): @@ -287,7 +284,7 @@ class SQLBaseStore(object): self._current_txn_total_time += duration self._txn_perf_counters.update(desc, start, end) - sql_txn_timer.inc_by(duration, desc) + sql_txn_timer.labels(desc).observe(duration) @defer.inlineCallbacks def runInteraction(self, desc, func, *args, **kwargs): @@ -349,7 +346,7 @@ class SQLBaseStore(object): def inner_func(conn, *args, **kwargs): with LoggingContext("runWithConnection") as context: sched_duration_ms = time.time() * 1000 - start_time - sql_scheduling_timer.inc_by(sched_duration_ms) + sql_scheduling_timer.observe(sched_duration_ms) current_context.add_database_scheduled(sched_duration_ms) if self.database_engine.is_connection_closed(conn): diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 05cde96afc..96b48cfdbb 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -40,30 +40,24 @@ import synapse.metrics from synapse.events import EventBase # noqa: F401 from synapse.events.snapshot import EventContext # noqa: F401 -logger = logging.getLogger(__name__) +from prometheus_client import Counter +logger = logging.getLogger(__name__) -metrics = synapse.metrics.get_metrics_for(__name__) -persist_event_counter = metrics.register_counter("persisted_events") -event_counter = metrics.register_counter( - "persisted_events_sep", labels=["type", "origin_type", "origin_entity"] -) +persist_event_counter = Counter("synapse_storage_events_persisted_events", "") +event_counter = Counter("synapse_storage_events_persisted_events_sep", "", ["type", "origin_type", "origin_entity"]) # The number of times we are recalculating the current state -state_delta_counter = metrics.register_counter( - "state_delta", -) +state_delta_counter = Counter("synapse_storage_events_state_delta", "") + # The number of times we are recalculating state when there is only a # single forward extremity -state_delta_single_event_counter = metrics.register_counter( - "state_delta_single_event", -) +state_delta_single_event_counter = Counter("synapse_storage_events_state_delta_single_event", "") + # The number of times we are reculating state when we could have resonably # calculated the delta when we calculated the state for an event we were # persisting. -state_delta_reuse_delta_counter = metrics.register_counter( - "state_delta_reuse_delta", -) +state_delta_reuse_delta_counter = Counter("synapse_storage_events_state_delta_reuse_delta", "") def encode_json(json_object): @@ -445,7 +439,7 @@ class EventsStore(EventsWorkerStore): state_delta_for_room=state_delta_for_room, new_forward_extremeties=new_forward_extremeties, ) - persist_event_counter.inc_by(len(chunk)) + persist_event_counter.inc(len(chunk)) synapse.metrics.event_persisted_position.set( chunk[-1][0].internal_metadata.stream_ordering, ) @@ -460,7 +454,7 @@ class EventsStore(EventsWorkerStore): origin_type = "remote" origin_entity = get_domain_from_id(event.sender) - event_counter.inc(event.type, origin_type, origin_entity) + event_counter.labels(event.type, origin_type, origin_entity).inc() for room_id, new_state in current_state_for_room.iteritems(): self.get_current_state_ids.prefill( diff --git a/tests/metrics/__init__.py b/tests/metrics/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/metrics/test_metric.py b/tests/metrics/test_metric.py deleted file mode 100644 index 069c0be762..0000000000 --- a/tests/metrics/test_metric.py +++ /dev/null @@ -1,192 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2015, 2016 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from tests import unittest - -from synapse.metrics.metric import ( - CounterMetric, CallbackMetric, DistributionMetric, CacheMetric, - _escape_label_value, -) - - -class CounterMetricTestCase(unittest.TestCase): - - def test_scalar(self): - counter = CounterMetric("scalar") - - self.assertEquals(counter.render(), [ - 'scalar 0', - ]) - - counter.inc() - - self.assertEquals(counter.render(), [ - 'scalar 1', - ]) - - counter.inc_by(2) - - self.assertEquals(counter.render(), [ - 'scalar 3' - ]) - - def test_vector(self): - counter = CounterMetric("vector", labels=["method"]) - - # Empty counter doesn't yet know what values it has - self.assertEquals(counter.render(), []) - - counter.inc("GET") - - self.assertEquals(counter.render(), [ - 'vector{method="GET"} 1', - ]) - - counter.inc("GET") - counter.inc("PUT") - - self.assertEquals(counter.render(), [ - 'vector{method="GET"} 2', - 'vector{method="PUT"} 1', - ]) - - -class CallbackMetricTestCase(unittest.TestCase): - - def test_scalar(self): - d = dict() - - metric = CallbackMetric("size", lambda: len(d)) - - self.assertEquals(metric.render(), [ - 'size 0', - ]) - - d["key"] = "value" - - self.assertEquals(metric.render(), [ - 'size 1', - ]) - - def test_vector(self): - vals = dict() - - metric = CallbackMetric("values", lambda: vals, labels=["type"]) - - self.assertEquals(metric.render(), []) - - # Keys have to be tuples, even if they're 1-element - vals[("foo",)] = 1 - vals[("bar",)] = 2 - - self.assertEquals(metric.render(), [ - 'values{type="bar"} 2', - 'values{type="foo"} 1', - ]) - - -class DistributionMetricTestCase(unittest.TestCase): - - def test_scalar(self): - metric = DistributionMetric("thing") - - self.assertEquals(metric.render(), [ - 'thing:count 0', - 'thing:total 0', - ]) - - metric.inc_by(500) - - self.assertEquals(metric.render(), [ - 'thing:count 1', - 'thing:total 500', - ]) - - def test_vector(self): - metric = DistributionMetric("queries", labels=["verb"]) - - self.assertEquals(metric.render(), []) - - metric.inc_by(300, "SELECT") - metric.inc_by(200, "SELECT") - metric.inc_by(800, "INSERT") - - self.assertEquals(metric.render(), [ - 'queries:count{verb="INSERT"} 1', - 'queries:count{verb="SELECT"} 2', - 'queries:total{verb="INSERT"} 800', - 'queries:total{verb="SELECT"} 500', - ]) - - -class CacheMetricTestCase(unittest.TestCase): - - def test_cache(self): - d = dict() - - metric = CacheMetric("cache", lambda: len(d), "cache_name") - - self.assertEquals(metric.render(), [ - 'cache:hits{name="cache_name"} 0', - 'cache:total{name="cache_name"} 0', - 'cache:size{name="cache_name"} 0', - 'cache:evicted_size{name="cache_name"} 0', - ]) - - metric.inc_misses() - d["key"] = "value" - - self.assertEquals(metric.render(), [ - 'cache:hits{name="cache_name"} 0', - 'cache:total{name="cache_name"} 1', - 'cache:size{name="cache_name"} 1', - 'cache:evicted_size{name="cache_name"} 0', - ]) - - metric.inc_hits() - - self.assertEquals(metric.render(), [ - 'cache:hits{name="cache_name"} 1', - 'cache:total{name="cache_name"} 2', - 'cache:size{name="cache_name"} 1', - 'cache:evicted_size{name="cache_name"} 0', - ]) - - metric.inc_evictions(2) - - self.assertEquals(metric.render(), [ - 'cache:hits{name="cache_name"} 1', - 'cache:total{name="cache_name"} 2', - 'cache:size{name="cache_name"} 1', - 'cache:evicted_size{name="cache_name"} 2', - ]) - - -class LabelValueEscapeTestCase(unittest.TestCase): - def test_simple(self): - string = "safjhsdlifhyskljfksdfh" - self.assertEqual(string, _escape_label_value(string)) - - def test_escape(self): - self.assertEqual( - "abc\\\"def\\nghi\\\\", - _escape_label_value("abc\"def\nghi\\"), - ) - - def test_sequence_of_escapes(self): - self.assertEqual( - "abc\\\"def\\nghi\\\\\\n", - _escape_label_value("abc\"def\nghi\\\n"), - ) -- cgit 1.5.1 From 8f5a688d420c8f6b51826c561da9094b76fbea1e Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 22 May 2018 10:56:03 -0500 Subject: cleanups, self-registration --- synapse/handlers/presence.py | 7 ++++--- synapse/http/request_metrics.py | 15 +++------------ synapse/replication/tcp/resource.py | 9 +++++---- 3 files changed, 12 insertions(+), 19 deletions(-) (limited to 'synapse/http') diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index aca822c46a..4ee87d5714 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -142,8 +142,9 @@ class PresenceHandler(object): } LaterGauge( - "user_to_current_state_size", "", [], lambda: len(self.user_to_current_state) - ).register() + "synapse_handlers_presence_user_to_current_state_size", "", [], + lambda: len(self.user_to_current_state) + ) now = self.clock.time_msec() for state in active_presence: @@ -212,7 +213,7 @@ class PresenceHandler(object): 60 * 1000, ) - LaterGauge("wheel_timer_size", "", [], lambda: len(self.wheel_timer)).register() + LaterGauge("synapse_handlers_presence_wheel_timer_size", "", [], lambda: len(self.wheel_timer)) @defer.inlineCallbacks def _on_shutdown(self): diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py index 0984870e7e..e7f1bfc4ae 100644 --- a/synapse/http/request_metrics.py +++ b/synapse/http/request_metrics.py @@ -71,24 +71,14 @@ in_flight_requests_db_sched_duration = Counter("synapse_http_request_in_flight_r _in_flight_requests = set() -def _collect_in_flight(): - """Called just before metrics are collected, so we use it to update all - the in flight request metrics - """ - - for rm in _in_flight_requests: - rm.update_metrics() - - -metrics.register_collector(_collect_in_flight) - - def _get_in_flight_counts(): """Returns a count of all in flight requests by (method, server_name) Returns: dict[tuple[str, str], int] """ + for rm in _in_flight_requests: + rm.update_metrics() # Map from (method, name) -> int, the number of in flight requests of that # type @@ -99,6 +89,7 @@ def _get_in_flight_counts(): return counts + LaterGauge( "synapse_http_request_metrics_in_flight_requests_count", "", ["method", "servlet"], diff --git a/synapse/replication/tcp/resource.py b/synapse/replication/tcp/resource.py index ed251c79ac..1ca30bc31a 100644 --- a/synapse/replication/tcp/resource.py +++ b/synapse/replication/tcp/resource.py @@ -73,8 +73,8 @@ class ReplicationStreamer(object): # Current connections. self.connections = [] - l = LaterGauge("synapse_replication_tcp_resource_total_connections", "", [], lambda: len(self.connections)) - l.register() + LaterGauge("synapse_replication_tcp_resource_total_connections", "", [], + lambda: len(self.connections)) # List of streams that clients can subscribe to. # We only support federation stream if federation sending hase been @@ -87,14 +87,15 @@ class ReplicationStreamer(object): self.streams_by_name = {stream.NAME: stream for stream in self.streams} LaterGauge( - "synapse_replication_tcp_resource_connections_per_stream", "", ["stream_name"], + "synapse_replication_tcp_resource_connections_per_stream", "", + ["stream_name"], lambda: { (stream_name,): len([ conn for conn in self.connections if stream_name in conn.replication_streams ]) for stream_name in self.streams_by_name - }).register() + }) self.federation_sender = None if not hs.config.send_federation: -- cgit 1.5.1 From 071206304d088aac8bd0e2fff600141dae1d09b6 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 22 May 2018 16:54:22 -0500 Subject: cleanup pep8 errors --- synapse/federation/federation_server.py | 5 +- synapse/federation/transaction_queue.py | 22 ++++-- synapse/http/request_metrics.py | 114 ++++++++++++++++++++++++-------- synapse/notifier.py | 3 +- synapse/util/caches/__init__.py | 7 +- 5 files changed, 114 insertions(+), 37 deletions(-) (limited to 'synapse/http') diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 8211273006..2d420a58a2 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -46,10 +46,13 @@ received_pdus_counter = Counter("synapse_federation_server_received_pdus", "") received_edus_counter = Counter("synapse_federation_server_received_edus", "") -received_queries_counter = Counter("synapse_federation_server_received_queries", "", ["type"]) +received_queries_counter = Counter( + "synapse_federation_server_received_queries", "", ["type"] +) class FederationServer(FederationBase): + def __init__(self, hs): super(FederationServer, self).__init__(hs) diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 2049351fdd..53442688c8 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -28,7 +28,10 @@ from synapse.handlers.presence import format_user_presence_state, get_interested import synapse.metrics from synapse.metrics import LaterGauge from synapse.metrics import ( - sent_edus_counter, sent_transactions_counter, events_processed_counter) + sent_edus_counter, + sent_transactions_counter, + events_processed_counter, +) from prometheus_client import Counter @@ -37,7 +40,9 @@ import logging logger = logging.getLogger(__name__) -sent_pdus_destination_dist = Counter("synapse_federation_client_sent_pdu_destinations", "") +sent_pdus_destination_dist = Counter( + "synapse_federation_client_sent_pdu_destinations", "" +) class TransactionQueue(object): @@ -64,7 +69,10 @@ class TransactionQueue(object): # done self.pending_transactions = {} - LaterGauge("synapse_federation_client_pending_destinations", "", [], + LaterGauge( + "synapse_federation_client_pending_destinations", + "", + [], lambda: len(self.pending_transactions), ) @@ -89,11 +97,15 @@ class TransactionQueue(object): self.pending_edus_keyed_by_dest = edus_keyed = {} LaterGauge( - "synapse_federation_client_pending_pdus", "", [], + "synapse_federation_client_pending_pdus", + "", + [], lambda: sum(map(len, pdus.values())), ) LaterGauge( - "synapse_federation_client_pending_edus", "", [], + "synapse_federation_client_pending_edus", + "", + [], lambda: ( sum(map(len, edus.values())) + sum(map(len, presence.values())) diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py index e7f1bfc4ae..7f11b5c5a4 100644 --- a/synapse/http/request_metrics.py +++ b/synapse/http/request_metrics.py @@ -25,47 +25,87 @@ logger = logging.getLogger(__name__) # total number of responses served, split by method/servlet/tag -response_count = Counter("synapse_http_server_response_count", "", ["method", "servlet", "tag"]) +response_count = Counter( + "synapse_http_server_response_count", "", ["method", "servlet", "tag"] +) -requests_counter = Counter("synapse_http_server_requests_received", "", ["method", "servlet"]) +requests_counter = Counter( + "synapse_http_server_requests_received", "", ["method", "servlet"] +) -outgoing_responses_counter = Counter("synapse_http_server_responses", "", ["method", "code"]) +outgoing_responses_counter = Counter( + "synapse_http_server_responses", "", ["method", "code"] +) -response_timer = Histogram("synapse_http_server_response_time_seconds", "", ["method", "servlet", "tag"]) +response_timer = Histogram( + "synapse_http_server_response_time_seconds", "", ["method", "servlet", "tag"] +) -response_ru_utime = Counter("synapse_http_server_response_ru_utime_seconds", "", ["method", "servlet", "tag"]) +response_ru_utime = Counter( + "synapse_http_server_response_ru_utime_seconds", "", ["method", "servlet", "tag"] +) -response_ru_stime = Counter("synapse_http_server_response_ru_stime_seconds", "", ["method", "servlet", "tag"]) +response_ru_stime = Counter( + "synapse_http_server_response_ru_stime_seconds", "", ["method", "servlet", "tag"] +) -response_db_txn_count = Counter("synapse_http_server_response_db_txn_count", "", ["method", "servlet", "tag"]) +response_db_txn_count = Counter( + "synapse_http_server_response_db_txn_count", "", ["method", "servlet", "tag"] +) # seconds spent waiting for db txns, excluding scheduling time, when processing # this request -response_db_txn_duration = Counter("synapse_http_server_response_db_txn_duration_seconds", "", ["method", "servlet", "tag"]) +response_db_txn_duration = Counter( + "synapse_http_server_response_db_txn_duration_seconds", + "", + ["method", "servlet", "tag"], +) # seconds spent waiting for a db connection, when processing this request -response_db_sched_duration = Counter("synapse_http_request_response_db_sched_duration_seconds", "", ["method", "servlet", "tag"] +response_db_sched_duration = Counter( + "synapse_http_request_response_db_sched_duration_seconds", + "", + ["method", "servlet", "tag"], ) # size in bytes of the response written -response_size = Counter("synapse_http_request_response_size", "", ["method", "servlet", "tag"] +response_size = Counter( + "synapse_http_request_response_size", "", ["method", "servlet", "tag"] ) # In flight metrics are incremented while the requests are in flight, rather # than when the response was written. -in_flight_requests_ru_utime = Counter("synapse_http_request_in_flight_requests_ru_utime_seconds", "", ["method", "servlet"]) +in_flight_requests_ru_utime = Counter( + "synapse_http_request_in_flight_requests_ru_utime_seconds", + "", + ["method", "servlet"], +) -in_flight_requests_ru_stime = Counter("synapse_http_request_in_flight_requests_ru_stime_seconds", "", ["method", "servlet"]) +in_flight_requests_ru_stime = Counter( + "synapse_http_request_in_flight_requests_ru_stime_seconds", + "", + ["method", "servlet"], +) -in_flight_requests_db_txn_count = Counter("synapse_http_request_in_flight_requests_db_txn_count", "", ["method", "servlet"]) +in_flight_requests_db_txn_count = Counter( + "synapse_http_request_in_flight_requests_db_txn_count", "", ["method", "servlet"] +) # seconds spent waiting for db txns, excluding scheduling time, when processing # this request -in_flight_requests_db_txn_duration = Counter("synapse_http_request_in_flight_requests_db_txn_duration_seconds", "", ["method", "servlet"]) +in_flight_requests_db_txn_duration = Counter( + "synapse_http_request_in_flight_requests_db_txn_duration_seconds", + "", + ["method", "servlet"], +) # seconds spent waiting for a db connection, when processing this request -in_flight_requests_db_sched_duration = Counter("synapse_http_request_in_flight_requests_db_sched_duration_seconds", "", ["method", "servlet"]) +in_flight_requests_db_sched_duration = Counter( + "synapse_http_request_in_flight_requests_db_sched_duration_seconds", + "", + ["method", "servlet"], +) # The set of all in flight requests, set[RequestMetrics] _in_flight_requests = set() @@ -91,9 +131,10 @@ def _get_in_flight_counts(): LaterGauge( - "synapse_http_request_metrics_in_flight_requests_count", "", + "synapse_http_request_metrics_in_flight_requests_count", + "", ["method", "servlet"], - _get_in_flight_counts + _get_in_flight_counts, ) @@ -128,16 +169,23 @@ class RequestMetrics(object): response_count.labels(request.method, self.name, tag).inc() - response_timer.labels(request.method, self.name, tag).observe(time_msec - self.start) + response_timer.labels(request.method, self.name, tag).observe( + time_msec - self.start + ) ru_utime, ru_stime = context.get_resource_usage() response_ru_utime.labels(request.method, self.name, tag).inc(ru_utime) response_ru_stime.labels(request.method, self.name, tag).inc(ru_stime) - response_db_txn_count.labels(request.method, self.name, tag).inc(context.db_txn_count) - response_db_txn_duration.labels(request.method, self.name, tag).inc(context.db_txn_duration_ms / 1000.) + response_db_txn_count.labels(request.method, self.name, tag).inc( + context.db_txn_count + ) + response_db_txn_duration.labels(request.method, self.name, tag).inc( + context.db_txn_duration_ms / 1000. + ) response_db_sched_duration.labels(request.method, self.name, tag).inc( - context.db_sched_duration_ms / 1000.) + context.db_sched_duration_ms / 1000. + ) response_size.labels(request.method, self.name, tag).inc(request.sentLength) @@ -154,11 +202,17 @@ class RequestMetrics(object): in_flight_requests_ru_utime.labels(self.method, self.name).inc(diff.ru_utime) in_flight_requests_ru_stime.labels(self.method, self.name).inc(diff.ru_stime) - in_flight_requests_db_txn_count.labels(self.method, self.name).inc(diff.db_txn_count) + in_flight_requests_db_txn_count.labels(self.method, self.name).inc( + diff.db_txn_count + ) - in_flight_requests_db_txn_duration.labels(self.method, self.name).inc(diff.db_txn_duration_ms / 1000.) + in_flight_requests_db_txn_duration.labels(self.method, self.name).inc( + diff.db_txn_duration_ms / 1000. + ) - in_flight_requests_db_sched_duration.labels(self.method, self.name).inc(diff.db_sched_duration_ms / 1000.) + in_flight_requests_db_sched_duration.labels(self.method, self.name).inc( + diff.db_sched_duration_ms / 1000. + ) class _RequestStats(object): @@ -166,12 +220,16 @@ class _RequestStats(object): """ __slots__ = [ - "ru_utime", "ru_stime", - "db_txn_count", "db_txn_duration_ms", "db_sched_duration_ms", + "ru_utime", + "ru_stime", + "db_txn_count", + "db_txn_duration_ms", + "db_sched_duration_ms", ] - def __init__(self, ru_utime, ru_stime, db_txn_count, - db_txn_duration_ms, db_sched_duration_ms): + def __init__( + self, ru_utime, ru_stime, db_txn_count, db_txn_duration_ms, db_sched_duration_ms + ): self.ru_utime = ru_utime self.ru_stime = ru_stime self.db_txn_count = db_txn_count diff --git a/synapse/notifier.py b/synapse/notifier.py index 40cc553918..6dce20a284 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -40,7 +40,8 @@ logger = logging.getLogger(__name__) notified_events_counter = Counter("synapse_notifier_notified_events", "") -users_woken_by_stream_counter = Counter("synapse_notifier_users_woken_by_stream", "", ["stream"]) +users_woken_by_stream_counter = Counter( + "synapse_notifier_users_woken_by_stream", "", ["stream"]) # TODO(paul): Should be shared somewhere diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index 1c511a7072..e0c22df249 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -29,13 +29,16 @@ cache_total = Gauge("synapse_util_caches_cache:total", "", ["name"]) response_cache_size = Gauge("synapse_util_caches_response_cache:size", "", ["name"]) response_cache_hits = Gauge("synapse_util_caches_response_cache:hits", "", ["name"]) -response_cache_evicted = Gauge("synapse_util_caches_response_cache:evicted_size", "", ["name"]) +response_cache_evicted = Gauge( + "synapse_util_caches_response_cache:evicted_size", "", ["name"] +) response_cache_total = Gauge("synapse_util_caches_response_cache:total", "", ["name"]) + def register_cache(cache_type, cache_name, cache): # Check if the metric is already registered. Unregister it, if so. - metric_name = "cache_%s_%s" % (cache_type, cache_name,) + metric_name = "cache_%s_%s" % (cache_type, cache_name) if metric_name in collectors_by_name.keys(): REGISTRY.unregister(collectors_by_name[metric_name]) -- cgit 1.5.1 From 53cc2cde1f609ec34a4ce6a7c678302c65ddfe53 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 22 May 2018 17:32:57 -0500 Subject: cleanup --- synapse/federation/send_queue.py | 5 ++--- synapse/handlers/appservice.py | 9 ++++++--- synapse/handlers/presence.py | 12 ++++++++---- synapse/http/client.py | 3 ++- synapse/http/matrixfederationclient.py | 6 ++++-- synapse/metrics/__init__.py | 28 ++++++++++++++++++++++------ synapse/push/bulk_push_rule_evaluator.py | 6 ++++-- synapse/storage/events.py | 9 ++++++--- synapse/util/metrics.py | 15 ++++++++++----- 9 files changed, 64 insertions(+), 29 deletions(-) (limited to 'synapse/http') diff --git a/synapse/federation/send_queue.py b/synapse/federation/send_queue.py index e6e1888f3a..c7ed465617 100644 --- a/synapse/federation/send_queue.py +++ b/synapse/federation/send_queue.py @@ -74,9 +74,8 @@ class FederationRemoteSendQueue(object): # lambda binds to the queue rather than to the name of the queue which # changes. ARGH. def register(name, queue): - LaterGauge("synapse_federation_send_queue_%s_size" % (queue_name,), "", - lambda: len(queue), - ) + LaterGauge("synapse_federation_send_queue_%s_size" % (queue_name,), + "", lambda: len(queue)) for queue_name in [ "presence_map", "presence_changed", "keyed_edu", "keyed_edu_changed", diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index a7345331af..d9f35a5dba 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -127,12 +127,15 @@ class ApplicationServicesHandler(object): now = self.clock.time_msec() ts = yield self.store.get_received_ts(events[-1].event_id) - synapse.metrics.event_processing_positions.labels("appservice_sender").set(upper_bound) + synapse.metrics.event_processing_positions.labels( + "appservice_sender").set(upper_bound) events_processed_counter.inc(len(events)) - synapse.metrics.event_processing_lag.labels("appservice_sender").set(now - ts) - synapse.metrics.event_processing_last_ts.labels("appservice_sender").set(ts) + synapse.metrics.event_processing_lag.labels( + "appservice_sender").set(now - ts) + synapse.metrics.event_processing_last_ts.labels( + "appservice_sender").set(ts) finally: self.is_processing = False diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 4ee87d5714..12939aa507 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -47,7 +47,8 @@ logger = logging.getLogger(__name__) notified_presence_counter = Counter("synapse_handler_presence_notified_presence", "") -federation_presence_out_counter = Counter("synapse_handler_presence_federation_presence_out", "") +federation_presence_out_counter = Counter( + "synapse_handler_presence_federation_presence_out", "") presence_updates_counter = Counter("synapse_handler_presence_presence_updates", "") timers_fired_counter = Counter("synapse_handler_presence_timers_fired", "") federation_presence_counter = Counter("synapse_handler_presence_federation_presence", "") @@ -55,8 +56,10 @@ bump_active_time_counter = Counter("synapse_handler_presence_bump_active_time", get_updates_counter = Counter("synapse_handler_presence_get_updates", "", ["type"]) -notify_reason_counter = Counter("synapse_handler_presence_notify_reason", "", ["reason"]) -state_transition_counter = Counter("synapse_handler_presence_state_transition", "", ["from", "to"] +notify_reason_counter = Counter( + "synapse_handler_presence_notify_reason", "", ["reason"]) +state_transition_counter = Counter( + "synapse_handler_presence_state_transition", "", ["from", "to"] ) @@ -213,7 +216,8 @@ class PresenceHandler(object): 60 * 1000, ) - LaterGauge("synapse_handlers_presence_wheel_timer_size", "", [], lambda: len(self.wheel_timer)) + LaterGauge("synapse_handlers_presence_wheel_timer_size", "", [], + lambda: len(self.wheel_timer)) @defer.inlineCallbacks def _on_shutdown(self): diff --git a/synapse/http/client.py b/synapse/http/client.py index 61a1d2e2b3..4d4eee3d64 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -50,7 +50,8 @@ import urllib logger = logging.getLogger(__name__) outgoing_requests_counter = Counter("synapse_http_client_requests", "", ["method"]) -incoming_responses_counter = Counter("synapse_http_client_responses", "", ["method", "code"]) +incoming_responses_counter = Counter("synapse_http_client_responses", "", + ["method", "code"]) class SimpleHttpClient(object): diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 259d3884e2..77eaa06a1a 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -48,8 +48,10 @@ from prometheus_client import Counter logger = logging.getLogger(__name__) outbound_logger = logging.getLogger("synapse.http.outbound") -outgoing_requests_counter = Counter("synapse_http_matrixfederationclient_requests", "", ["method"]) -incoming_responses_counter = Counter("synapse_http_matrixfederationclient_responses", "", ["method", "code"]) +outgoing_requests_counter = Counter("synapse_http_matrixfederationclient_requests", + "", ["method"]) +incoming_responses_counter = Counter("synapse_http_matrixfederationclient_responses", + "", ["method", "code"]) MAX_LONG_RETRIES = 10 diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 38408efb54..bed37b5f56 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -21,15 +21,14 @@ import platform import attr from prometheus_client import Gauge, Histogram, Counter -from prometheus_client.core import ( - GaugeMetricFamily, CounterMetricFamily, REGISTRY) +from prometheus_client.core import GaugeMetricFamily, CounterMetricFamily, REGISTRY from twisted.internet import reactor logger = logging.getLogger(__name__) -running_on_pypy = platform.python_implementation() == 'PyPy' +running_on_pypy = platform.python_implementation() == "PyPy" all_metrics = [] all_collectors = [] all_gauges = {} @@ -87,9 +86,16 @@ class LaterGauge(object): # gc_unreachable = Gauge("python_gc_unreachable_total", "Unreachable GC objects", ["gen"]) -gc_time = Histogram("python_gc_time", "Time taken to GC (ms)", ["gen"], buckets=[1, 2, 5, 10, 25, 50, 100, 250, 500, 1000]) +gc_time = Histogram( + "python_gc_time", + "Time taken to GC (ms)", + ["gen"], + buckets=[1, 2, 5, 10, 25, 50, 100, 250, 500, 1000], +) + class GCCounts(object): + def collect(self): gc_counts = gc.get_count() @@ -99,14 +105,23 @@ class GCCounts(object): yield cm + REGISTRY.register(GCCounts()) # # Twisted reactor metrics # -tick_time = Histogram("python_twisted_reactor_tick_time", "Tick time of the Twisted reactor (ms)", buckets=[1, 2, 5, 10, 50, 100, 250, 500, 1000, 2000]) -pending_calls_metric = Histogram("python_twisted_reactor_pending_calls", "Pending calls", buckets=[1, 2, 5, 10, 25, 50, 100, 250, 500, 1000]) +tick_time = Histogram( + "python_twisted_reactor_tick_time", + "Tick time of the Twisted reactor (ms)", + buckets=[1, 2, 5, 10, 50, 100, 250, 500, 1000, 2000], +) +pending_calls_metric = Histogram( + "python_twisted_reactor_pending_calls", + "Pending calls", + buckets=[1, 2, 5, 10, 25, 50, 100, 250, 500, 1000], +) # # Federation Metrics @@ -134,6 +149,7 @@ event_processing_last_ts = Gauge("synapse_event_processing_last_ts", "", ["name" # finished being processed. event_processing_lag = Gauge("synapse_event_processing_lag", "", ["name"]) + def runUntilCurrentTimer(func): @functools.wraps(func) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 6fcca5e260..b0053e7f3f 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -36,8 +36,10 @@ logger = logging.getLogger(__name__) rules_by_room = {} -push_rules_invalidation_counter = Counter("synapse_push_bulk_push_role_evaluator_push_rules_invalidation_counter", "") -push_rules_state_size_counter = Counter("synapse_push_bulk_push_role_evaluator_push_rules_state_size_counter", "") +push_rules_invalidation_counter = Counter( + "synapse_push_bulk_push_role_evaluator_push_rules_invalidation_counter", "") +push_rules_state_size_counter = Counter( + "synapse_push_bulk_push_role_evaluator_push_rules_state_size_counter", "") # Measures whether we use the fast path of using state deltas, or if we have to # recalculate from scratch diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 00d66886ad..b96104ccae 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -45,19 +45,22 @@ from prometheus_client import Counter logger = logging.getLogger(__name__) persist_event_counter = Counter("synapse_storage_events_persisted_events", "") -event_counter = Counter("synapse_storage_events_persisted_events_sep", "", ["type", "origin_type", "origin_entity"]) +event_counter = Counter("synapse_storage_events_persisted_events_sep", "", + ["type", "origin_type", "origin_entity"]) # The number of times we are recalculating the current state state_delta_counter = Counter("synapse_storage_events_state_delta", "") # The number of times we are recalculating state when there is only a # single forward extremity -state_delta_single_event_counter = Counter("synapse_storage_events_state_delta_single_event", "") +state_delta_single_event_counter = Counter( + "synapse_storage_events_state_delta_single_event", "") # The number of times we are reculating state when we could have resonably # calculated the delta when we calculated the state for an event we were # persisting. -state_delta_reuse_delta_counter = Counter("synapse_storage_events_state_delta_reuse_delta", "") +state_delta_reuse_delta_counter = Counter( + "synapse_storage_events_state_delta_reuse_delta", "") def encode_json(json_object): diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index a964286d85..424fdcb036 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -28,17 +28,22 @@ block_counter = Counter("synapse_util_metrics_block_count", "", ["block_name"]) block_timer = Counter("synapse_util_metrics_block_time_seconds", "", ["block_name"]) -block_ru_utime = Counter("synapse_util_metrics_block_ru_utime_seconds", "", ["block_name"]) +block_ru_utime = Counter( + "synapse_util_metrics_block_ru_utime_seconds", "", ["block_name"]) -block_ru_stime = Counter("synapse_util_metrics_block_ru_stime_seconds", "", ["block_name"]) +block_ru_stime = Counter( + "synapse_util_metrics_block_ru_stime_seconds", "", ["block_name"]) -block_db_txn_count = Counter("synapse_util_metrics_block_db_txn_count", "", ["block_name"]) +block_db_txn_count = Counter( + "synapse_util_metrics_block_db_txn_count", "", ["block_name"]) # seconds spent waiting for db txns, excluding scheduling time, in this block -block_db_txn_duration = Counter("synapse_util_metrics_block_db_txn_duration_seconds", "", ["block_name"]) +block_db_txn_duration = Counter( + "synapse_util_metrics_block_db_txn_duration_seconds", "", ["block_name"]) # seconds spent waiting for a db connection, in this block -block_db_sched_duration = Counter("synapse_util_metrics_block_db_sched_duration_seconds", "", ["block_name"]) +block_db_sched_duration = Counter( + "synapse_util_metrics_block_db_sched_duration_seconds", "", ["block_name"]) def measure_func(name): -- cgit 1.5.1 From e987079037eb89a3c5c04fe64d843a6f9e6c1718 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Wed, 23 May 2018 13:03:51 -0500 Subject: fixes --- synapse/federation/transaction_queue.py | 8 ++++---- synapse/http/request_metrics.py | 14 +++++++------- synapse/push/bulk_push_rule_evaluator.py | 4 ++-- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'synapse/http') diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 53442688c8..69312ec233 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -41,7 +41,7 @@ import logging logger = logging.getLogger(__name__) sent_pdus_destination_dist = Counter( - "synapse_federation_client_sent_pdu_destinations", "" + "synapse_federation_transaction_queue_sent_pdu_destinations", "" ) @@ -70,7 +70,7 @@ class TransactionQueue(object): self.pending_transactions = {} LaterGauge( - "synapse_federation_client_pending_destinations", + "synapse_federation_transaction_queue_pending_destinations", "", [], lambda: len(self.pending_transactions), @@ -97,13 +97,13 @@ class TransactionQueue(object): self.pending_edus_keyed_by_dest = edus_keyed = {} LaterGauge( - "synapse_federation_client_pending_pdus", + "synapse_federation_transaction_queue_pending_pdus", "", [], lambda: sum(map(len, pdus.values())), ) LaterGauge( - "synapse_federation_client_pending_edus", + "synapse_federation_transaction_queue_pending_edus", "", [], lambda: ( diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py index 7f11b5c5a4..e7df494333 100644 --- a/synapse/http/request_metrics.py +++ b/synapse/http/request_metrics.py @@ -63,46 +63,46 @@ response_db_txn_duration = Counter( # seconds spent waiting for a db connection, when processing this request response_db_sched_duration = Counter( - "synapse_http_request_response_db_sched_duration_seconds", + "synapse_http_server_response_db_sched_duration_seconds", "", ["method", "servlet", "tag"], ) # size in bytes of the response written response_size = Counter( - "synapse_http_request_response_size", "", ["method", "servlet", "tag"] + "synapse_http_server_response_size", "", ["method", "servlet", "tag"] ) # In flight metrics are incremented while the requests are in flight, rather # than when the response was written. in_flight_requests_ru_utime = Counter( - "synapse_http_request_in_flight_requests_ru_utime_seconds", + "synapse_http_server_in_flight_requests_ru_utime_seconds", "", ["method", "servlet"], ) in_flight_requests_ru_stime = Counter( - "synapse_http_request_in_flight_requests_ru_stime_seconds", + "synapse_http_server_in_flight_requests_ru_stime_seconds", "", ["method", "servlet"], ) in_flight_requests_db_txn_count = Counter( - "synapse_http_request_in_flight_requests_db_txn_count", "", ["method", "servlet"] + "synapse_http_server_in_flight_requests_db_txn_count", "", ["method", "servlet"] ) # seconds spent waiting for db txns, excluding scheduling time, when processing # this request in_flight_requests_db_txn_duration = Counter( - "synapse_http_request_in_flight_requests_db_txn_duration_seconds", + "synapse_http_server_in_flight_requests_db_txn_duration_seconds", "", ["method", "servlet"], ) # seconds spent waiting for a db connection, when processing this request in_flight_requests_db_sched_duration = Counter( - "synapse_http_request_in_flight_requests_db_sched_duration_seconds", + "synapse_http_server_in_flight_requests_db_sched_duration_seconds", "", ["method", "servlet"], ) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index b0053e7f3f..da1cde417f 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -37,9 +37,9 @@ rules_by_room = {} push_rules_invalidation_counter = Counter( - "synapse_push_bulk_push_role_evaluator_push_rules_invalidation_counter", "") + "synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter", "") push_rules_state_size_counter = Counter( - "synapse_push_bulk_push_role_evaluator_push_rules_state_size_counter", "") + "synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter", "") # Measures whether we use the fast path of using state deltas, or if we have to # recalculate from scratch -- cgit 1.5.1 From a2eb5db4a066148da5057b8ee844c7e4f5d888bc Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Mon, 28 May 2018 19:10:27 +1000 Subject: update metrics to be in seconds --- synapse/http/request_metrics.py | 6 +++--- synapse/http/site.py | 20 ++++++++++---------- synapse/metrics/__init__.py | 19 ++++++++++--------- synapse/storage/_base.py | 30 +++++++++++++++--------------- 4 files changed, 38 insertions(+), 37 deletions(-) (limited to 'synapse/http') diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py index e7df494333..af3067b4bb 100644 --- a/synapse/http/request_metrics.py +++ b/synapse/http/request_metrics.py @@ -38,15 +38,15 @@ outgoing_responses_counter = Counter( ) response_timer = Histogram( - "synapse_http_server_response_time_seconds", "", ["method", "servlet", "tag"] + "synapse_http_server_response_time_seconds", "sec", ["method", "servlet", "tag"] ) response_ru_utime = Counter( - "synapse_http_server_response_ru_utime_seconds", "", ["method", "servlet", "tag"] + "synapse_http_server_response_ru_utime_seconds", "sec", ["method", "servlet", "tag"] ) response_ru_stime = Counter( - "synapse_http_server_response_ru_stime_seconds", "", ["method", "servlet", "tag"] + "synapse_http_server_response_ru_stime_seconds", "sec", ["method", "servlet", "tag"] ) response_db_txn_count = Counter( diff --git a/synapse/http/site.py b/synapse/http/site.py index 23c1b76922..60299657b9 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -83,7 +83,7 @@ class SynapseRequest(Request): return Request.render(self, resrc) def _started_processing(self, servlet_name): - self.start_time = int(time.time() * 1000) + self.start_time = time.time() self.request_metrics = RequestMetrics() self.request_metrics.start( self.start_time, name=servlet_name, method=self.method, @@ -102,26 +102,26 @@ class SynapseRequest(Request): context = LoggingContext.current_context() ru_utime, ru_stime = context.get_resource_usage() db_txn_count = context.db_txn_count - db_txn_duration_ms = context.db_txn_duration_ms - db_sched_duration_ms = context.db_sched_duration_ms + db_txn_duration_sec = context.db_txn_duration_sec + db_sched_duration_sec = context.db_sched_duration_sec except Exception: ru_utime, ru_stime = (0, 0) - db_txn_count, db_txn_duration_ms = (0, 0) + db_txn_count, db_txn_duration_sec = (0, 0) - end_time = int(time.time() * 1000) + end_time = time.time() self.site.access_logger.info( "%s - %s - {%s}" - " Processed request: %dms (%dms, %dms) (%dms/%dms/%d)" + " Processed request: %.3fsec (%.3fsec, %.3fsec) (%.3fsec/%.3fsec/%d)" " %sB %s \"%s %s %s\" \"%s\"", self.getClientIP(), self.site.site_tag, self.authenticated_entity, end_time - self.start_time, - int(ru_utime * 1000), - int(ru_stime * 1000), - db_sched_duration_ms, - db_txn_duration_ms, + ru_utime, + ru_stime, + db_sched_duration_sec, + db_txn_duration_sec, int(db_txn_count), self.sentLength, self.code, diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index e33ed6c9be..0c557a43f6 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -122,10 +122,10 @@ REGISTRY.register(CPUMetrics()) gc_unreachable = Gauge("python_gc_unreachable_total", "Unreachable GC objects", ["gen"]) gc_time = Histogram( "python_gc_time", - "Time taken to GC (ms)", + "Time taken to GC (sec)", ["gen"], - buckets=[2.5, 5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000, 7500, 15000, - 30000, 45000, 60000], + buckets=[0.0025, 0.005, 0.01, 0.025, 0.05, 0.10, 0.25, 0.50, 1.00, 2.50, + 5.00, 7.50, 15.00, 30.00, 45.00, 60.00], ) @@ -147,8 +147,9 @@ REGISTRY.register(GCCounts()) tick_time = Histogram( "python_twisted_reactor_tick_time", - "Tick time of the Twisted reactor (ms)", - buckets=[1, 2, 5, 10, 50, 100, 250, 500, 1000, 2000], + "Tick time of the Twisted reactor (sec)", + buckets=[0.001, 0.002, 0.005, 0.001, 0.005, 0.01. 0.025, 0.05, 0.1, 0.2, + 0.5, 1, 2, 5], ) pending_calls_metric = Histogram( "python_twisted_reactor_pending_calls", @@ -202,9 +203,9 @@ def runUntilCurrentTimer(func): num_pending += 1 num_pending += len(reactor.threadCallQueue) - start = time.time() * 1000 + start = time.time() ret = func(*args, **kwargs) - end = time.time() * 1000 + end = time.time() # record the amount of wallclock time spent running pending calls. # This is a proxy for the actual amount of time between reactor polls, @@ -225,9 +226,9 @@ def runUntilCurrentTimer(func): if threshold[i] < counts[i]: logger.info("Collecting gc %d", i) - start = time.time() * 1000 + start = time.time() unreachable = gc.collect(i) - end = time.time() * 1000 + end = time.time() gc_time.labels(i).observe(end - start) gc_unreachable.labels(i).set(unreachable) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index d963af5c89..22d6257a9f 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -42,10 +42,10 @@ sql_logger = logging.getLogger("synapse.storage.SQL") transaction_logger = logging.getLogger("synapse.storage.txn") perf_logger = logging.getLogger("synapse.storage.TIME") -sql_scheduling_timer = Histogram("synapse_storage_schedule_time", "") +sql_scheduling_timer = Histogram("synapse_storage_schedule_time", "sec") -sql_query_timer = Histogram("synapse_storage_query_time", "", ["verb"]) -sql_txn_timer = Histogram("synapse_storage_transaction_time", "", ["desc"]) +sql_query_timer = Histogram("synapse_storage_query_time", "sec", ["verb"]) +sql_txn_timer = Histogram("synapse_storage_transaction_time", "sec", ["desc"]) class LoggingTransaction(object): @@ -110,7 +110,7 @@ class LoggingTransaction(object): # Don't let logging failures stop SQL from working pass - start = time.time() * 1000 + start = time.time() try: return func( @@ -120,9 +120,9 @@ class LoggingTransaction(object): logger.debug("[SQL FAIL] {%s} %s", self.name, e) raise finally: - msecs = (time.time() * 1000) - start - sql_logger.debug("[SQL time] {%s} %f", self.name, msecs) - sql_query_timer.labels(sql.split()[0]).observe(msecs) + secs = time.time() - start + sql_logger.debug("[SQL time] {%s} %f sec", self.name, secs) + sql_query_timer.labels(sql.split()[0]).observe(secs) class PerformanceCounters(object): @@ -132,7 +132,7 @@ class PerformanceCounters(object): def update(self, key, start_time, end_time=None): if end_time is None: - end_time = time.time() * 1000 + end_time = time.time() duration = end_time - start_time count, cum_time = self.current_counters.get(key, (0, 0)) count += 1 @@ -222,7 +222,7 @@ class SQLBaseStore(object): def _new_transaction(self, conn, desc, after_callbacks, exception_callbacks, logging_context, func, *args, **kwargs): - start = time.time() * 1000 + start = time.time() txn_id = self._TXN_ID # We don't really need these to be unique, so lets stop it from @@ -282,13 +282,13 @@ class SQLBaseStore(object): logger.debug("[TXN FAIL] {%s} %s", name, e) raise finally: - end = time.time() * 1000 + end = time.time() duration = end - start if logging_context is not None: logging_context.add_database_transaction(duration) - transaction_logger.debug("[TXN END] {%s} %f", name, duration) + transaction_logger.debug("[TXN END] {%s} %f sec", name, duration) self._current_txn_total_time += duration self._txn_perf_counters.update(desc, start, end) @@ -349,13 +349,13 @@ class SQLBaseStore(object): """ current_context = LoggingContext.current_context() - start_time = time.time() * 1000 + start_time = time.time() def inner_func(conn, *args, **kwargs): with LoggingContext("runWithConnection") as context: - sched_duration_ms = time.time() * 1000 - start_time - sql_scheduling_timer.observe(sched_duration_ms) - current_context.add_database_scheduled(sched_duration_ms) + sched_duration_sec = time.time() - start_time + sql_scheduling_timer.observe(sched_duration_sec) + current_context.add_database_scheduled(sched_duration_sec) if self.database_engine.is_connection_closed(conn): logger.debug("Reconnecting closed database connection") -- cgit 1.5.1 From 3ef5cd74a6bff9b33144cb834782e0402e6eb152 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Mon, 28 May 2018 19:39:27 +1000 Subject: update to more consistently use seconds in any metrics or logging --- synapse/http/request_metrics.py | 38 +++++++++++++++++++------------------- synapse/util/logcontext.py | 24 ++++++++++++------------ synapse/util/logutils.py | 6 +++--- synapse/util/metrics.py | 8 ++++---- 4 files changed, 38 insertions(+), 38 deletions(-) (limited to 'synapse/http') diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py index af3067b4bb..dc06f6c443 100644 --- a/synapse/http/request_metrics.py +++ b/synapse/http/request_metrics.py @@ -139,8 +139,8 @@ LaterGauge( class RequestMetrics(object): - def start(self, time_msec, name, method): - self.start = time_msec + def start(self, time_sec, name, method): + self.start = time_sec self.start_context = LoggingContext.current_context() self.name = name self.method = method @@ -149,7 +149,7 @@ class RequestMetrics(object): _in_flight_requests.add(self) - def stop(self, time_msec, request): + def stop(self, time_sec, request): _in_flight_requests.discard(self) context = LoggingContext.current_context() @@ -170,7 +170,7 @@ class RequestMetrics(object): response_count.labels(request.method, self.name, tag).inc() response_timer.labels(request.method, self.name, tag).observe( - time_msec - self.start + time_sec - self.start ) ru_utime, ru_stime = context.get_resource_usage() @@ -181,10 +181,10 @@ class RequestMetrics(object): context.db_txn_count ) response_db_txn_duration.labels(request.method, self.name, tag).inc( - context.db_txn_duration_ms / 1000. + context.db_txn_duration_sec ) response_db_sched_duration.labels(request.method, self.name, tag).inc( - context.db_sched_duration_ms / 1000. + context.db_sched_duration_sec ) response_size.labels(request.method, self.name, tag).inc(request.sentLength) @@ -207,11 +207,11 @@ class RequestMetrics(object): ) in_flight_requests_db_txn_duration.labels(self.method, self.name).inc( - diff.db_txn_duration_ms / 1000. + diff.db_txn_duration_sec ) in_flight_requests_db_sched_duration.labels(self.method, self.name).inc( - diff.db_sched_duration_ms / 1000. + diff.db_sched_duration_sec ) @@ -223,18 +223,18 @@ class _RequestStats(object): "ru_utime", "ru_stime", "db_txn_count", - "db_txn_duration_ms", - "db_sched_duration_ms", + "db_txn_duration_sec", + "db_sched_duration_sec", ] def __init__( - self, ru_utime, ru_stime, db_txn_count, db_txn_duration_ms, db_sched_duration_ms + self, ru_utime, ru_stime, db_txn_count, db_txn_duration_sec, db_sched_duration_sec ): self.ru_utime = ru_utime self.ru_stime = ru_stime self.db_txn_count = db_txn_count - self.db_txn_duration_ms = db_txn_duration_ms - self.db_sched_duration_ms = db_sched_duration_ms + self.db_txn_duration_sec = db_txn_duration_sec + self.db_sched_duration_sec = db_sched_duration_sec @staticmethod def from_context(context): @@ -243,8 +243,8 @@ class _RequestStats(object): return _RequestStats( ru_utime, ru_stime, context.db_txn_count, - context.db_txn_duration_ms, - context.db_sched_duration_ms, + context.db_txn_duration_sec, + context.db_sched_duration_sec, ) def update(self, context): @@ -260,14 +260,14 @@ class _RequestStats(object): new.ru_utime - self.ru_utime, new.ru_stime - self.ru_stime, new.db_txn_count - self.db_txn_count, - new.db_txn_duration_ms - self.db_txn_duration_ms, - new.db_sched_duration_ms - self.db_sched_duration_ms, + new.db_txn_duration_sec - self.db_txn_duration_sec, + new.db_sched_duration_sec - self.db_sched_duration_sec, ) self.ru_utime = new.ru_utime self.ru_stime = new.ru_stime self.db_txn_count = new.db_txn_count - self.db_txn_duration_ms = new.db_txn_duration_ms - self.db_sched_duration_ms = new.db_sched_duration_ms + self.db_txn_duration_sec = new.db_txn_duration_sec + self.db_sched_duration_sec = new.db_sched_duration_sec return diff diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index 914f616312..a58c723403 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -59,7 +59,7 @@ class LoggingContext(object): __slots__ = [ "previous_context", "name", "ru_stime", "ru_utime", - "db_txn_count", "db_txn_duration_ms", "db_sched_duration_ms", + "db_txn_count", "db_txn_duration_sec", "db_sched_duration_sec", "usage_start", "main_thread", "alive", "request", "tag", @@ -84,10 +84,10 @@ class LoggingContext(object): def stop(self): pass - def add_database_transaction(self, duration_ms): + def add_database_transaction(self, duration_sec): pass - def add_database_scheduled(self, sched_ms): + def add_database_scheduled(self, sched_sec): pass def __nonzero__(self): @@ -103,11 +103,11 @@ class LoggingContext(object): self.ru_utime = 0. self.db_txn_count = 0 - # ms spent waiting for db txns, excluding scheduling time - self.db_txn_duration_ms = 0 + # sec spent waiting for db txns, excluding scheduling time + self.db_txn_duration_sec = 0 - # ms spent waiting for db txns to be scheduled - self.db_sched_duration_ms = 0 + # sec spent waiting for db txns to be scheduled + self.db_sched_duration_sec = 0 # If alive has the thread resource usage when the logcontext last # became active. @@ -230,18 +230,18 @@ class LoggingContext(object): return ru_utime, ru_stime - def add_database_transaction(self, duration_ms): + def add_database_transaction(self, duration_sec): self.db_txn_count += 1 - self.db_txn_duration_ms += duration_ms + self.db_txn_duration_sec += duration_sec - def add_database_scheduled(self, sched_ms): + def add_database_scheduled(self, sched_sec): """Record a use of the database pool Args: - sched_ms (int): number of milliseconds it took us to get a + sched_sec (float): number of seconds it took us to get a connection """ - self.db_sched_duration_ms += sched_ms + self.db_sched_duration_sec += sched_sec class LoggingContextFilter(logging.Filter): diff --git a/synapse/util/logutils.py b/synapse/util/logutils.py index 3a83828d25..03249c5dc8 100644 --- a/synapse/util/logutils.py +++ b/synapse/util/logutils.py @@ -96,7 +96,7 @@ def time_function(f): id = _TIME_FUNC_ID _TIME_FUNC_ID += 1 - start = time.clock() * 1000 + start = time.clock() try: _log_debug_as_f( @@ -107,10 +107,10 @@ def time_function(f): r = f(*args, **kwargs) finally: - end = time.clock() * 1000 + end = time.clock() _log_debug_as_f( f, - "[FUNC END] {%s-%d} %f", + "[FUNC END] {%s-%d} %.3f sec", (func_name, id, end - start,), ) diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index 424fdcb036..23fc0ca20d 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -74,7 +74,7 @@ class Measure(object): self.created_context = False def __enter__(self): - self.start = self.clock.time_msec() + self.start = self.clock.time() self.start_context = LoggingContext.current_context() if not self.start_context: self.start_context = LoggingContext("Measure") @@ -90,7 +90,7 @@ class Measure(object): if isinstance(exc_type, Exception) or not self.start_context: return - duration = self.clock.time_msec() - self.start + duration = self.clock.time() - self.start block_counter.labels(self.name).inc() block_timer.labels(self.name).inc(duration) @@ -114,9 +114,9 @@ class Measure(object): block_ru_stime.labels(self.name).inc(ru_stime - self.ru_stime) block_db_txn_count.labels(self.name).inc(context.db_txn_count - self.db_txn_count) block_db_txn_duration.labels(self.name).inc( - (context.db_txn_duration_ms - self.db_txn_duration_ms) / 1000.) + context.db_txn_duration_sec - self.db_txn_duration_sec) block_db_sched_duration.labels(self.name).inc( - (context.db_sched_duration_ms - self.db_sched_duration_ms) / 1000.) + context.db_sched_duration_sec - self.db_sched_duration_sec) if self.created_context: self.start_context.__exit__(exc_type, exc_val, exc_tb) -- cgit 1.5.1