From 49af4020190eae6b0c65897d96cd2be286364d2b Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Mon, 9 Jul 2018 16:09:20 +1000 Subject: run isort --- synapse/util/__init__.py | 1 + synapse/util/async.py | 18 ++++++++++-------- synapse/util/caches/__init__.py | 6 +++--- synapse/util/caches/descriptors.py | 22 ++++++++++------------ synapse/util/caches/dictionary_cache.py | 9 +++++---- synapse/util/caches/expiringcache.py | 5 ++--- synapse/util/caches/lrucache.py | 2 +- synapse/util/caches/stream_change_cache.py | 5 ++--- synapse/util/file_consumer.py | 4 ++-- synapse/util/frozenutils.py | 6 +++--- synapse/util/httpresourcetree.py | 4 ++-- synapse/util/logcontext.py | 6 +++--- synapse/util/logformatter.py | 3 ++- synapse/util/logutils.py | 8 +++----- synapse/util/manhole.py | 6 +++--- synapse/util/metrics.py | 8 ++++---- synapse/util/msisdn.py | 1 + synapse/util/ratelimitutils.py | 13 ++++++------- synapse/util/retryutils.py | 9 ++++----- synapse/util/rlimit.py | 3 +-- synapse/util/stringutils.py | 1 + synapse/util/versionstring.py | 4 ++-- 22 files changed, 71 insertions(+), 73 deletions(-) (limited to 'synapse/util') diff --git a/synapse/util/__init__.py b/synapse/util/__init__.py index e9886ef299..680ea928c7 100644 --- a/synapse/util/__init__.py +++ b/synapse/util/__init__.py @@ -17,6 +17,7 @@ import logging from itertools import islice import attr + from twisted.internet import defer, task from synapse.util.logcontext import PreserveLoggingContext diff --git a/synapse/util/async.py b/synapse/util/async.py index 1668df4ce6..5d0fb39130 100644 --- a/synapse/util/async.py +++ b/synapse/util/async.py @@ -13,20 +13,22 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging +from contextlib import contextmanager + +from six.moves import range + from twisted.internet import defer from twisted.internet.defer import CancelledError from twisted.python import failure +from synapse.util import Clock, logcontext, unwrapFirstError + from .logcontext import ( - PreserveLoggingContext, make_deferred_yieldable, run_in_background + PreserveLoggingContext, + make_deferred_yieldable, + run_in_background, ) -from synapse.util import logcontext, unwrapFirstError, Clock - -from contextlib import contextmanager - -import logging - -from six.moves import range logger = logging.getLogger(__name__) diff --git a/synapse/util/caches/__init__.py b/synapse/util/caches/__init__.py index 900575eb3c..7b065b195e 100644 --- a/synapse/util/caches/__init__.py +++ b/synapse/util/caches/__init__.py @@ -13,12 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -from prometheus_client.core import Gauge, REGISTRY, GaugeMetricFamily - import os -from six.moves import intern import six +from six.moves import intern + +from prometheus_client.core import REGISTRY, Gauge, GaugeMetricFamily CACHE_SIZE_FACTOR = float(os.environ.get("SYNAPSE_CACHE_FACTOR", 0.5)) diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index 65a1042de1..f8a07df6b8 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -13,10 +13,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import functools +import inspect import logging +import threading +from collections import namedtuple +import six +from six import itervalues, string_types + +from twisted.internet import defer + +from synapse.util import logcontext, unwrapFirstError from synapse.util.async import ObservableDeferred -from synapse.util import unwrapFirstError, logcontext from synapse.util.caches import get_cache_factor_for from synapse.util.caches.lrucache import LruCache from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry @@ -24,17 +33,6 @@ from synapse.util.stringutils import to_ascii from . import register_cache -from twisted.internet import defer -from collections import namedtuple - -import functools -import inspect -import threading - -from six import string_types, itervalues -import six - - logger = logging.getLogger(__name__) diff --git a/synapse/util/caches/dictionary_cache.py b/synapse/util/caches/dictionary_cache.py index 95793d466d..6c0b5a4094 100644 --- a/synapse/util/caches/dictionary_cache.py +++ b/synapse/util/caches/dictionary_cache.py @@ -13,12 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from synapse.util.caches.lrucache import LruCache -from collections import namedtuple -from . import register_cache -import threading import logging +import threading +from collections import namedtuple + +from synapse.util.caches.lrucache import LruCache +from . import register_cache logger = logging.getLogger(__name__) diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py index ff04c91955..4abca91f6d 100644 --- a/synapse/util/caches/expiringcache.py +++ b/synapse/util/caches/expiringcache.py @@ -13,11 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from synapse.util.caches import register_cache - -from collections import OrderedDict import logging +from collections import OrderedDict +from synapse.util.caches import register_cache logger = logging.getLogger(__name__) diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index 1c5a982094..b684f24e7b 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -14,8 +14,8 @@ # limitations under the License. -from functools import wraps import threading +from functools import wraps from synapse.util.caches.treecache import TreeCache diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py index 0fb8620001..8637867c6d 100644 --- a/synapse/util/caches/stream_change_cache.py +++ b/synapse/util/caches/stream_change_cache.py @@ -13,12 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from synapse.util import caches - +import logging from sortedcontainers import SortedDict -import logging +from synapse.util import caches logger = logging.getLogger(__name__) diff --git a/synapse/util/file_consumer.py b/synapse/util/file_consumer.py index c78801015b..629ed44149 100644 --- a/synapse/util/file_consumer.py +++ b/synapse/util/file_consumer.py @@ -13,12 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +from six.moves import queue + from twisted.internet import threads from synapse.util.logcontext import make_deferred_yieldable, run_in_background -from six.moves import queue - class BackgroundFileConsumer(object): """A consumer that writes to a file like object. Supports both push diff --git a/synapse/util/frozenutils.py b/synapse/util/frozenutils.py index 535e7d0e7a..581c6052ac 100644 --- a/synapse/util/frozenutils.py +++ b/synapse/util/frozenutils.py @@ -13,11 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from frozendict import frozendict -from canonicaljson import json - from six import string_types +from canonicaljson import json +from frozendict import frozendict + def freeze(o): if isinstance(o, dict): diff --git a/synapse/util/httpresourcetree.py b/synapse/util/httpresourcetree.py index e9f0f292ee..2d7ddc1cbe 100644 --- a/synapse/util/httpresourcetree.py +++ b/synapse/util/httpresourcetree.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from twisted.web.resource import NoResource - import logging +from twisted.web.resource import NoResource + logger = logging.getLogger(__name__) diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index df2b71b791..fe9288b031 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -22,10 +22,10 @@ them. See doc/log_contexts.rst for details on how this works. """ -from twisted.internet import defer - -import threading import logging +import threading + +from twisted.internet import defer logger = logging.getLogger(__name__) diff --git a/synapse/util/logformatter.py b/synapse/util/logformatter.py index 3e42868ea9..a46bc47ce3 100644 --- a/synapse/util/logformatter.py +++ b/synapse/util/logformatter.py @@ -14,10 +14,11 @@ # limitations under the License. -from six import StringIO import logging import traceback +from six import StringIO + class LogFormatter(logging.Formatter): """Log formatter which gives more detail for exceptions diff --git a/synapse/util/logutils.py b/synapse/util/logutils.py index 03249c5dc8..62a00189cc 100644 --- a/synapse/util/logutils.py +++ b/synapse/util/logutils.py @@ -14,13 +14,11 @@ # limitations under the License. -from inspect import getcallargs -from functools import wraps - -import logging import inspect +import logging import time - +from functools import wraps +from inspect import getcallargs _TIME_FUNC_ID = 0 diff --git a/synapse/util/manhole.py b/synapse/util/manhole.py index 97e0f00b67..14be3c7396 100644 --- a/synapse/util/manhole.py +++ b/synapse/util/manhole.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from twisted.conch.manhole import ColoredManhole -from twisted.conch.insults import insults from twisted.conch import manhole_ssh -from twisted.cred import checkers, portal +from twisted.conch.insults import insults +from twisted.conch.manhole import ColoredManhole from twisted.conch.ssh.keys import Key +from twisted.cred import checkers, portal PUBLIC_KEY = ( "ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAGEArzJx8OYOnJmzf4tfBEvLi8DVPrJ3/c9k2I/Az" diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index 1ba7d65c7c..63bc64c642 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -13,14 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from twisted.internet import defer +import logging +from functools import wraps from prometheus_client import Counter -from synapse.util.logcontext import LoggingContext -from functools import wraps -import logging +from twisted.internet import defer +from synapse.util.logcontext import LoggingContext logger = logging.getLogger(__name__) diff --git a/synapse/util/msisdn.py b/synapse/util/msisdn.py index 607161e7f0..a6c30e5265 100644 --- a/synapse/util/msisdn.py +++ b/synapse/util/msisdn.py @@ -14,6 +14,7 @@ # limitations under the License. import phonenumbers + from synapse.api.errors import SynapseError diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index c5a45cef7c..5ac33b2132 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -13,20 +13,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import collections +import contextlib +import logging + from twisted.internet import defer from synapse.api.errors import LimitExceededError - from synapse.util.logcontext import ( - run_in_background, make_deferred_yieldable, PreserveLoggingContext, + make_deferred_yieldable, + run_in_background, ) -import collections -import contextlib -import logging - - logger = logging.getLogger(__name__) diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py index 4e93f69d3a..8a3a06fd74 100644 --- a/synapse/util/retryutils.py +++ b/synapse/util/retryutils.py @@ -12,14 +12,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import synapse.util.logcontext -from twisted.internet import defer - -from synapse.api.errors import CodeMessageException - import logging import random +from twisted.internet import defer + +import synapse.util.logcontext +from synapse.api.errors import CodeMessageException logger = logging.getLogger(__name__) diff --git a/synapse/util/rlimit.py b/synapse/util/rlimit.py index f4a9abf83f..6c0f2bb0cf 100644 --- a/synapse/util/rlimit.py +++ b/synapse/util/rlimit.py @@ -13,9 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import resource import logging - +import resource logger = logging.getLogger("synapse.app.homeserver") diff --git a/synapse/util/stringutils.py b/synapse/util/stringutils.py index b98b9dc6e4..43d9db67ec 100644 --- a/synapse/util/stringutils.py +++ b/synapse/util/stringutils.py @@ -15,6 +15,7 @@ import random import string + from six.moves import range _string_with_symbols = ( diff --git a/synapse/util/versionstring.py b/synapse/util/versionstring.py index 52086df465..1fbcd41115 100644 --- a/synapse/util/versionstring.py +++ b/synapse/util/versionstring.py @@ -14,9 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import subprocess -import os import logging +import os +import subprocess logger = logging.getLogger(__name__) -- cgit 1.5.1 From 55370331da54c46c04253b009865097fe9e95191 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 10 Jul 2018 13:56:07 +0100 Subject: Refactor logcontext resource usage tracking (#3501) Factor out the resource usage tracking out to a separate object, which can be passed around and copied independently of the logcontext itself. --- changelog.d/3501.misc | 0 synapse/http/request_metrics.py | 83 +++++------------------ synapse/http/site.py | 24 +++---- synapse/util/logcontext.py | 144 +++++++++++++++++++++++++++++++--------- synapse/util/metrics.py | 25 +++---- 5 files changed, 146 insertions(+), 130 deletions(-) create mode 100644 changelog.d/3501.misc (limited to 'synapse/util') diff --git a/changelog.d/3501.misc b/changelog.d/3501.misc new file mode 100644 index 0000000000..e69de29bb2 diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py index bcaa2a9ecc..f24b4b949c 100644 --- a/synapse/http/request_metrics.py +++ b/synapse/http/request_metrics.py @@ -149,7 +149,9 @@ class RequestMetrics(object): self.name = name self.method = method - self._request_stats = _RequestStats.from_context(self.start_context) + # _request_stats records resource usage that we have already added + # to the "in flight" metrics. + self._request_stats = self.start_context.get_resource_usage() _in_flight_requests.add(self) @@ -177,18 +179,22 @@ class RequestMetrics(object): time_sec - self.start ) - ru_utime, ru_stime = context.get_resource_usage() + resource_usage = context.get_resource_usage() - response_ru_utime.labels(request.method, self.name, tag).inc(ru_utime) - response_ru_stime.labels(request.method, self.name, tag).inc(ru_stime) + response_ru_utime.labels(request.method, self.name, tag).inc( + resource_usage.ru_utime, + ) + response_ru_stime.labels(request.method, self.name, tag).inc( + resource_usage.ru_stime, + ) response_db_txn_count.labels(request.method, self.name, tag).inc( - context.db_txn_count + resource_usage.db_txn_count ) response_db_txn_duration.labels(request.method, self.name, tag).inc( - context.db_txn_duration_sec + resource_usage.db_txn_duration_sec ) response_db_sched_duration.labels(request.method, self.name, tag).inc( - context.db_sched_duration_sec + resource_usage.db_sched_duration_sec ) response_size.labels(request.method, self.name, tag).inc(request.sentLength) @@ -201,7 +207,10 @@ class RequestMetrics(object): def update_metrics(self): """Updates the in flight metrics with values from this request. """ - diff = self._request_stats.update(self.start_context) + new_stats = self.start_context.get_resource_usage() + + diff = new_stats - self._request_stats + self._request_stats = new_stats in_flight_requests_ru_utime.labels(self.method, self.name).inc(diff.ru_utime) in_flight_requests_ru_stime.labels(self.method, self.name).inc(diff.ru_stime) @@ -217,61 +226,3 @@ class RequestMetrics(object): in_flight_requests_db_sched_duration.labels(self.method, self.name).inc( diff.db_sched_duration_sec ) - - -class _RequestStats(object): - """Keeps tracks of various metrics for an in flight request. - """ - - __slots__ = [ - "ru_utime", - "ru_stime", - "db_txn_count", - "db_txn_duration_sec", - "db_sched_duration_sec", - ] - - def __init__( - self, ru_utime, ru_stime, db_txn_count, db_txn_duration_sec, db_sched_duration_sec - ): - self.ru_utime = ru_utime - self.ru_stime = ru_stime - self.db_txn_count = db_txn_count - self.db_txn_duration_sec = db_txn_duration_sec - self.db_sched_duration_sec = db_sched_duration_sec - - @staticmethod - def from_context(context): - ru_utime, ru_stime = context.get_resource_usage() - - return _RequestStats( - ru_utime, ru_stime, - context.db_txn_count, - context.db_txn_duration_sec, - context.db_sched_duration_sec, - ) - - def update(self, context): - """Updates the current values and returns the difference between the - old and new values. - - Returns: - _RequestStats: The difference between the old and new values - """ - new = _RequestStats.from_context(context) - - diff = _RequestStats( - new.ru_utime - self.ru_utime, - new.ru_stime - self.ru_stime, - new.db_txn_count - self.db_txn_count, - new.db_txn_duration_sec - self.db_txn_duration_sec, - new.db_sched_duration_sec - self.db_sched_duration_sec, - ) - - self.ru_utime = new.ru_utime - self.ru_stime = new.ru_stime - self.db_txn_count = new.db_txn_count - self.db_txn_duration_sec = new.db_txn_duration_sec - self.db_sched_duration_sec = new.db_sched_duration_sec - - return diff diff --git a/synapse/http/site.py b/synapse/http/site.py index fc954e343c..21e26f9c5e 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -20,7 +20,7 @@ from twisted.web.server import Request, Site from synapse.http import redact_uri from synapse.http.request_metrics import RequestMetrics -from synapse.util.logcontext import LoggingContext +from synapse.util.logcontext import LoggingContext, ContextResourceUsage logger = logging.getLogger(__name__) @@ -95,15 +95,9 @@ class SynapseRequest(Request): def _finished_processing(self): try: context = LoggingContext.current_context() - ru_utime, ru_stime = context.get_resource_usage() - db_txn_count = context.db_txn_count - db_txn_duration_sec = context.db_txn_duration_sec - db_sched_duration_sec = context.db_sched_duration_sec - evt_db_fetch_count = context.evt_db_fetch_count + usage = context.get_resource_usage() except Exception: - ru_utime, ru_stime = (0, 0) - db_txn_count, db_txn_duration_sec = (0, 0) - evt_db_fetch_count = 0 + usage = ContextResourceUsage() end_time = time.time() @@ -130,18 +124,18 @@ class SynapseRequest(Request): self.site.site_tag, authenticated_entity, end_time - self.start_time, - ru_utime, - ru_stime, - db_sched_duration_sec, - db_txn_duration_sec, - int(db_txn_count), + usage.ru_utime, + usage.ru_stime, + usage.db_sched_duration_sec, + usage.db_txn_duration_sec, + int(usage.db_txn_count), self.sentLength, self.code, self.method, self.get_redacted_uri(), self.clientproto, user_agent, - evt_db_fetch_count, + usage.evt_db_fetch_count, ) try: diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index fe9288b031..44a0a56818 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -49,6 +49,90 @@ except Exception: return None +class ContextResourceUsage(object): + """Object for tracking the resources used by a log context + + Attributes: + ru_utime (float): user CPU time (in seconds) + ru_stime (float): system CPU time (in seconds) + db_txn_count (int): number of database transactions done + db_sched_duration_sec (float): amount of time spent waiting for a + database connection + db_txn_duration_sec (float): amount of time spent doing database + transactions (excluding scheduling time) + evt_db_fetch_count (int): number of events requested from the database + """ + + __slots__ = [ + "ru_stime", "ru_utime", + "db_txn_count", "db_txn_duration_sec", "db_sched_duration_sec", + "evt_db_fetch_count", + ] + + def __init__(self, copy_from=None): + """Create a new ContextResourceUsage + + Args: + copy_from (ContextResourceUsage|None): if not None, an object to + copy stats from + """ + if copy_from is None: + self.reset() + else: + self.ru_utime = copy_from.ru_utime + self.ru_stime = copy_from.ru_stime + self.db_txn_count = copy_from.db_txn_count + + self.db_txn_duration_sec = copy_from.db_txn_duration_sec + self.db_sched_duration_sec = copy_from.db_sched_duration_sec + self.evt_db_fetch_count = copy_from.evt_db_fetch_count + + def copy(self): + return ContextResourceUsage(copy_from=self) + + def reset(self): + self.ru_stime = 0. + self.ru_utime = 0. + self.db_txn_count = 0 + + self.db_txn_duration_sec = 0 + self.db_sched_duration_sec = 0 + self.evt_db_fetch_count = 0 + + def __iadd__(self, other): + """Add another ContextResourceUsage's stats to this one's. + + Args: + other (ContextResourceUsage): the other resource usage object + """ + self.ru_utime += other.ru_utime + self.ru_stime += other.ru_stime + self.db_txn_count += other.db_txn_count + self.db_txn_duration_sec += other.db_txn_duration_sec + self.db_sched_duration_sec += other.db_sched_duration_sec + self.evt_db_fetch_count += other.evt_db_fetch_count + return self + + def __isub__(self, other): + self.ru_utime -= other.ru_utime + self.ru_stime -= other.ru_stime + self.db_txn_count -= other.db_txn_count + self.db_txn_duration_sec -= other.db_txn_duration_sec + self.db_sched_duration_sec -= other.db_sched_duration_sec + self.evt_db_fetch_count -= other.evt_db_fetch_count + return self + + def __add__(self, other): + res = ContextResourceUsage(copy_from=self) + res += other + return res + + def __sub__(self, other): + res = ContextResourceUsage(copy_from=self) + res -= other + return res + + class LoggingContext(object): """Additional context for log formatting. Contexts are scoped within a "with" block. @@ -58,9 +142,8 @@ class LoggingContext(object): """ __slots__ = [ - "previous_context", "name", "ru_stime", "ru_utime", - "db_txn_count", "db_txn_duration_sec", "db_sched_duration_sec", - "evt_db_fetch_count", + "previous_context", "name", + "_resource_usage", "usage_start", "main_thread", "alive", "request", "tag", @@ -103,18 +186,9 @@ class LoggingContext(object): def __init__(self, name=None): self.previous_context = LoggingContext.current_context() self.name = name - self.ru_stime = 0. - self.ru_utime = 0. - self.db_txn_count = 0 - # sec spent waiting for db txns, excluding scheduling time - self.db_txn_duration_sec = 0 - - # sec spent waiting for db txns to be scheduled - self.db_sched_duration_sec = 0 - - # number of events this thread has fetched from the db - self.evt_db_fetch_count = 0 + # track the resources used by this context so far + self._resource_usage = ContextResourceUsage() # If alive has the thread resource usage when the logcontext last # became active. @@ -207,39 +281,43 @@ class LoggingContext(object): logger.warning("Stopped logcontext %s on different thread", self) return - # When we stop, let's record the resource used since we started - if self.usage_start: - usage_end = get_thread_resource_usage() + # When we stop, let's record the cpu used since we started + if not self.usage_start: + logger.warning( + "Called stop on logcontext %s without calling start", self, + ) + return + + usage_end = get_thread_resource_usage() - self.ru_utime += usage_end.ru_utime - self.usage_start.ru_utime - self.ru_stime += usage_end.ru_stime - self.usage_start.ru_stime + self._resource_usage.ru_utime += usage_end.ru_utime - self.usage_start.ru_utime + self._resource_usage.ru_stime += usage_end.ru_stime - self.usage_start.ru_stime - self.usage_start = None - else: - logger.warning("Called stop on logcontext %s without calling start", self) + self.usage_start = None def get_resource_usage(self): - """Get CPU time used by this logcontext so far. + """Get resources used by this logcontext so far. Returns: - tuple[float, float]: The user and system CPU usage in seconds + ContextResourceUsage: a *copy* of the object tracking resource + usage so far """ - ru_utime = self.ru_utime - ru_stime = self.ru_stime + # we always return a copy, for consistency + res = self._resource_usage.copy() # If we are on the correct thread and we're currently running then we # can include resource usage so far. is_main_thread = threading.current_thread() is self.main_thread if self.alive and self.usage_start and is_main_thread: current = get_thread_resource_usage() - ru_utime += current.ru_utime - self.usage_start.ru_utime - ru_stime += current.ru_stime - self.usage_start.ru_stime + res.ru_utime += current.ru_utime - self.usage_start.ru_utime + res.ru_stime += current.ru_stime - self.usage_start.ru_stime - return ru_utime, ru_stime + return res def add_database_transaction(self, duration_sec): - self.db_txn_count += 1 - self.db_txn_duration_sec += duration_sec + self._resource_usage.db_txn_count += 1 + self._resource_usage.db_txn_duration_sec += duration_sec def add_database_scheduled(self, sched_sec): """Record a use of the database pool @@ -248,7 +326,7 @@ class LoggingContext(object): sched_sec (float): number of seconds it took us to get a connection """ - self.db_sched_duration_sec += sched_sec + self._resource_usage.db_sched_duration_sec += sched_sec def record_event_fetch(self, event_count): """Record a number of events being fetched from the db @@ -256,7 +334,7 @@ class LoggingContext(object): Args: event_count (int): number of events being fetched """ - self.evt_db_fetch_count += event_count + self._resource_usage.evt_db_fetch_count += event_count class LoggingContextFilter(logging.Filter): diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index 63bc64c642..6ba7107896 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -60,10 +60,9 @@ def measure_func(name): class Measure(object): __slots__ = [ - "clock", "name", "start_context", "start", "new_context", "ru_utime", - "ru_stime", - "db_txn_count", "db_txn_duration_sec", "db_sched_duration_sec", + "clock", "name", "start_context", "start", "created_context", + "start_usage", ] def __init__(self, clock, name): @@ -81,10 +80,7 @@ class Measure(object): self.start_context.__enter__() self.created_context = True - self.ru_utime, self.ru_stime = self.start_context.get_resource_usage() - self.db_txn_count = self.start_context.db_txn_count - self.db_txn_duration_sec = self.start_context.db_txn_duration_sec - self.db_sched_duration_sec = self.start_context.db_sched_duration_sec + self.start_usage = self.start_context.get_resource_usage() def __exit__(self, exc_type, exc_val, exc_tb): if isinstance(exc_type, Exception) or not self.start_context: @@ -108,15 +104,12 @@ class Measure(object): logger.warn("Expected context. (%r)", self.name) return - ru_utime, ru_stime = context.get_resource_usage() - - block_ru_utime.labels(self.name).inc(ru_utime - self.ru_utime) - block_ru_stime.labels(self.name).inc(ru_stime - self.ru_stime) - block_db_txn_count.labels(self.name).inc(context.db_txn_count - self.db_txn_count) - block_db_txn_duration.labels(self.name).inc( - context.db_txn_duration_sec - self.db_txn_duration_sec) - block_db_sched_duration.labels(self.name).inc( - context.db_sched_duration_sec - self.db_sched_duration_sec) + usage = context.get_resource_usage() - self.start_usage + block_ru_utime.labels(self.name).inc(usage.ru_utime) + block_ru_stime.labels(self.name).inc(usage.ru_stime) + block_db_txn_count.labels(self.name).inc(usage.db_txn_count) + block_db_txn_duration.labels(self.name).inc(usage.db_txn_duration_sec) + block_db_sched_duration.labels(self.name).inc(usage.db_sched_duration_sec) if self.created_context: self.start_context.__exit__(exc_type, exc_val, exc_tb) -- cgit 1.5.1 From c3c29aa19625324ac9a7a8ebcdd58a9c0b457f74 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 10 Jul 2018 16:12:36 +0100 Subject: Attempt to include db threads in cpu usage stats (#3496) Let's try to include time spent in the DB threads in the per-request/block cpu usage metrics. --- changelog.d/3496.feature | 1 + synapse/storage/_base.py | 32 +++++++++++++++----------------- synapse/storage/events_worker.py | 3 ++- synapse/util/logcontext.py | 23 +++++++++++++++++++++-- 4 files changed, 39 insertions(+), 20 deletions(-) create mode 100644 changelog.d/3496.feature (limited to 'synapse/util') diff --git a/changelog.d/3496.feature b/changelog.d/3496.feature new file mode 100644 index 0000000000..6a06a7e755 --- /dev/null +++ b/changelog.d/3496.feature @@ -0,0 +1 @@ +Include CPU time from database threads in request/block metrics. diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 1fd5d8f162..98dde77431 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -220,7 +220,7 @@ class SQLBaseStore(object): self._clock.looping_call(loop, 10000) def _new_transaction(self, conn, desc, after_callbacks, exception_callbacks, - logging_context, func, *args, **kwargs): + func, *args, **kwargs): start = time.time() txn_id = self._TXN_ID @@ -284,8 +284,7 @@ class SQLBaseStore(object): end = time.time() duration = end - start - if logging_context is not None: - logging_context.add_database_transaction(duration) + LoggingContext.current_context().add_database_transaction(duration) transaction_logger.debug("[TXN END] {%s} %f sec", name, duration) @@ -309,19 +308,15 @@ class SQLBaseStore(object): Returns: Deferred: The result of func """ - current_context = LoggingContext.current_context() - after_callbacks = [] exception_callbacks = [] - def inner_func(conn, *args, **kwargs): - return self._new_transaction( - conn, desc, after_callbacks, exception_callbacks, current_context, - func, *args, **kwargs - ) - try: - result = yield self.runWithConnection(inner_func, *args, **kwargs) + result = yield self.runWithConnection( + self._new_transaction, + desc, after_callbacks, exception_callbacks, func, + *args, **kwargs + ) for after_callback, after_args, after_kwargs in after_callbacks: after_callback(*after_args, **after_kwargs) @@ -346,22 +341,25 @@ class SQLBaseStore(object): Returns: Deferred: The result of func """ - current_context = LoggingContext.current_context() + parent_context = LoggingContext.current_context() + if parent_context == LoggingContext.sentinel: + logger.warn( + "Running db txn from sentinel context: metrics will be lost", + ) + parent_context = None start_time = time.time() def inner_func(conn, *args, **kwargs): - with LoggingContext("runWithConnection") as context: + with LoggingContext("runWithConnection", parent_context) as context: sched_duration_sec = time.time() - start_time sql_scheduling_timer.observe(sched_duration_sec) - current_context.add_database_scheduled(sched_duration_sec) + context.add_database_scheduled(sched_duration_sec) if self.database_engine.is_connection_closed(conn): logger.debug("Reconnecting closed database connection") conn.reconnect() - current_context.copy_to(context) - return func(conn, *args, **kwargs) with PreserveLoggingContext(): diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index fa2659403d..67433606c6 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -261,7 +261,8 @@ class EventsWorkerStore(SQLBaseStore): ] rows = self._new_transaction( - conn, "do_fetch", [], [], None, self._fetch_event_rows, event_ids + conn, "do_fetch", [], [], + self._fetch_event_rows, event_ids, ) row_dict = { diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index 44a0a56818..f6c7175f74 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -137,12 +137,18 @@ class LoggingContext(object): """Additional context for log formatting. Contexts are scoped within a "with" block. + If a parent is given when creating a new context, then: + - logging fields are copied from the parent to the new context on entry + - when the new context exits, the cpu usage stats are copied from the + child to the parent + Args: name (str): Name for the context for debugging. + parent_context (LoggingContext|None): The parent of the new context """ __slots__ = [ - "previous_context", "name", + "previous_context", "name", "parent_context", "_resource_usage", "usage_start", "main_thread", "alive", @@ -183,7 +189,7 @@ class LoggingContext(object): sentinel = Sentinel() - def __init__(self, name=None): + def __init__(self, name=None, parent_context=None): self.previous_context = LoggingContext.current_context() self.name = name @@ -199,6 +205,8 @@ class LoggingContext(object): self.tag = "" self.alive = True + self.parent_context = parent_context + def __str__(self): return "%s@%x" % (self.name, id(self)) @@ -236,6 +244,10 @@ class LoggingContext(object): self.previous_context, old_context ) self.alive = True + + if self.parent_context is not None: + self.parent_context.copy_to(self) + return self def __exit__(self, type, value, traceback): @@ -257,6 +269,13 @@ class LoggingContext(object): self.previous_context = None self.alive = False + # if we have a parent, pass our CPU usage stats on + if self.parent_context is not None: + self.parent_context._resource_usage += self._resource_usage + + # reset them in case we get entered again + self._resource_usage.reset() + def copy_to(self, record): """Copy logging fields from this context to a log record or another LoggingContext -- cgit 1.5.1 From fa5c2bc082185580e7d65a8759a8b5213933a137 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 12 Jul 2018 11:37:44 +0100 Subject: Reduce set building in get_entities_changed This line shows up as about 5% of cpu time on a synchrotron: not_known_entities = set(entities) - set(self._entity_to_key) Presumably the problem here is that _entity_to_key can be largeish, and building a set for its keys every time this function is called is slow. Here we rewrite the logic to avoid building so many sets. --- synapse/util/caches/stream_change_cache.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'synapse/util') diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py index 8637867c6d..a1f8ff8f10 100644 --- a/synapse/util/caches/stream_change_cache.py +++ b/synapse/util/caches/stream_change_cache.py @@ -74,14 +74,18 @@ class StreamChangeCache(object): assert type(stream_pos) is int if stream_pos >= self._earliest_known_stream_pos: - not_known_entities = set(entities) - set(self._entity_to_key) - - result = ( - {self._cache[k] for k in self._cache.islice( - start=self._cache.bisect_right(stream_pos))} - .intersection(entities) - .union(not_known_entities) - ) + changed_entities = { + self._cache[k] for k in self._cache.islice( + start=self._cache.bisect_right(stream_pos), + ) + } + + # we need to include entities which we don't know about, as well as + # those which are known to have changed since the stream pos. + result = { + e for e in entities + if e in changed_entities or e not in self._entity_to_key + } self.metrics.inc_hits() else: -- cgit 1.5.1 From 33b40d0a2590a6d875548c27408c2d35d3269f33 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 13 Jul 2018 16:19:40 +0100 Subject: Make FederationRateLimiter queue requests properly popitem removes the *most recent* item by default [1]. We want the oldest. Fixes #3524 [1]: https://docs.python.org/2/library/collections.html#collections.OrderedDict.popitem --- synapse/util/ratelimitutils.py | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) (limited to 'synapse/util') diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index 5ac33b2132..7deb38f2a7 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -92,13 +92,22 @@ class _PerHostRatelimiter(object): self.window_size = window_size self.sleep_limit = sleep_limit - self.sleep_msec = sleep_msec + self.sleep_sec = sleep_msec / 1000.0 self.reject_limit = reject_limit self.concurrent_requests = concurrent_requests + # request_id objects for requests which have been slept self.sleeping_requests = set() + + # map from request_id object to Deferred for requests which are ready + # for processing but have been queued self.ready_request_queue = collections.OrderedDict() + + # request id objects for requests which are in progress self.current_processing = set() + + # times at which we have recently (within the last window_size ms) + # received requests. self.request_times = [] @contextlib.contextmanager @@ -117,11 +126,15 @@ class _PerHostRatelimiter(object): def _on_enter(self, request_id): time_now = self.clock.time_msec() + + # remove any entries from request_times which aren't within the window self.request_times[:] = [ r for r in self.request_times if time_now - r < self.window_size ] + # reject the request if we already have too many queued up (either + # sleeping or in the ready queue). queue_size = len(self.ready_request_queue) + len(self.sleeping_requests) if queue_size > self.reject_limit: raise LimitExceededError( @@ -134,9 +147,13 @@ class _PerHostRatelimiter(object): def queue_request(): if len(self.current_processing) > self.concurrent_requests: - logger.debug("Ratelimit [%s]: Queue req", id(request_id)) queue_defer = defer.Deferred() self.ready_request_queue[request_id] = queue_defer + logger.info( + "Ratelimiter: queueing request (queue now %i items)", + len(self.ready_request_queue), + ) + return queue_defer else: return defer.succeed(None) @@ -148,10 +165,9 @@ class _PerHostRatelimiter(object): if len(self.request_times) > self.sleep_limit: logger.debug( - "Ratelimit [%s]: sleeping req", - id(request_id), + "Ratelimiter: sleeping request for %f sec", self.sleep_sec, ) - ret_defer = run_in_background(self.clock.sleep, self.sleep_msec / 1000.0) + ret_defer = run_in_background(self.clock.sleep, self.sleep_sec) self.sleeping_requests.add(request_id) @@ -200,11 +216,8 @@ class _PerHostRatelimiter(object): ) self.current_processing.discard(request_id) try: - request_id, deferred = self.ready_request_queue.popitem() - - # XXX: why do we do the following? the on_start callback above will - # do it for us. - self.current_processing.add(request_id) + # start processing the next item on the queue. + _, deferred = self.ready_request_queue.popitem(last=False) with PreserveLoggingContext(): deferred.callback(None) -- cgit 1.5.1