diff options
author | Richard van der Hoff <1389908+richvdh@users.noreply.github.com> | 2018-07-10 13:56:07 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-07-10 13:56:07 +0100 |
commit | 55370331da54c46c04253b009865097fe9e95191 (patch) | |
tree | a4f7c5e260460586cc3adadc8ea6470e064e7e88 /synapse/util | |
parent | another typo (diff) | |
download | synapse-55370331da54c46c04253b009865097fe9e95191.tar.xz |
Refactor logcontext resource usage tracking (#3501)
Factor out the resource usage tracking out to a separate object, which can be passed around and copied independently of the logcontext itself.
Diffstat (limited to 'synapse/util')
-rw-r--r-- | synapse/util/logcontext.py | 144 | ||||
-rw-r--r-- | synapse/util/metrics.py | 25 |
2 files changed, 120 insertions, 49 deletions
diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index fe9288b031..44a0a56818 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -49,6 +49,90 @@ except Exception: return None +class ContextResourceUsage(object): + """Object for tracking the resources used by a log context + + Attributes: + ru_utime (float): user CPU time (in seconds) + ru_stime (float): system CPU time (in seconds) + db_txn_count (int): number of database transactions done + db_sched_duration_sec (float): amount of time spent waiting for a + database connection + db_txn_duration_sec (float): amount of time spent doing database + transactions (excluding scheduling time) + evt_db_fetch_count (int): number of events requested from the database + """ + + __slots__ = [ + "ru_stime", "ru_utime", + "db_txn_count", "db_txn_duration_sec", "db_sched_duration_sec", + "evt_db_fetch_count", + ] + + def __init__(self, copy_from=None): + """Create a new ContextResourceUsage + + Args: + copy_from (ContextResourceUsage|None): if not None, an object to + copy stats from + """ + if copy_from is None: + self.reset() + else: + self.ru_utime = copy_from.ru_utime + self.ru_stime = copy_from.ru_stime + self.db_txn_count = copy_from.db_txn_count + + self.db_txn_duration_sec = copy_from.db_txn_duration_sec + self.db_sched_duration_sec = copy_from.db_sched_duration_sec + self.evt_db_fetch_count = copy_from.evt_db_fetch_count + + def copy(self): + return ContextResourceUsage(copy_from=self) + + def reset(self): + self.ru_stime = 0. + self.ru_utime = 0. + self.db_txn_count = 0 + + self.db_txn_duration_sec = 0 + self.db_sched_duration_sec = 0 + self.evt_db_fetch_count = 0 + + def __iadd__(self, other): + """Add another ContextResourceUsage's stats to this one's. + + Args: + other (ContextResourceUsage): the other resource usage object + """ + self.ru_utime += other.ru_utime + self.ru_stime += other.ru_stime + self.db_txn_count += other.db_txn_count + self.db_txn_duration_sec += other.db_txn_duration_sec + self.db_sched_duration_sec += other.db_sched_duration_sec + self.evt_db_fetch_count += other.evt_db_fetch_count + return self + + def __isub__(self, other): + self.ru_utime -= other.ru_utime + self.ru_stime -= other.ru_stime + self.db_txn_count -= other.db_txn_count + self.db_txn_duration_sec -= other.db_txn_duration_sec + self.db_sched_duration_sec -= other.db_sched_duration_sec + self.evt_db_fetch_count -= other.evt_db_fetch_count + return self + + def __add__(self, other): + res = ContextResourceUsage(copy_from=self) + res += other + return res + + def __sub__(self, other): + res = ContextResourceUsage(copy_from=self) + res -= other + return res + + class LoggingContext(object): """Additional context for log formatting. Contexts are scoped within a "with" block. @@ -58,9 +142,8 @@ class LoggingContext(object): """ __slots__ = [ - "previous_context", "name", "ru_stime", "ru_utime", - "db_txn_count", "db_txn_duration_sec", "db_sched_duration_sec", - "evt_db_fetch_count", + "previous_context", "name", + "_resource_usage", "usage_start", "main_thread", "alive", "request", "tag", @@ -103,18 +186,9 @@ class LoggingContext(object): def __init__(self, name=None): self.previous_context = LoggingContext.current_context() self.name = name - self.ru_stime = 0. - self.ru_utime = 0. - self.db_txn_count = 0 - # sec spent waiting for db txns, excluding scheduling time - self.db_txn_duration_sec = 0 - - # sec spent waiting for db txns to be scheduled - self.db_sched_duration_sec = 0 - - # number of events this thread has fetched from the db - self.evt_db_fetch_count = 0 + # track the resources used by this context so far + self._resource_usage = ContextResourceUsage() # If alive has the thread resource usage when the logcontext last # became active. @@ -207,39 +281,43 @@ class LoggingContext(object): logger.warning("Stopped logcontext %s on different thread", self) return - # When we stop, let's record the resource used since we started - if self.usage_start: - usage_end = get_thread_resource_usage() + # When we stop, let's record the cpu used since we started + if not self.usage_start: + logger.warning( + "Called stop on logcontext %s without calling start", self, + ) + return + + usage_end = get_thread_resource_usage() - self.ru_utime += usage_end.ru_utime - self.usage_start.ru_utime - self.ru_stime += usage_end.ru_stime - self.usage_start.ru_stime + self._resource_usage.ru_utime += usage_end.ru_utime - self.usage_start.ru_utime + self._resource_usage.ru_stime += usage_end.ru_stime - self.usage_start.ru_stime - self.usage_start = None - else: - logger.warning("Called stop on logcontext %s without calling start", self) + self.usage_start = None def get_resource_usage(self): - """Get CPU time used by this logcontext so far. + """Get resources used by this logcontext so far. Returns: - tuple[float, float]: The user and system CPU usage in seconds + ContextResourceUsage: a *copy* of the object tracking resource + usage so far """ - ru_utime = self.ru_utime - ru_stime = self.ru_stime + # we always return a copy, for consistency + res = self._resource_usage.copy() # If we are on the correct thread and we're currently running then we # can include resource usage so far. is_main_thread = threading.current_thread() is self.main_thread if self.alive and self.usage_start and is_main_thread: current = get_thread_resource_usage() - ru_utime += current.ru_utime - self.usage_start.ru_utime - ru_stime += current.ru_stime - self.usage_start.ru_stime + res.ru_utime += current.ru_utime - self.usage_start.ru_utime + res.ru_stime += current.ru_stime - self.usage_start.ru_stime - return ru_utime, ru_stime + return res def add_database_transaction(self, duration_sec): - self.db_txn_count += 1 - self.db_txn_duration_sec += duration_sec + self._resource_usage.db_txn_count += 1 + self._resource_usage.db_txn_duration_sec += duration_sec def add_database_scheduled(self, sched_sec): """Record a use of the database pool @@ -248,7 +326,7 @@ class LoggingContext(object): sched_sec (float): number of seconds it took us to get a connection """ - self.db_sched_duration_sec += sched_sec + self._resource_usage.db_sched_duration_sec += sched_sec def record_event_fetch(self, event_count): """Record a number of events being fetched from the db @@ -256,7 +334,7 @@ class LoggingContext(object): Args: event_count (int): number of events being fetched """ - self.evt_db_fetch_count += event_count + self._resource_usage.evt_db_fetch_count += event_count class LoggingContextFilter(logging.Filter): diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index 63bc64c642..6ba7107896 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -60,10 +60,9 @@ def measure_func(name): class Measure(object): __slots__ = [ - "clock", "name", "start_context", "start", "new_context", "ru_utime", - "ru_stime", - "db_txn_count", "db_txn_duration_sec", "db_sched_duration_sec", + "clock", "name", "start_context", "start", "created_context", + "start_usage", ] def __init__(self, clock, name): @@ -81,10 +80,7 @@ class Measure(object): self.start_context.__enter__() self.created_context = True - self.ru_utime, self.ru_stime = self.start_context.get_resource_usage() - self.db_txn_count = self.start_context.db_txn_count - self.db_txn_duration_sec = self.start_context.db_txn_duration_sec - self.db_sched_duration_sec = self.start_context.db_sched_duration_sec + self.start_usage = self.start_context.get_resource_usage() def __exit__(self, exc_type, exc_val, exc_tb): if isinstance(exc_type, Exception) or not self.start_context: @@ -108,15 +104,12 @@ class Measure(object): logger.warn("Expected context. (%r)", self.name) return - ru_utime, ru_stime = context.get_resource_usage() - - block_ru_utime.labels(self.name).inc(ru_utime - self.ru_utime) - block_ru_stime.labels(self.name).inc(ru_stime - self.ru_stime) - block_db_txn_count.labels(self.name).inc(context.db_txn_count - self.db_txn_count) - block_db_txn_duration.labels(self.name).inc( - context.db_txn_duration_sec - self.db_txn_duration_sec) - block_db_sched_duration.labels(self.name).inc( - context.db_sched_duration_sec - self.db_sched_duration_sec) + usage = context.get_resource_usage() - self.start_usage + block_ru_utime.labels(self.name).inc(usage.ru_utime) + block_ru_stime.labels(self.name).inc(usage.ru_stime) + block_db_txn_count.labels(self.name).inc(usage.db_txn_count) + block_db_txn_duration.labels(self.name).inc(usage.db_txn_duration_sec) + block_db_sched_duration.labels(self.name).inc(usage.db_sched_duration_sec) if self.created_context: self.start_context.__exit__(exc_type, exc_val, exc_tb) |