summary refs log tree commit diff
path: root/synapse/util
diff options
context:
space:
mode:
authorRichard van der Hoff <1389908+richvdh@users.noreply.github.com>2018-07-10 13:56:07 +0100
committerGitHub <noreply@github.com>2018-07-10 13:56:07 +0100
commit55370331da54c46c04253b009865097fe9e95191 (patch)
treea4f7c5e260460586cc3adadc8ea6470e064e7e88 /synapse/util
parentanother typo (diff)
downloadsynapse-55370331da54c46c04253b009865097fe9e95191.tar.xz
Refactor logcontext resource usage tracking (#3501)
Factor out the resource usage tracking out to a separate object, which can be
passed around and copied independently of the logcontext itself.
Diffstat (limited to 'synapse/util')
-rw-r--r--synapse/util/logcontext.py144
-rw-r--r--synapse/util/metrics.py25
2 files changed, 120 insertions, 49 deletions
diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py
index fe9288b031..44a0a56818 100644
--- a/synapse/util/logcontext.py
+++ b/synapse/util/logcontext.py
@@ -49,6 +49,90 @@ except Exception:
         return None
 
 
+class ContextResourceUsage(object):
+    """Object for tracking the resources used by a log context
+
+    Attributes:
+        ru_utime (float): user CPU time (in seconds)
+        ru_stime (float): system CPU time (in seconds)
+        db_txn_count (int): number of database transactions done
+        db_sched_duration_sec (float): amount of time spent waiting for a
+            database connection
+        db_txn_duration_sec (float): amount of time spent doing database
+            transactions (excluding scheduling time)
+        evt_db_fetch_count (int): number of events requested from the database
+    """
+
+    __slots__ = [
+        "ru_stime", "ru_utime",
+        "db_txn_count", "db_txn_duration_sec", "db_sched_duration_sec",
+        "evt_db_fetch_count",
+    ]
+
+    def __init__(self, copy_from=None):
+        """Create a new ContextResourceUsage
+
+        Args:
+            copy_from (ContextResourceUsage|None): if not None, an object to
+                copy stats from
+        """
+        if copy_from is None:
+            self.reset()
+        else:
+            self.ru_utime = copy_from.ru_utime
+            self.ru_stime = copy_from.ru_stime
+            self.db_txn_count = copy_from.db_txn_count
+
+            self.db_txn_duration_sec = copy_from.db_txn_duration_sec
+            self.db_sched_duration_sec = copy_from.db_sched_duration_sec
+            self.evt_db_fetch_count = copy_from.evt_db_fetch_count
+
+    def copy(self):
+        return ContextResourceUsage(copy_from=self)
+
+    def reset(self):
+        self.ru_stime = 0.
+        self.ru_utime = 0.
+        self.db_txn_count = 0
+
+        self.db_txn_duration_sec = 0
+        self.db_sched_duration_sec = 0
+        self.evt_db_fetch_count = 0
+
+    def __iadd__(self, other):
+        """Add another ContextResourceUsage's stats to this one's.
+
+        Args:
+            other (ContextResourceUsage): the other resource usage object
+        """
+        self.ru_utime += other.ru_utime
+        self.ru_stime += other.ru_stime
+        self.db_txn_count += other.db_txn_count
+        self.db_txn_duration_sec += other.db_txn_duration_sec
+        self.db_sched_duration_sec += other.db_sched_duration_sec
+        self.evt_db_fetch_count += other.evt_db_fetch_count
+        return self
+
+    def __isub__(self, other):
+        self.ru_utime -= other.ru_utime
+        self.ru_stime -= other.ru_stime
+        self.db_txn_count -= other.db_txn_count
+        self.db_txn_duration_sec -= other.db_txn_duration_sec
+        self.db_sched_duration_sec -= other.db_sched_duration_sec
+        self.evt_db_fetch_count -= other.evt_db_fetch_count
+        return self
+
+    def __add__(self, other):
+        res = ContextResourceUsage(copy_from=self)
+        res += other
+        return res
+
+    def __sub__(self, other):
+        res = ContextResourceUsage(copy_from=self)
+        res -= other
+        return res
+
+
 class LoggingContext(object):
     """Additional context for log formatting. Contexts are scoped within a
     "with" block.
@@ -58,9 +142,8 @@ class LoggingContext(object):
     """
 
     __slots__ = [
-        "previous_context", "name", "ru_stime", "ru_utime",
-        "db_txn_count", "db_txn_duration_sec", "db_sched_duration_sec",
-        "evt_db_fetch_count",
+        "previous_context", "name",
+        "_resource_usage",
         "usage_start",
         "main_thread", "alive",
         "request", "tag",
@@ -103,18 +186,9 @@ class LoggingContext(object):
     def __init__(self, name=None):
         self.previous_context = LoggingContext.current_context()
         self.name = name
-        self.ru_stime = 0.
-        self.ru_utime = 0.
-        self.db_txn_count = 0
 
-        # sec spent waiting for db txns, excluding scheduling time
-        self.db_txn_duration_sec = 0
-
-        # sec spent waiting for db txns to be scheduled
-        self.db_sched_duration_sec = 0
-
-        # number of events this thread has fetched from the db
-        self.evt_db_fetch_count = 0
+        # track the resources used by this context so far
+        self._resource_usage = ContextResourceUsage()
 
         # If alive has the thread resource usage when the logcontext last
         # became active.
@@ -207,39 +281,43 @@ class LoggingContext(object):
             logger.warning("Stopped logcontext %s on different thread", self)
             return
 
-        # When we stop, let's record the resource used since we started
-        if self.usage_start:
-            usage_end = get_thread_resource_usage()
+        # When we stop, let's record the cpu used since we started
+        if not self.usage_start:
+            logger.warning(
+                "Called stop on logcontext %s without calling start", self,
+            )
+            return
+
+        usage_end = get_thread_resource_usage()
 
-            self.ru_utime += usage_end.ru_utime - self.usage_start.ru_utime
-            self.ru_stime += usage_end.ru_stime - self.usage_start.ru_stime
+        self._resource_usage.ru_utime += usage_end.ru_utime - self.usage_start.ru_utime
+        self._resource_usage.ru_stime += usage_end.ru_stime - self.usage_start.ru_stime
 
-            self.usage_start = None
-        else:
-            logger.warning("Called stop on logcontext %s without calling start", self)
+        self.usage_start = None
 
     def get_resource_usage(self):
-        """Get CPU time used by this logcontext so far.
+        """Get resources used by this logcontext so far.
 
         Returns:
-            tuple[float, float]: The user and system CPU usage in seconds
+            ContextResourceUsage: a *copy* of the object tracking resource
+                usage so far
         """
-        ru_utime = self.ru_utime
-        ru_stime = self.ru_stime
+        # we always return a copy, for consistency
+        res = self._resource_usage.copy()
 
         # If we are on the correct thread and we're currently running then we
         # can include resource usage so far.
         is_main_thread = threading.current_thread() is self.main_thread
         if self.alive and self.usage_start and is_main_thread:
             current = get_thread_resource_usage()
-            ru_utime += current.ru_utime - self.usage_start.ru_utime
-            ru_stime += current.ru_stime - self.usage_start.ru_stime
+            res.ru_utime += current.ru_utime - self.usage_start.ru_utime
+            res.ru_stime += current.ru_stime - self.usage_start.ru_stime
 
-        return ru_utime, ru_stime
+        return res
 
     def add_database_transaction(self, duration_sec):
-        self.db_txn_count += 1
-        self.db_txn_duration_sec += duration_sec
+        self._resource_usage.db_txn_count += 1
+        self._resource_usage.db_txn_duration_sec += duration_sec
 
     def add_database_scheduled(self, sched_sec):
         """Record a use of the database pool
@@ -248,7 +326,7 @@ class LoggingContext(object):
             sched_sec (float): number of seconds it took us to get a
                 connection
         """
-        self.db_sched_duration_sec += sched_sec
+        self._resource_usage.db_sched_duration_sec += sched_sec
 
     def record_event_fetch(self, event_count):
         """Record a number of events being fetched from the db
@@ -256,7 +334,7 @@ class LoggingContext(object):
         Args:
             event_count (int): number of events being fetched
         """
-        self.evt_db_fetch_count += event_count
+        self._resource_usage.evt_db_fetch_count += event_count
 
 
 class LoggingContextFilter(logging.Filter):
diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py
index 63bc64c642..6ba7107896 100644
--- a/synapse/util/metrics.py
+++ b/synapse/util/metrics.py
@@ -60,10 +60,9 @@ def measure_func(name):
 
 class Measure(object):
     __slots__ = [
-        "clock", "name", "start_context", "start", "new_context", "ru_utime",
-        "ru_stime",
-        "db_txn_count", "db_txn_duration_sec", "db_sched_duration_sec",
+        "clock", "name", "start_context", "start",
         "created_context",
+        "start_usage",
     ]
 
     def __init__(self, clock, name):
@@ -81,10 +80,7 @@ class Measure(object):
             self.start_context.__enter__()
             self.created_context = True
 
-        self.ru_utime, self.ru_stime = self.start_context.get_resource_usage()
-        self.db_txn_count = self.start_context.db_txn_count
-        self.db_txn_duration_sec = self.start_context.db_txn_duration_sec
-        self.db_sched_duration_sec = self.start_context.db_sched_duration_sec
+        self.start_usage = self.start_context.get_resource_usage()
 
     def __exit__(self, exc_type, exc_val, exc_tb):
         if isinstance(exc_type, Exception) or not self.start_context:
@@ -108,15 +104,12 @@ class Measure(object):
             logger.warn("Expected context. (%r)", self.name)
             return
 
-        ru_utime, ru_stime = context.get_resource_usage()
-
-        block_ru_utime.labels(self.name).inc(ru_utime - self.ru_utime)
-        block_ru_stime.labels(self.name).inc(ru_stime - self.ru_stime)
-        block_db_txn_count.labels(self.name).inc(context.db_txn_count - self.db_txn_count)
-        block_db_txn_duration.labels(self.name).inc(
-            context.db_txn_duration_sec - self.db_txn_duration_sec)
-        block_db_sched_duration.labels(self.name).inc(
-            context.db_sched_duration_sec - self.db_sched_duration_sec)
+        usage = context.get_resource_usage() - self.start_usage
+        block_ru_utime.labels(self.name).inc(usage.ru_utime)
+        block_ru_stime.labels(self.name).inc(usage.ru_stime)
+        block_db_txn_count.labels(self.name).inc(usage.db_txn_count)
+        block_db_txn_duration.labels(self.name).inc(usage.db_txn_duration_sec)
+        block_db_sched_duration.labels(self.name).inc(usage.db_sched_duration_sec)
 
         if self.created_context:
             self.start_context.__exit__(exc_type, exc_val, exc_tb)