diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py
index 990216145e..8dcae50b39 100644
--- a/synapse/util/logcontext.py
+++ b/synapse/util/logcontext.py
@@ -22,10 +22,10 @@ them.
See doc/log_contexts.rst for details on how this works.
"""
-from twisted.internet import defer
-
-import threading
import logging
+import threading
+
+from twisted.internet import defer
logger = logging.getLogger(__name__)
@@ -42,23 +42,128 @@ try:
def get_thread_resource_usage():
return resource.getrusage(RUSAGE_THREAD)
-except:
+except Exception:
# If the system doesn't support resource.getrusage(RUSAGE_THREAD) then we
# won't track resource usage by returning None.
def get_thread_resource_usage():
return None
+class ContextResourceUsage(object):
+ """Object for tracking the resources used by a log context
+
+ Attributes:
+ ru_utime (float): user CPU time (in seconds)
+ ru_stime (float): system CPU time (in seconds)
+ db_txn_count (int): number of database transactions done
+ db_sched_duration_sec (float): amount of time spent waiting for a
+ database connection
+ db_txn_duration_sec (float): amount of time spent doing database
+ transactions (excluding scheduling time)
+ evt_db_fetch_count (int): number of events requested from the database
+ """
+
+ __slots__ = [
+ "ru_stime", "ru_utime",
+ "db_txn_count", "db_txn_duration_sec", "db_sched_duration_sec",
+ "evt_db_fetch_count",
+ ]
+
+ def __init__(self, copy_from=None):
+ """Create a new ContextResourceUsage
+
+ Args:
+ copy_from (ContextResourceUsage|None): if not None, an object to
+ copy stats from
+ """
+ if copy_from is None:
+ self.reset()
+ else:
+ self.ru_utime = copy_from.ru_utime
+ self.ru_stime = copy_from.ru_stime
+ self.db_txn_count = copy_from.db_txn_count
+
+ self.db_txn_duration_sec = copy_from.db_txn_duration_sec
+ self.db_sched_duration_sec = copy_from.db_sched_duration_sec
+ self.evt_db_fetch_count = copy_from.evt_db_fetch_count
+
+ def copy(self):
+ return ContextResourceUsage(copy_from=self)
+
+ def reset(self):
+ self.ru_stime = 0.
+ self.ru_utime = 0.
+ self.db_txn_count = 0
+
+ self.db_txn_duration_sec = 0
+ self.db_sched_duration_sec = 0
+ self.evt_db_fetch_count = 0
+
+ def __repr__(self):
+ return ("<ContextResourceUsage ru_stime='%r', ru_utime='%r', "
+ "db_txn_count='%r', db_txn_duration_sec='%r', "
+ "db_sched_duration_sec='%r', evt_db_fetch_count='%r'>") % (
+ self.ru_stime,
+ self.ru_utime,
+ self.db_txn_count,
+ self.db_txn_duration_sec,
+ self.db_sched_duration_sec,
+ self.evt_db_fetch_count,)
+
+ def __iadd__(self, other):
+ """Add another ContextResourceUsage's stats to this one's.
+
+ Args:
+ other (ContextResourceUsage): the other resource usage object
+ """
+ self.ru_utime += other.ru_utime
+ self.ru_stime += other.ru_stime
+ self.db_txn_count += other.db_txn_count
+ self.db_txn_duration_sec += other.db_txn_duration_sec
+ self.db_sched_duration_sec += other.db_sched_duration_sec
+ self.evt_db_fetch_count += other.evt_db_fetch_count
+ return self
+
+ def __isub__(self, other):
+ self.ru_utime -= other.ru_utime
+ self.ru_stime -= other.ru_stime
+ self.db_txn_count -= other.db_txn_count
+ self.db_txn_duration_sec -= other.db_txn_duration_sec
+ self.db_sched_duration_sec -= other.db_sched_duration_sec
+ self.evt_db_fetch_count -= other.evt_db_fetch_count
+ return self
+
+ def __add__(self, other):
+ res = ContextResourceUsage(copy_from=self)
+ res += other
+ return res
+
+ def __sub__(self, other):
+ res = ContextResourceUsage(copy_from=self)
+ res -= other
+ return res
+
+
class LoggingContext(object):
"""Additional context for log formatting. Contexts are scoped within a
"with" block.
+
+ If a parent is given when creating a new context, then:
+ - logging fields are copied from the parent to the new context on entry
+ - when the new context exits, the cpu usage stats are copied from the
+ child to the parent
+
Args:
name (str): Name for the context for debugging.
+ parent_context (LoggingContext|None): The parent of the new context
"""
__slots__ = [
- "previous_context", "name", "usage_start", "usage_end", "main_thread",
- "__dict__", "tag", "alive",
+ "previous_context", "name", "parent_context",
+ "_resource_usage",
+ "usage_start",
+ "main_thread", "alive",
+ "request", "tag",
]
thread_local = threading.local()
@@ -80,32 +185,49 @@ class LoggingContext(object):
def stop(self):
pass
- def add_database_transaction(self, duration_ms):
+ def add_database_transaction(self, duration_sec):
+ pass
+
+ def add_database_scheduled(self, sched_sec):
+ pass
+
+ def record_event_fetch(self, event_count):
pass
def __nonzero__(self):
return False
+ __bool__ = __nonzero__ # python3
sentinel = Sentinel()
- def __init__(self, name=None):
+ def __init__(self, name=None, parent_context=None):
self.previous_context = LoggingContext.current_context()
self.name = name
- self.ru_stime = 0.
- self.ru_utime = 0.
- self.db_txn_count = 0
- self.db_txn_duration = 0.
+
+ # track the resources used by this context so far
+ self._resource_usage = ContextResourceUsage()
+
+ # If alive has the thread resource usage when the logcontext last
+ # became active.
self.usage_start = None
+
self.main_thread = threading.current_thread()
+ self.request = None
self.tag = ""
self.alive = True
+ self.parent_context = parent_context
+
def __str__(self):
return "%s@%x" % (self.name, id(self))
@classmethod
def current_context(cls):
- """Get the current logging context from thread local storage"""
+ """Get the current logging context from thread local storage
+
+ Returns:
+ LoggingContext: the current logging context
+ """
return getattr(cls.thread_local, "current_context", cls.sentinel)
@classmethod
@@ -133,18 +255,22 @@ class LoggingContext(object):
self.previous_context, old_context
)
self.alive = True
+
+ if self.parent_context is not None:
+ self.parent_context.copy_to(self)
+
return self
def __exit__(self, type, value, traceback):
"""Restore the logging context in thread local storage to the state it
was before this context was entered.
Returns:
- None to avoid suppressing any exeptions that were thrown.
+ None to avoid suppressing any exceptions that were thrown.
"""
current = self.set_current_context(self.previous_context)
if current is not self:
if current is self.sentinel:
- logger.debug("Expected logging context %s has been lost", self)
+ logger.warn("Expected logging context %s has been lost", self)
else:
logger.warn(
"Current logging context %s is not expected context %s",
@@ -154,47 +280,91 @@ class LoggingContext(object):
self.previous_context = None
self.alive = False
+ # if we have a parent, pass our CPU usage stats on
+ if self.parent_context is not None:
+ self.parent_context._resource_usage += self._resource_usage
+
+ # reset them in case we get entered again
+ self._resource_usage.reset()
+
def copy_to(self, record):
- """Copy fields from this context to the record"""
- for key, value in self.__dict__.items():
- setattr(record, key, value)
+ """Copy logging fields from this context to a log record or
+ another LoggingContext
+ """
- record.ru_utime, record.ru_stime = self.get_resource_usage()
+ # 'request' is the only field we currently use in the logger, so that's
+ # all we need to copy
+ record.request = self.request
def start(self):
if threading.current_thread() is not self.main_thread:
+ logger.warning("Started logcontext %s on different thread", self)
return
- if self.usage_start and self.usage_end:
- self.ru_utime += self.usage_end.ru_utime - self.usage_start.ru_utime
- self.ru_stime += self.usage_end.ru_stime - self.usage_start.ru_stime
- self.usage_start = None
- self.usage_end = None
-
+ # If we haven't already started record the thread resource usage so
+ # far
if not self.usage_start:
self.usage_start = get_thread_resource_usage()
def stop(self):
if threading.current_thread() is not self.main_thread:
+ logger.warning("Stopped logcontext %s on different thread", self)
+ return
+
+ # When we stop, let's record the cpu used since we started
+ if not self.usage_start:
+ logger.warning(
+ "Called stop on logcontext %s without calling start", self,
+ )
return
- if self.usage_start:
- self.usage_end = get_thread_resource_usage()
+ usage_end = get_thread_resource_usage()
+
+ self._resource_usage.ru_utime += usage_end.ru_utime - self.usage_start.ru_utime
+ self._resource_usage.ru_stime += usage_end.ru_stime - self.usage_start.ru_stime
+
+ self.usage_start = None
def get_resource_usage(self):
- ru_utime = self.ru_utime
- ru_stime = self.ru_stime
+ """Get resources used by this logcontext so far.
- if self.usage_start and threading.current_thread() is self.main_thread:
+ Returns:
+ ContextResourceUsage: a *copy* of the object tracking resource
+ usage so far
+ """
+ # we always return a copy, for consistency
+ res = self._resource_usage.copy()
+
+ # If we are on the correct thread and we're currently running then we
+ # can include resource usage so far.
+ is_main_thread = threading.current_thread() is self.main_thread
+ if self.alive and self.usage_start and is_main_thread:
current = get_thread_resource_usage()
- ru_utime += current.ru_utime - self.usage_start.ru_utime
- ru_stime += current.ru_stime - self.usage_start.ru_stime
+ res.ru_utime += current.ru_utime - self.usage_start.ru_utime
+ res.ru_stime += current.ru_stime - self.usage_start.ru_stime
- return ru_utime, ru_stime
+ return res
- def add_database_transaction(self, duration_ms):
- self.db_txn_count += 1
- self.db_txn_duration += duration_ms / 1000.
+ def add_database_transaction(self, duration_sec):
+ self._resource_usage.db_txn_count += 1
+ self._resource_usage.db_txn_duration_sec += duration_sec
+
+ def add_database_scheduled(self, sched_sec):
+ """Record a use of the database pool
+
+ Args:
+ sched_sec (float): number of seconds it took us to get a
+ connection
+ """
+ self._resource_usage.db_sched_duration_sec += sched_sec
+
+ def record_event_fetch(self, event_count):
+ """Record a number of events being fetched from the db
+
+ Args:
+ event_count (int): number of events being fetched
+ """
+ self._resource_usage.evt_db_fetch_count += event_count
class LoggingContextFilter(logging.Filter):
@@ -248,7 +418,7 @@ class PreserveLoggingContext(object):
context = LoggingContext.set_current_context(self.current_context)
if context != self.new_context:
- logger.debug(
+ logger.warn(
"Unexpected logging context: %s is not %s",
context, self.new_context,
)
@@ -261,105 +431,62 @@ class PreserveLoggingContext(object):
)
-class _PreservingContextDeferred(defer.Deferred):
- """A deferred that ensures that all callbacks and errbacks are called with
- the given logging context.
- """
- def __init__(self, context):
- self._log_context = context
- defer.Deferred.__init__(self)
-
- def addCallbacks(self, callback, errback=None,
- callbackArgs=None, callbackKeywords=None,
- errbackArgs=None, errbackKeywords=None):
- callback = self._wrap_callback(callback)
- errback = self._wrap_callback(errback)
- return defer.Deferred.addCallbacks(
- self, callback,
- errback=errback,
- callbackArgs=callbackArgs,
- callbackKeywords=callbackKeywords,
- errbackArgs=errbackArgs,
- errbackKeywords=errbackKeywords,
- )
+def preserve_fn(f):
+ """Function decorator which wraps the function with run_in_background"""
+ def g(*args, **kwargs):
+ return run_in_background(f, *args, **kwargs)
+ return g
- def _wrap_callback(self, f):
- def g(res, *args, **kwargs):
- with PreserveLoggingContext(self._log_context):
- res = f(res, *args, **kwargs)
- return res
- return g
+def run_in_background(f, *args, **kwargs):
+ """Calls a function, ensuring that the current context is restored after
+ return from the function, and that the sentinel context is set once the
+ deferred returned by the function completes.
-def preserve_context_over_fn(fn, *args, **kwargs):
- """Takes a function and invokes it with the given arguments, but removes
- and restores the current logging context while doing so.
+ Useful for wrapping functions that return a deferred which you don't yield
+ on (for instance because you want to pass it to deferred.gatherResults()).
- If the result is a deferred, call preserve_context_over_deferred before
- returning it.
+ Note that if you completely discard the result, you should make sure that
+ `f` doesn't raise any deferred exceptions, otherwise a scary-looking
+ CRITICAL error about an unhandled error will be logged without much
+ indication about where it came from.
"""
- with PreserveLoggingContext():
- res = fn(*args, **kwargs)
+ current = LoggingContext.current_context()
+ try:
+ res = f(*args, **kwargs)
+ except: # noqa: E722
+ # the assumption here is that the caller doesn't want to be disturbed
+ # by synchronous exceptions, so let's turn them into Failures.
+ return defer.fail()
- if isinstance(res, defer.Deferred):
- return preserve_context_over_deferred(res)
- else:
+ if not isinstance(res, defer.Deferred):
return res
-
-def preserve_context_over_deferred(deferred, context=None):
- """Given a deferred wrap it such that any callbacks added later to it will
- be invoked with the current context.
-
- Deprecated: this almost certainly doesn't do want you want, ie make
- the deferred follow the synapse logcontext rules: try
- ``make_deferred_yieldable`` instead.
- """
- if context is None:
- context = LoggingContext.current_context()
- d = _PreservingContextDeferred(context)
- deferred.chainDeferred(d)
- return d
-
-
-def preserve_fn(f):
- """Wraps a function, to ensure that the current context is restored after
- return from the function, and that the sentinel context is set once the
- deferred returned by the funtion completes.
-
- Useful for wrapping functions that return a deferred which you don't yield
- on.
- """
- def reset_context(result):
- LoggingContext.set_current_context(LoggingContext.sentinel)
- return result
-
- def g(*args, **kwargs):
- current = LoggingContext.current_context()
- res = f(*args, **kwargs)
- if isinstance(res, defer.Deferred) and not res.called:
- # The function will have reset the context before returning, so
- # we need to restore it now.
- LoggingContext.set_current_context(current)
-
- # The original context will be restored when the deferred
- # completes, but there is nothing waiting for it, so it will
- # get leaked into the reactor or some other function which
- # wasn't expecting it. We therefore need to reset the context
- # here.
- #
- # (If this feels asymmetric, consider it this way: we are
- # effectively forking a new thread of execution. We are
- # probably currently within a ``with LoggingContext()`` block,
- # which is supposed to have a single entry and exit point. But
- # by spawning off another deferred, we are effectively
- # adding a new exit point.)
- res.addBoth(reset_context)
+ if res.called and not res.paused:
+ # The function should have maintained the logcontext, so we can
+ # optimise out the messing about
return res
- return g
+
+ # The function may have reset the context before returning, so
+ # we need to restore it now.
+ ctx = LoggingContext.set_current_context(current)
+
+ # The original context will be restored when the deferred
+ # completes, but there is nothing waiting for it, so it will
+ # get leaked into the reactor or some other function which
+ # wasn't expecting it. We therefore need to reset the context
+ # here.
+ #
+ # (If this feels asymmetric, consider it this way: we are
+ # effectively forking a new thread of execution. We are
+ # probably currently within a ``with LoggingContext()`` block,
+ # which is supposed to have a single entry and exit point. But
+ # by spawning off another deferred, we are effectively
+ # adding a new exit point.)
+ res.addBoth(_set_context_cb, ctx)
+ return res
-@defer.inlineCallbacks
def make_deferred_yieldable(deferred):
"""Given a deferred, make it follow the Synapse logcontext rules:
@@ -371,11 +498,27 @@ def make_deferred_yieldable(deferred):
returning a deferred. Then, when the deferred completes, restores the
current logcontext before running callbacks/errbacks.
- (This is more-or-less the opposite operation to preserve_fn.)
+ (This is more-or-less the opposite operation to run_in_background.)
"""
- with PreserveLoggingContext():
- r = yield deferred
- defer.returnValue(r)
+ if not isinstance(deferred, defer.Deferred):
+ return deferred
+
+ if deferred.called and not deferred.paused:
+ # it looks like this deferred is ready to run any callbacks we give it
+ # immediately. We may as well optimise out the logcontext faffery.
+ return deferred
+
+ # ok, we can't be sure that a yield won't block, so let's reset the
+ # logcontext, and add a callback to the deferred to restore it.
+ prev_context = LoggingContext.set_current_context(LoggingContext.sentinel)
+ deferred.addBoth(_set_context_cb, prev_context)
+ return deferred
+
+
+def _set_context_cb(result, context):
+ """A callback function which just sets the logging context"""
+ LoggingContext.set_current_context(context)
+ return result
# modules to ignore in `logcontext_tracer`
|