diff --git a/synapse/logging/__init__.py b/synapse/logging/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/synapse/logging/__init__.py
diff --git a/synapse/logging/context.py b/synapse/logging/context.py
new file mode 100644
index 0000000000..b456c31f70
--- /dev/null
+++ b/synapse/logging/context.py
@@ -0,0 +1,697 @@
+# Copyright 2014-2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+""" Thread-local-alike tracking of log contexts within synapse
+
+This module provides objects and utilities for tracking contexts through
+synapse code, so that log lines can include a request identifier, and so that
+CPU and database activity can be accounted for against the request that caused
+them.
+
+See doc/log_contexts.rst for details on how this works.
+"""
+
+import logging
+import threading
+import types
+
+from twisted.internet import defer, threads
+
+logger = logging.getLogger(__name__)
+
+try:
+ import resource
+
+ # Python doesn't ship with a definition of RUSAGE_THREAD but it's defined
+ # to be 1 on linux so we hard code it.
+ RUSAGE_THREAD = 1
+
+ # If the system doesn't support RUSAGE_THREAD then this should throw an
+ # exception.
+ resource.getrusage(RUSAGE_THREAD)
+
+ def get_thread_resource_usage():
+ return resource.getrusage(RUSAGE_THREAD)
+
+
+except Exception:
+ # If the system doesn't support resource.getrusage(RUSAGE_THREAD) then we
+ # won't track resource usage by returning None.
+ def get_thread_resource_usage():
+ return None
+
+
+# get an id for the current thread.
+#
+# threading.get_ident doesn't actually return an OS-level tid, and annoyingly,
+# on Linux it actually returns the same value either side of a fork() call. However
+# we only fork in one place, so it's not worth the hoop-jumping to get a real tid.
+#
+get_thread_id = threading.get_ident
+
+
+class ContextResourceUsage(object):
+ """Object for tracking the resources used by a log context
+
+ Attributes:
+ ru_utime (float): user CPU time (in seconds)
+ ru_stime (float): system CPU time (in seconds)
+ db_txn_count (int): number of database transactions done
+ db_sched_duration_sec (float): amount of time spent waiting for a
+ database connection
+ db_txn_duration_sec (float): amount of time spent doing database
+ transactions (excluding scheduling time)
+ evt_db_fetch_count (int): number of events requested from the database
+ """
+
+ __slots__ = [
+ "ru_stime",
+ "ru_utime",
+ "db_txn_count",
+ "db_txn_duration_sec",
+ "db_sched_duration_sec",
+ "evt_db_fetch_count",
+ ]
+
+ def __init__(self, copy_from=None):
+ """Create a new ContextResourceUsage
+
+ Args:
+ copy_from (ContextResourceUsage|None): if not None, an object to
+ copy stats from
+ """
+ if copy_from is None:
+ self.reset()
+ else:
+ self.ru_utime = copy_from.ru_utime
+ self.ru_stime = copy_from.ru_stime
+ self.db_txn_count = copy_from.db_txn_count
+
+ self.db_txn_duration_sec = copy_from.db_txn_duration_sec
+ self.db_sched_duration_sec = copy_from.db_sched_duration_sec
+ self.evt_db_fetch_count = copy_from.evt_db_fetch_count
+
+ def copy(self):
+ return ContextResourceUsage(copy_from=self)
+
+ def reset(self):
+ self.ru_stime = 0.0
+ self.ru_utime = 0.0
+ self.db_txn_count = 0
+
+ self.db_txn_duration_sec = 0
+ self.db_sched_duration_sec = 0
+ self.evt_db_fetch_count = 0
+
+ def __repr__(self):
+ return (
+ "<ContextResourceUsage ru_stime='%r', ru_utime='%r', "
+ "db_txn_count='%r', db_txn_duration_sec='%r', "
+ "db_sched_duration_sec='%r', evt_db_fetch_count='%r'>"
+ ) % (
+ self.ru_stime,
+ self.ru_utime,
+ self.db_txn_count,
+ self.db_txn_duration_sec,
+ self.db_sched_duration_sec,
+ self.evt_db_fetch_count,
+ )
+
+ def __iadd__(self, other):
+ """Add another ContextResourceUsage's stats to this one's.
+
+ Args:
+ other (ContextResourceUsage): the other resource usage object
+ """
+ self.ru_utime += other.ru_utime
+ self.ru_stime += other.ru_stime
+ self.db_txn_count += other.db_txn_count
+ self.db_txn_duration_sec += other.db_txn_duration_sec
+ self.db_sched_duration_sec += other.db_sched_duration_sec
+ self.evt_db_fetch_count += other.evt_db_fetch_count
+ return self
+
+ def __isub__(self, other):
+ self.ru_utime -= other.ru_utime
+ self.ru_stime -= other.ru_stime
+ self.db_txn_count -= other.db_txn_count
+ self.db_txn_duration_sec -= other.db_txn_duration_sec
+ self.db_sched_duration_sec -= other.db_sched_duration_sec
+ self.evt_db_fetch_count -= other.evt_db_fetch_count
+ return self
+
+ def __add__(self, other):
+ res = ContextResourceUsage(copy_from=self)
+ res += other
+ return res
+
+ def __sub__(self, other):
+ res = ContextResourceUsage(copy_from=self)
+ res -= other
+ return res
+
+
+class LoggingContext(object):
+ """Additional context for log formatting. Contexts are scoped within a
+ "with" block.
+
+ If a parent is given when creating a new context, then:
+ - logging fields are copied from the parent to the new context on entry
+ - when the new context exits, the cpu usage stats are copied from the
+ child to the parent
+
+ Args:
+ name (str): Name for the context for debugging.
+ parent_context (LoggingContext|None): The parent of the new context
+ """
+
+ __slots__ = [
+ "previous_context",
+ "name",
+ "parent_context",
+ "_resource_usage",
+ "usage_start",
+ "main_thread",
+ "alive",
+ "request",
+ "tag",
+ "scope",
+ ]
+
+ thread_local = threading.local()
+
+ class Sentinel(object):
+ """Sentinel to represent the root context"""
+
+ __slots__ = []
+
+ def __str__(self):
+ return "sentinel"
+
+ def copy_to(self, record):
+ pass
+
+ def start(self):
+ pass
+
+ def stop(self):
+ pass
+
+ def add_database_transaction(self, duration_sec):
+ pass
+
+ def add_database_scheduled(self, sched_sec):
+ pass
+
+ def record_event_fetch(self, event_count):
+ pass
+
+ def __nonzero__(self):
+ return False
+
+ __bool__ = __nonzero__ # python3
+
+ sentinel = Sentinel()
+
+ def __init__(self, name=None, parent_context=None, request=None):
+ self.previous_context = LoggingContext.current_context()
+ self.name = name
+
+ # track the resources used by this context so far
+ self._resource_usage = ContextResourceUsage()
+
+ # If alive has the thread resource usage when the logcontext last
+ # became active.
+ self.usage_start = None
+
+ self.main_thread = get_thread_id()
+ self.request = None
+ self.tag = ""
+ self.alive = True
+ self.scope = None
+
+ self.parent_context = parent_context
+
+ if self.parent_context is not None:
+ self.parent_context.copy_to(self)
+
+ if request is not None:
+ # the request param overrides the request from the parent context
+ self.request = request
+
+ def __str__(self):
+ if self.request:
+ return str(self.request)
+ return "%s@%x" % (self.name, id(self))
+
+ @classmethod
+ def current_context(cls):
+ """Get the current logging context from thread local storage
+
+ Returns:
+ LoggingContext: the current logging context
+ """
+ return getattr(cls.thread_local, "current_context", cls.sentinel)
+
+ @classmethod
+ def set_current_context(cls, context):
+ """Set the current logging context in thread local storage
+ Args:
+ context(LoggingContext): The context to activate.
+ Returns:
+ The context that was previously active
+ """
+ current = cls.current_context()
+
+ if current is not context:
+ current.stop()
+ cls.thread_local.current_context = context
+ context.start()
+ return current
+
+ def __enter__(self):
+ """Enters this logging context into thread local storage"""
+ old_context = self.set_current_context(self)
+ if self.previous_context != old_context:
+ logger.warn(
+ "Expected previous context %r, found %r",
+ self.previous_context,
+ old_context,
+ )
+ self.alive = True
+
+ return self
+
+ def __exit__(self, type, value, traceback):
+ """Restore the logging context in thread local storage to the state it
+ was before this context was entered.
+ Returns:
+ None to avoid suppressing any exceptions that were thrown.
+ """
+ current = self.set_current_context(self.previous_context)
+ if current is not self:
+ if current is self.sentinel:
+ logger.warning("Expected logging context %s was lost", self)
+ else:
+ logger.warning(
+ "Expected logging context %s but found %s", self, current
+ )
+ self.previous_context = None
+ self.alive = False
+
+ # if we have a parent, pass our CPU usage stats on
+ if self.parent_context is not None and hasattr(
+ self.parent_context, "_resource_usage"
+ ):
+ self.parent_context._resource_usage += self._resource_usage
+
+ # reset them in case we get entered again
+ self._resource_usage.reset()
+
+ def copy_to(self, record):
+ """Copy logging fields from this context to a log record or
+ another LoggingContext
+ """
+
+ # we track the current request
+ record.request = self.request
+
+ # we also track the current scope:
+ record.scope = self.scope
+
+ def start(self):
+ if get_thread_id() != self.main_thread:
+ logger.warning("Started logcontext %s on different thread", self)
+ return
+
+ # If we haven't already started record the thread resource usage so
+ # far
+ if not self.usage_start:
+ self.usage_start = get_thread_resource_usage()
+
+ def stop(self):
+ if get_thread_id() != self.main_thread:
+ logger.warning("Stopped logcontext %s on different thread", self)
+ return
+
+ # When we stop, let's record the cpu used since we started
+ if not self.usage_start:
+ logger.warning("Called stop on logcontext %s without calling start", self)
+ return
+
+ utime_delta, stime_delta = self._get_cputime()
+ self._resource_usage.ru_utime += utime_delta
+ self._resource_usage.ru_stime += stime_delta
+
+ self.usage_start = None
+
+ def get_resource_usage(self):
+ """Get resources used by this logcontext so far.
+
+ Returns:
+ ContextResourceUsage: a *copy* of the object tracking resource
+ usage so far
+ """
+ # we always return a copy, for consistency
+ res = self._resource_usage.copy()
+
+ # If we are on the correct thread and we're currently running then we
+ # can include resource usage so far.
+ is_main_thread = get_thread_id() == self.main_thread
+ if self.alive and self.usage_start and is_main_thread:
+ utime_delta, stime_delta = self._get_cputime()
+ res.ru_utime += utime_delta
+ res.ru_stime += stime_delta
+
+ return res
+
+ def _get_cputime(self):
+ """Get the cpu usage time so far
+
+ Returns: Tuple[float, float]: seconds in user mode, seconds in system mode
+ """
+ current = get_thread_resource_usage()
+
+ utime_delta = current.ru_utime - self.usage_start.ru_utime
+ stime_delta = current.ru_stime - self.usage_start.ru_stime
+
+ # sanity check
+ if utime_delta < 0:
+ logger.error(
+ "utime went backwards! %f < %f",
+ current.ru_utime,
+ self.usage_start.ru_utime,
+ )
+ utime_delta = 0
+
+ if stime_delta < 0:
+ logger.error(
+ "stime went backwards! %f < %f",
+ current.ru_stime,
+ self.usage_start.ru_stime,
+ )
+ stime_delta = 0
+
+ return utime_delta, stime_delta
+
+ def add_database_transaction(self, duration_sec):
+ if duration_sec < 0:
+ raise ValueError("DB txn time can only be non-negative")
+ self._resource_usage.db_txn_count += 1
+ self._resource_usage.db_txn_duration_sec += duration_sec
+
+ def add_database_scheduled(self, sched_sec):
+ """Record a use of the database pool
+
+ Args:
+ sched_sec (float): number of seconds it took us to get a
+ connection
+ """
+ if sched_sec < 0:
+ raise ValueError("DB scheduling time can only be non-negative")
+ self._resource_usage.db_sched_duration_sec += sched_sec
+
+ def record_event_fetch(self, event_count):
+ """Record a number of events being fetched from the db
+
+ Args:
+ event_count (int): number of events being fetched
+ """
+ self._resource_usage.evt_db_fetch_count += event_count
+
+
+class LoggingContextFilter(logging.Filter):
+ """Logging filter that adds values from the current logging context to each
+ record.
+ Args:
+ **defaults: Default values to avoid formatters complaining about
+ missing fields
+ """
+
+ def __init__(self, **defaults):
+ self.defaults = defaults
+
+ def filter(self, record):
+ """Add each fields from the logging contexts to the record.
+ Returns:
+ True to include the record in the log output.
+ """
+ context = LoggingContext.current_context()
+ for key, value in self.defaults.items():
+ setattr(record, key, value)
+
+ # context should never be None, but if it somehow ends up being, then
+ # we end up in a death spiral of infinite loops, so let's check, for
+ # robustness' sake.
+ if context is not None:
+ context.copy_to(record)
+
+ return True
+
+
+class PreserveLoggingContext(object):
+ """Captures the current logging context and restores it when the scope is
+ exited. Used to restore the context after a function using
+ @defer.inlineCallbacks is resumed by a callback from the reactor."""
+
+ __slots__ = ["current_context", "new_context", "has_parent"]
+
+ def __init__(self, new_context=None):
+ if new_context is None:
+ new_context = LoggingContext.sentinel
+ self.new_context = new_context
+
+ def __enter__(self):
+ """Captures the current logging context"""
+ self.current_context = LoggingContext.set_current_context(self.new_context)
+
+ if self.current_context:
+ self.has_parent = self.current_context.previous_context is not None
+ if not self.current_context.alive:
+ logger.debug("Entering dead context: %s", self.current_context)
+
+ def __exit__(self, type, value, traceback):
+ """Restores the current logging context"""
+ context = LoggingContext.set_current_context(self.current_context)
+
+ if context != self.new_context:
+ if context is LoggingContext.sentinel:
+ logger.warning("Expected logging context %s was lost", self.new_context)
+ else:
+ logger.warning(
+ "Expected logging context %s but found %s",
+ self.new_context,
+ context,
+ )
+
+ if self.current_context is not LoggingContext.sentinel:
+ if not self.current_context.alive:
+ logger.debug("Restoring dead context: %s", self.current_context)
+
+
+def nested_logging_context(suffix, parent_context=None):
+ """Creates a new logging context as a child of another.
+
+ The nested logging context will have a 'request' made up of the parent context's
+ request, plus the given suffix.
+
+ CPU/db usage stats will be added to the parent context's on exit.
+
+ Normal usage looks like:
+
+ with nested_logging_context(suffix):
+ # ... do stuff
+
+ Args:
+ suffix (str): suffix to add to the parent context's 'request'.
+ parent_context (LoggingContext|None): parent context. Will use the current context
+ if None.
+
+ Returns:
+ LoggingContext: new logging context.
+ """
+ if parent_context is None:
+ parent_context = LoggingContext.current_context()
+ return LoggingContext(
+ parent_context=parent_context, request=parent_context.request + "-" + suffix
+ )
+
+
+def preserve_fn(f):
+ """Function decorator which wraps the function with run_in_background"""
+
+ def g(*args, **kwargs):
+ return run_in_background(f, *args, **kwargs)
+
+ return g
+
+
+def run_in_background(f, *args, **kwargs):
+ """Calls a function, ensuring that the current context is restored after
+ return from the function, and that the sentinel context is set once the
+ deferred returned by the function completes.
+
+ Useful for wrapping functions that return a deferred or coroutine, which you don't
+ yield or await on (for instance because you want to pass it to
+ deferred.gatherResults()).
+
+ Note that if you completely discard the result, you should make sure that
+ `f` doesn't raise any deferred exceptions, otherwise a scary-looking
+ CRITICAL error about an unhandled error will be logged without much
+ indication about where it came from.
+ """
+ current = LoggingContext.current_context()
+ try:
+ res = f(*args, **kwargs)
+ except: # noqa: E722
+ # the assumption here is that the caller doesn't want to be disturbed
+ # by synchronous exceptions, so let's turn them into Failures.
+ return defer.fail()
+
+ if isinstance(res, types.CoroutineType):
+ res = defer.ensureDeferred(res)
+
+ if not isinstance(res, defer.Deferred):
+ return res
+
+ if res.called and not res.paused:
+ # The function should have maintained the logcontext, so we can
+ # optimise out the messing about
+ return res
+
+ # The function may have reset the context before returning, so
+ # we need to restore it now.
+ ctx = LoggingContext.set_current_context(current)
+
+ # The original context will be restored when the deferred
+ # completes, but there is nothing waiting for it, so it will
+ # get leaked into the reactor or some other function which
+ # wasn't expecting it. We therefore need to reset the context
+ # here.
+ #
+ # (If this feels asymmetric, consider it this way: we are
+ # effectively forking a new thread of execution. We are
+ # probably currently within a ``with LoggingContext()`` block,
+ # which is supposed to have a single entry and exit point. But
+ # by spawning off another deferred, we are effectively
+ # adding a new exit point.)
+ res.addBoth(_set_context_cb, ctx)
+ return res
+
+
+def make_deferred_yieldable(deferred):
+ """Given a deferred, make it follow the Synapse logcontext rules:
+
+ If the deferred has completed (or is not actually a Deferred), essentially
+ does nothing (just returns another completed deferred with the
+ result/failure).
+
+ If the deferred has not yet completed, resets the logcontext before
+ returning a deferred. Then, when the deferred completes, restores the
+ current logcontext before running callbacks/errbacks.
+
+ (This is more-or-less the opposite operation to run_in_background.)
+ """
+ if not isinstance(deferred, defer.Deferred):
+ return deferred
+
+ if deferred.called and not deferred.paused:
+ # it looks like this deferred is ready to run any callbacks we give it
+ # immediately. We may as well optimise out the logcontext faffery.
+ return deferred
+
+ # ok, we can't be sure that a yield won't block, so let's reset the
+ # logcontext, and add a callback to the deferred to restore it.
+ prev_context = LoggingContext.set_current_context(LoggingContext.sentinel)
+ deferred.addBoth(_set_context_cb, prev_context)
+ return deferred
+
+
+def _set_context_cb(result, context):
+ """A callback function which just sets the logging context"""
+ LoggingContext.set_current_context(context)
+ return result
+
+
+def defer_to_thread(reactor, f, *args, **kwargs):
+ """
+ Calls the function `f` using a thread from the reactor's default threadpool and
+ returns the result as a Deferred.
+
+ Creates a new logcontext for `f`, which is created as a child of the current
+ logcontext (so its CPU usage metrics will get attributed to the current
+ logcontext). `f` should preserve the logcontext it is given.
+
+ The result deferred follows the Synapse logcontext rules: you should `yield`
+ on it.
+
+ Args:
+ reactor (twisted.internet.base.ReactorBase): The reactor in whose main thread
+ the Deferred will be invoked, and whose threadpool we should use for the
+ function.
+
+ Normally this will be hs.get_reactor().
+
+ f (callable): The function to call.
+
+ args: positional arguments to pass to f.
+
+ kwargs: keyword arguments to pass to f.
+
+ Returns:
+ Deferred: A Deferred which fires a callback with the result of `f`, or an
+ errback if `f` throws an exception.
+ """
+ return defer_to_threadpool(reactor, reactor.getThreadPool(), f, *args, **kwargs)
+
+
+def defer_to_threadpool(reactor, threadpool, f, *args, **kwargs):
+ """
+ A wrapper for twisted.internet.threads.deferToThreadpool, which handles
+ logcontexts correctly.
+
+ Calls the function `f` using a thread from the given threadpool and returns
+ the result as a Deferred.
+
+ Creates a new logcontext for `f`, which is created as a child of the current
+ logcontext (so its CPU usage metrics will get attributed to the current
+ logcontext). `f` should preserve the logcontext it is given.
+
+ The result deferred follows the Synapse logcontext rules: you should `yield`
+ on it.
+
+ Args:
+ reactor (twisted.internet.base.ReactorBase): The reactor in whose main thread
+ the Deferred will be invoked. Normally this will be hs.get_reactor().
+
+ threadpool (twisted.python.threadpool.ThreadPool): The threadpool to use for
+ running `f`. Normally this will be hs.get_reactor().getThreadPool().
+
+ f (callable): The function to call.
+
+ args: positional arguments to pass to f.
+
+ kwargs: keyword arguments to pass to f.
+
+ Returns:
+ Deferred: A Deferred which fires a callback with the result of `f`, or an
+ errback if `f` throws an exception.
+ """
+ logcontext = LoggingContext.current_context()
+
+ def g():
+ with LoggingContext(parent_context=logcontext):
+ return f(*args, **kwargs)
+
+ return make_deferred_yieldable(threads.deferToThreadPool(reactor, threadpool, g))
diff --git a/synapse/logging/formatter.py b/synapse/logging/formatter.py
new file mode 100644
index 0000000000..fbf570c756
--- /dev/null
+++ b/synapse/logging/formatter.py
@@ -0,0 +1,53 @@
+# -*- coding: utf-8 -*-
+# Copyright 2017 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import logging
+import traceback
+
+from six import StringIO
+
+
+class LogFormatter(logging.Formatter):
+ """Log formatter which gives more detail for exceptions
+
+ This is the same as the standard log formatter, except that when logging
+ exceptions [typically via log.foo("msg", exc_info=1)], it prints the
+ sequence that led up to the point at which the exception was caught.
+ (Normally only stack frames between the point the exception was raised and
+ where it was caught are logged).
+ """
+
+ def __init__(self, *args, **kwargs):
+ super(LogFormatter, self).__init__(*args, **kwargs)
+
+ def formatException(self, ei):
+ sio = StringIO()
+ (typ, val, tb) = ei
+
+ # log the stack above the exception capture point if possible, but
+ # check that we actually have an f_back attribute to work around
+ # https://twistedmatrix.com/trac/ticket/9305
+
+ if tb and hasattr(tb.tb_frame, "f_back"):
+ sio.write("Capture point (most recent call last):\n")
+ traceback.print_stack(tb.tb_frame.f_back, None, sio)
+
+ traceback.print_exception(typ, val, tb, None, sio)
+ s = sio.getvalue()
+ sio.close()
+ if s[-1:] == "\n":
+ s = s[:-1]
+ return s
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
new file mode 100644
index 0000000000..f0ceea2a64
--- /dev/null
+++ b/synapse/logging/opentracing.py
@@ -0,0 +1,362 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Matrix.org Foundation C.I.C.d
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.import opentracing
+
+
+# NOTE
+# This is a small wrapper around opentracing because opentracing is not currently
+# packaged downstream (specifically debian). Since opentracing instrumentation is
+# fairly invasive it was awkward to make it optional. As a result we opted to encapsulate
+# all opentracing state in these methods which effectively noop if opentracing is
+# not present. We should strongly consider encouraging the downstream distributers
+# to package opentracing and making opentracing a full dependency. In order to facilitate
+# this move the methods have work very similarly to opentracing's and it should only
+# be a matter of few regexes to move over to opentracing's access patterns proper.
+
+try:
+ import opentracing
+except ImportError:
+ opentracing = None
+try:
+ from jaeger_client import Config as JaegerConfig
+ from synapse.logging.scopecontextmanager import LogContextScopeManager
+except ImportError:
+ JaegerConfig = None
+ LogContextScopeManager = None
+
+import contextlib
+import logging
+import re
+from functools import wraps
+
+from twisted.internet import defer
+
+logger = logging.getLogger(__name__)
+
+
+class _DumTagNames(object):
+ """wrapper of opentracings tags. We need to have them if we
+ want to reference them without opentracing around. Clearly they
+ should never actually show up in a trace. `set_tags` overwrites
+ these with the correct ones."""
+
+ INVALID_TAG = "invalid-tag"
+ COMPONENT = INVALID_TAG
+ DATABASE_INSTANCE = INVALID_TAG
+ DATABASE_STATEMENT = INVALID_TAG
+ DATABASE_TYPE = INVALID_TAG
+ DATABASE_USER = INVALID_TAG
+ ERROR = INVALID_TAG
+ HTTP_METHOD = INVALID_TAG
+ HTTP_STATUS_CODE = INVALID_TAG
+ HTTP_URL = INVALID_TAG
+ MESSAGE_BUS_DESTINATION = INVALID_TAG
+ PEER_ADDRESS = INVALID_TAG
+ PEER_HOSTNAME = INVALID_TAG
+ PEER_HOST_IPV4 = INVALID_TAG
+ PEER_HOST_IPV6 = INVALID_TAG
+ PEER_PORT = INVALID_TAG
+ PEER_SERVICE = INVALID_TAG
+ SAMPLING_PRIORITY = INVALID_TAG
+ SERVICE = INVALID_TAG
+ SPAN_KIND = INVALID_TAG
+ SPAN_KIND_CONSUMER = INVALID_TAG
+ SPAN_KIND_PRODUCER = INVALID_TAG
+ SPAN_KIND_RPC_CLIENT = INVALID_TAG
+ SPAN_KIND_RPC_SERVER = INVALID_TAG
+
+
+def only_if_tracing(func):
+ """Executes the function only if we're tracing. Otherwise return.
+ Assumes the function wrapped may return None"""
+
+ @wraps(func)
+ def _only_if_tracing_inner(*args, **kwargs):
+ if opentracing:
+ return func(*args, **kwargs)
+ else:
+ return
+
+ return _only_if_tracing_inner
+
+
+# Block everything by default
+_homeserver_whitelist = None
+
+tags = _DumTagNames
+
+
+def init_tracer(config):
+ """Set the whitelists and initialise the JaegerClient tracer
+
+ Args:
+ config (Config)
+ The config used by the homeserver. Here it's used to set the service
+ name to the homeserver's.
+ """
+ global opentracing
+ if not config.tracer_config.get("tracer_enabled", False):
+ # We don't have a tracer
+ opentracing = None
+ return
+
+ if not opentracing:
+ logger.error(
+ "The server has been configure to use opentracing but opentracing is not installed."
+ )
+ raise ModuleNotFoundError("opentracing")
+
+ if not JaegerConfig:
+ logger.error(
+ "The server has been configure to use opentracing but opentracing is not installed."
+ )
+
+ # Include the worker name
+ name = config.worker_name if config.worker_name else "master"
+
+ set_homeserver_whitelist(config.tracer_config["homeserver_whitelist"])
+ jaeger_config = JaegerConfig(
+ config={"sampler": {"type": "const", "param": 1}, "logging": True},
+ service_name="{} {}".format(config.server_name, name),
+ scope_manager=LogContextScopeManager(config),
+ )
+ jaeger_config.initialize_tracer()
+
+ # Set up tags to be opentracing's tags
+ global tags
+ tags = opentracing.tags
+
+
+@contextlib.contextmanager
+def _noop_context_manager(*args, **kwargs):
+ """Does absolutely nothing really well. Can be entered and exited arbitrarily.
+ Good substitute for an opentracing scope."""
+ yield
+
+
+# Could use kwargs but I want these to be explicit
+def start_active_span(
+ operation_name,
+ child_of=None,
+ references=None,
+ tags=None,
+ start_time=None,
+ ignore_active_span=False,
+ finish_on_close=True,
+):
+ """Starts an active opentracing span. Note, the scope doesn't become active
+ until it has been entered, however, the span starts from the time this
+ message is called.
+ Args:
+ See opentracing.tracer
+ Returns:
+ scope (Scope) or noop_context_manager
+ """
+ if opentracing is None:
+ return _noop_context_manager()
+ else:
+ # We need to enter the scope here for the logcontext to become active
+ return opentracing.tracer.start_active_span(
+ operation_name,
+ child_of=child_of,
+ references=references,
+ tags=tags,
+ start_time=start_time,
+ ignore_active_span=ignore_active_span,
+ finish_on_close=finish_on_close,
+ )
+
+
+@only_if_tracing
+def close_active_span():
+ """Closes the active span. This will close it's logcontext if the context
+ was made for the span"""
+ opentracing.tracer.scope_manager.active.__exit__(None, None, None)
+
+
+@only_if_tracing
+def set_tag(key, value):
+ """Set's a tag on the active span"""
+ opentracing.tracer.active_span.set_tag(key, value)
+
+
+@only_if_tracing
+def log_kv(key_values, timestamp=None):
+ """Log to the active span"""
+ opentracing.tracer.active_span.log_kv(key_values, timestamp)
+
+
+# Note: we don't have a get baggage items because we're trying to hide all
+# scope and span state from synapse. I think this method may also be useless
+# as a result
+@only_if_tracing
+def set_baggage_item(key, value):
+ """Attach baggage to the active span"""
+ opentracing.tracer.active_span.set_baggage_item(key, value)
+
+
+@only_if_tracing
+def set_operation_name(operation_name):
+ """Sets the operation name of the active span"""
+ opentracing.tracer.active_span.set_operation_name(operation_name)
+
+
+@only_if_tracing
+def set_homeserver_whitelist(homeserver_whitelist):
+ """Sets the whitelist
+
+ Args:
+ homeserver_whitelist (iterable of strings): regex of whitelisted homeservers
+ """
+ global _homeserver_whitelist
+ if homeserver_whitelist:
+ # Makes a single regex which accepts all passed in regexes in the list
+ _homeserver_whitelist = re.compile(
+ "({})".format(")|(".join(homeserver_whitelist))
+ )
+
+
+@only_if_tracing
+def whitelisted_homeserver(destination):
+ """Checks if a destination matches the whitelist
+ Args:
+ destination (String)"""
+ global _homeserver_whitelist
+ if _homeserver_whitelist:
+ return _homeserver_whitelist.match(destination)
+ return False
+
+
+def start_active_span_from_context(
+ headers,
+ operation_name,
+ references=None,
+ tags=None,
+ start_time=None,
+ ignore_active_span=False,
+ finish_on_close=True,
+):
+ """
+ Extracts a span context from Twisted Headers.
+ args:
+ headers (twisted.web.http_headers.Headers)
+ returns:
+ span_context (opentracing.span.SpanContext)
+ """
+ # Twisted encodes the values as lists whereas opentracing doesn't.
+ # So, we take the first item in the list.
+ # Also, twisted uses byte arrays while opentracing expects strings.
+ if opentracing is None:
+ return _noop_context_manager()
+
+ header_dict = {k.decode(): v[0].decode() for k, v in headers.getAllRawHeaders()}
+ context = opentracing.tracer.extract(opentracing.Format.HTTP_HEADERS, header_dict)
+
+ return opentracing.tracer.start_active_span(
+ operation_name,
+ child_of=context,
+ references=references,
+ tags=tags,
+ start_time=start_time,
+ ignore_active_span=ignore_active_span,
+ finish_on_close=finish_on_close,
+ )
+
+
+@only_if_tracing
+def inject_active_span_twisted_headers(headers, destination):
+ """
+ Injects a span context into twisted headers inplace
+
+ Args:
+ headers (twisted.web.http_headers.Headers)
+ span (opentracing.Span)
+
+ Returns:
+ Inplace modification of headers
+
+ Note:
+ The headers set by the tracer are custom to the tracer implementation which
+ should be unique enough that they don't interfere with any headers set by
+ synapse or twisted. If we're still using jaeger these headers would be those
+ here:
+ https://github.com/jaegertracing/jaeger-client-python/blob/master/jaeger_client/constants.py
+ """
+
+ if not whitelisted_homeserver(destination):
+ return
+
+ span = opentracing.tracer.active_span
+ carrier = {}
+ opentracing.tracer.inject(span, opentracing.Format.HTTP_HEADERS, carrier)
+
+ for key, value in carrier.items():
+ headers.addRawHeaders(key, value)
+
+
+@only_if_tracing
+def inject_active_span_byte_dict(headers, destination):
+ """
+ Injects a span context into a dict where the headers are encoded as byte
+ strings
+
+ Args:
+ headers (dict)
+ span (opentracing.Span)
+
+ Returns:
+ Inplace modification of headers
+
+ Note:
+ The headers set by the tracer are custom to the tracer implementation which
+ should be unique enough that they don't interfere with any headers set by
+ synapse or twisted. If we're still using jaeger these headers would be those
+ here:
+ https://github.com/jaegertracing/jaeger-client-python/blob/master/jaeger_client/constants.py
+ """
+ if not whitelisted_homeserver(destination):
+ return
+
+ span = opentracing.tracer.active_span
+
+ carrier = {}
+ opentracing.tracer.inject(span, opentracing.Format.HTTP_HEADERS, carrier)
+
+ for key, value in carrier.items():
+ headers[key.encode()] = [value.encode()]
+
+
+def trace_servlet(servlet_name, func):
+ """Decorator which traces a serlet. It starts a span with some servlet specific
+ tags such as the servlet_name and request information"""
+
+ @wraps(func)
+ @defer.inlineCallbacks
+ def _trace_servlet_inner(request, *args, **kwargs):
+ with start_active_span_from_context(
+ request.requestHeaders,
+ "incoming-client-request",
+ tags={
+ "request_id": request.get_request_id(),
+ tags.SPAN_KIND: tags.SPAN_KIND_RPC_SERVER,
+ tags.HTTP_METHOD: request.get_method(),
+ tags.HTTP_URL: request.get_redacted_uri(),
+ tags.PEER_HOST_IPV6: request.getClientIP(),
+ "servlet_name": servlet_name,
+ },
+ ):
+ result = yield defer.maybeDeferred(func, request, *args, **kwargs)
+ defer.returnValue(result)
+
+ return _trace_servlet_inner
diff --git a/synapse/logging/scopecontextmanager.py b/synapse/logging/scopecontextmanager.py
new file mode 100644
index 0000000000..91e14462f3
--- /dev/null
+++ b/synapse/logging/scopecontextmanager.py
@@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.import logging
+
+import logging
+
+from opentracing import Scope, ScopeManager
+
+import twisted
+
+from synapse.logging.context import LoggingContext, nested_logging_context
+
+logger = logging.getLogger(__name__)
+
+
+class LogContextScopeManager(ScopeManager):
+ """
+ The LogContextScopeManager tracks the active scope in opentracing
+ by using the log contexts which are native to synapse. This is so
+ that the basic opentracing api can be used across twisted defereds.
+ (I would love to break logcontexts and this into an OS package. but
+ let's wait for twisted's contexts to be released.)
+ """
+
+ def __init__(self, config):
+ # Set the whitelists
+ logger.info(config.tracer_config)
+ self._homeserver_whitelist = config.tracer_config["homeserver_whitelist"]
+
+ @property
+ def active(self):
+ """
+ Returns the currently active Scope which can be used to access the
+ currently active Scope.span.
+ If there is a non-null Scope, its wrapped Span
+ becomes an implicit parent of any newly-created Span at
+ Tracer.start_active_span() time.
+
+ Return:
+ (Scope) : the Scope that is active, or None if not
+ available.
+ """
+ ctx = LoggingContext.current_context()
+ if ctx is LoggingContext.sentinel:
+ return None
+ else:
+ return ctx.scope
+
+ def activate(self, span, finish_on_close):
+ """
+ Makes a Span active.
+ Args
+ span (Span): the span that should become active.
+ finish_on_close (Boolean): whether Span should be automatically
+ finished when Scope.close() is called.
+
+ Returns:
+ Scope to control the end of the active period for
+ *span*. It is a programming error to neglect to call
+ Scope.close() on the returned instance.
+ """
+
+ enter_logcontext = False
+ ctx = LoggingContext.current_context()
+
+ if ctx is LoggingContext.sentinel:
+ # We don't want this scope to affect.
+ logger.error("Tried to activate scope outside of loggingcontext")
+ return Scope(None, span)
+ elif ctx.scope is not None:
+ # We want the logging scope to look exactly the same so we give it
+ # a blank suffix
+ ctx = nested_logging_context("")
+ enter_logcontext = True
+
+ scope = _LogContextScope(self, span, ctx, enter_logcontext, finish_on_close)
+ ctx.scope = scope
+ return scope
+
+
+class _LogContextScope(Scope):
+ """
+ A custom opentracing scope. The only significant difference is that it will
+ close the log context it's related to if the logcontext was created specifically
+ for this scope.
+ """
+
+ def __init__(self, manager, span, logcontext, enter_logcontext, finish_on_close):
+ """
+ Args:
+ manager (LogContextScopeManager):
+ the manager that is responsible for this scope.
+ span (Span):
+ the opentracing span which this scope represents the local
+ lifetime for.
+ logcontext (LogContext):
+ the logcontext to which this scope is attached.
+ enter_logcontext (Boolean):
+ if True the logcontext will be entered and exited when the scope
+ is entered and exited respectively
+ finish_on_close (Boolean):
+ if True finish the span when the scope is closed
+ """
+ super(_LogContextScope, self).__init__(manager, span)
+ self.logcontext = logcontext
+ self._finish_on_close = finish_on_close
+ self._enter_logcontext = enter_logcontext
+
+ def __enter__(self):
+ if self._enter_logcontext:
+ self.logcontext.__enter__()
+
+ def __exit__(self, type, value, traceback):
+ if type == twisted.internet.defer._DefGen_Return:
+ super(_LogContextScope, self).__exit__(None, None, None)
+ else:
+ super(_LogContextScope, self).__exit__(type, value, traceback)
+ if self._enter_logcontext:
+ self.logcontext.__exit__(type, value, traceback)
+ else: # the logcontext existed before the creation of the scope
+ self.logcontext.scope = None
+
+ def close(self):
+ if self.manager.active is not self:
+ logger.error("Tried to close a none active scope!")
+ return
+
+ if self._finish_on_close:
+ self.span.finish()
diff --git a/synapse/logging/utils.py b/synapse/logging/utils.py
new file mode 100644
index 0000000000..7df0fa6087
--- /dev/null
+++ b/synapse/logging/utils.py
@@ -0,0 +1,194 @@
+# -*- coding: utf-8 -*-
+# Copyright 2014-2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import inspect
+import logging
+import time
+from functools import wraps
+from inspect import getcallargs
+
+from six import PY3
+
+_TIME_FUNC_ID = 0
+
+
+def _log_debug_as_f(f, msg, msg_args):
+ name = f.__module__
+ logger = logging.getLogger(name)
+
+ if logger.isEnabledFor(logging.DEBUG):
+ if PY3:
+ lineno = f.__code__.co_firstlineno
+ pathname = f.__code__.co_filename
+ else:
+ lineno = f.func_code.co_firstlineno
+ pathname = f.func_code.co_filename
+
+ record = logging.LogRecord(
+ name=name,
+ level=logging.DEBUG,
+ pathname=pathname,
+ lineno=lineno,
+ msg=msg,
+ args=msg_args,
+ exc_info=None,
+ )
+
+ logger.handle(record)
+
+
+def log_function(f):
+ """ Function decorator that logs every call to that function.
+ """
+ func_name = f.__name__
+
+ @wraps(f)
+ def wrapped(*args, **kwargs):
+ name = f.__module__
+ logger = logging.getLogger(name)
+ level = logging.DEBUG
+
+ if logger.isEnabledFor(level):
+ bound_args = getcallargs(f, *args, **kwargs)
+
+ def format(value):
+ r = str(value)
+ if len(r) > 50:
+ r = r[:50] + "..."
+ return r
+
+ func_args = ["%s=%s" % (k, format(v)) for k, v in bound_args.items()]
+
+ msg_args = {"func_name": func_name, "args": ", ".join(func_args)}
+
+ _log_debug_as_f(f, "Invoked '%(func_name)s' with args: %(args)s", msg_args)
+
+ return f(*args, **kwargs)
+
+ wrapped.__name__ = func_name
+ return wrapped
+
+
+def time_function(f):
+ func_name = f.__name__
+
+ @wraps(f)
+ def wrapped(*args, **kwargs):
+ global _TIME_FUNC_ID
+ id = _TIME_FUNC_ID
+ _TIME_FUNC_ID += 1
+
+ start = time.clock()
+
+ try:
+ _log_debug_as_f(f, "[FUNC START] {%s-%d}", (func_name, id))
+
+ r = f(*args, **kwargs)
+ finally:
+ end = time.clock()
+ _log_debug_as_f(
+ f, "[FUNC END] {%s-%d} %.3f sec", (func_name, id, end - start)
+ )
+
+ return r
+
+ return wrapped
+
+
+def trace_function(f):
+ func_name = f.__name__
+ linenum = f.func_code.co_firstlineno
+ pathname = f.func_code.co_filename
+
+ @wraps(f)
+ def wrapped(*args, **kwargs):
+ name = f.__module__
+ logger = logging.getLogger(name)
+ level = logging.DEBUG
+
+ s = inspect.currentframe().f_back
+
+ to_print = [
+ "\t%s:%s %s. Args: args=%s, kwargs=%s"
+ % (pathname, linenum, func_name, args, kwargs)
+ ]
+ while s:
+ if True or s.f_globals["__name__"].startswith("synapse"):
+ filename, lineno, function, _, _ = inspect.getframeinfo(s)
+ args_string = inspect.formatargvalues(*inspect.getargvalues(s))
+
+ to_print.append(
+ "\t%s:%d %s. Args: %s" % (filename, lineno, function, args_string)
+ )
+
+ s = s.f_back
+
+ msg = "\nTraceback for %s:\n" % (func_name,) + "\n".join(to_print)
+
+ record = logging.LogRecord(
+ name=name,
+ level=level,
+ pathname=pathname,
+ lineno=lineno,
+ msg=msg,
+ args=None,
+ exc_info=None,
+ )
+
+ logger.handle(record)
+
+ return f(*args, **kwargs)
+
+ wrapped.__name__ = func_name
+ return wrapped
+
+
+def get_previous_frames():
+ s = inspect.currentframe().f_back.f_back
+ to_return = []
+ while s:
+ if s.f_globals["__name__"].startswith("synapse"):
+ filename, lineno, function, _, _ = inspect.getframeinfo(s)
+ args_string = inspect.formatargvalues(*inspect.getargvalues(s))
+
+ to_return.append(
+ "{{ %s:%d %s - Args: %s }}" % (filename, lineno, function, args_string)
+ )
+
+ s = s.f_back
+
+ return ", ".join(to_return)
+
+
+def get_previous_frame(ignore=[]):
+ s = inspect.currentframe().f_back.f_back
+
+ while s:
+ if s.f_globals["__name__"].startswith("synapse"):
+ if not any(s.f_globals["__name__"].startswith(ig) for ig in ignore):
+ filename, lineno, function, _, _ = inspect.getframeinfo(s)
+ args_string = inspect.formatargvalues(*inspect.getargvalues(s))
+
+ return "{{ %s:%d %s - Args: %s }}" % (
+ filename,
+ lineno,
+ function,
+ args_string,
+ )
+
+ s = s.f_back
+
+ return None
|