6 files changed, 1252 insertions, 0 deletions
diff --git a/synapse/logging/__init__.py b/synapse/logging/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/synapse/logging/__init__.py
diff --git a/synapse/logging/context.py b/synapse/logging/context.py
new file mode 100644
index 0000000000..b456c31f70
--- /dev/null
+++ b/synapse/logging/context.py
@@ -0,0 +1,697 @@
+# Copyright 2014-2016 OpenMarket Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+""" Thread-local-alike tracking of log contexts within synapse
+
+This module provides objects and utilities for tracking contexts through
+synapse code, so that log lines can include a request identifier, and so that
+CPU and database activity can be accounted for against the request that caused
+them.
+
+See doc/log_contexts.rst for details on how this works.
+"""
+
+import logging
+import threading
+import types
+
+from twisted.internet import defer, threads
+
+logger = logging.getLogger(__name__)
+
+try:
+    import resource
+
+    # Python doesn't ship with a definition of RUSAGE_THREAD but it's defined
+    # to be 1 on linux so we hard code it.
+    RUSAGE_THREAD = 1
+
+    # If the system doesn't support RUSAGE_THREAD then this should throw an
+    # exception.
+    resource.getrusage(RUSAGE_THREAD)
+
+    def get_thread_resource_usage():
+        return resource.getrusage(RUSAGE_THREAD)
+
+
+except Exception:
+    # If the system doesn't support resource.getrusage(RUSAGE_THREAD) then we
+    # won't track resource usage by returning None.
+    def get_thread_resource_usage():
+        return None
+
+
+# get an id for the current thread.
+#
+# threading.get_ident doesn't actually return an OS-level tid, and annoyingly,
+# on Linux it actually returns the same value either side of a fork() call. However
+# we only fork in one place, so it's not worth the hoop-jumping to get a real tid.
+#
+get_thread_id = threading.get_ident
+
+
+class ContextResourceUsage(object):
+    """Object for tracking the resources used by a log context
+
+    Attributes:
+        ru_utime (float): user CPU time (in seconds)
+        ru_stime (float): system CPU time (in seconds)
+        db_txn_count (int): number of database transactions done
+        db_sched_duration_sec (float): amount of time spent waiting for a
+            database connection
+        db_txn_duration_sec (float): amount of time spent doing database
+            transactions (excluding scheduling time)
+        evt_db_fetch_count (int): number of events requested from the database
+    """
+
+    __slots__ = [
+        "ru_stime",
+        "ru_utime",
+        "db_txn_count",
+        "db_txn_duration_sec",
+        "db_sched_duration_sec",
+        "evt_db_fetch_count",
+    ]
+
+    def __init__(self, copy_from=None):
+        """Create a new ContextResourceUsage
+
+        Args:
+            copy_from (ContextResourceUsage|None): if not None, an object to
+                copy stats from
+        """
+        if copy_from is None:
+            self.reset()
+        else:
+            self.ru_utime = copy_from.ru_utime
+            self.ru_stime = copy_from.ru_stime
+            self.db_txn_count = copy_from.db_txn_count
+
+            self.db_txn_duration_sec = copy_from.db_txn_duration_sec
+            self.db_sched_duration_sec = copy_from.db_sched_duration_sec
+            self.evt_db_fetch_count = copy_from.evt_db_fetch_count
+
+    def copy(self):
+        return ContextResourceUsage(copy_from=self)
+
+    def reset(self):
+        self.ru_stime = 0.0
+        self.ru_utime = 0.0
+        self.db_txn_count = 0
+
+        self.db_txn_duration_sec = 0
+        self.db_sched_duration_sec = 0
+        self.evt_db_fetch_count = 0
+
+    def __repr__(self):
+        return (
+            "<ContextResourceUsage ru_stime='%r', ru_utime='%r', "
+            "db_txn_count='%r', db_txn_duration_sec='%r', "
+            "db_sched_duration_sec='%r', evt_db_fetch_count='%r'>"
+        ) % (
+            self.ru_stime,
+            self.ru_utime,
+            self.db_txn_count,
+            self.db_txn_duration_sec,
+            self.db_sched_duration_sec,
+            self.evt_db_fetch_count,
+        )
+
+    def __iadd__(self, other):
+        """Add another ContextResourceUsage's stats to this one's.
+
+        Args:
+            other (ContextResourceUsage): the other resource usage object
+        """
+        self.ru_utime += other.ru_utime
+        self.ru_stime += other.ru_stime
+        self.db_txn_count += other.db_txn_count
+        self.db_txn_duration_sec += other.db_txn_duration_sec
+        self.db_sched_duration_sec += other.db_sched_duration_sec
+        self.evt_db_fetch_count += other.evt_db_fetch_count
+        return self
+
+    def __isub__(self, other):
+        self.ru_utime -= other.ru_utime
+        self.ru_stime -= other.ru_stime
+        self.db_txn_count -= other.db_txn_count
+        self.db_txn_duration_sec -= other.db_txn_duration_sec
+        self.db_sched_duration_sec -= other.db_sched_duration_sec
+        self.evt_db_fetch_count -= other.evt_db_fetch_count
+        return self
+
+    def __add__(self, other):
+        res = ContextResourceUsage(copy_from=self)
+        res += other
+        return res
+
+    def __sub__(self, other):
+        res = ContextResourceUsage(copy_from=self)
+        res -= other
+        return res
+
+
+class LoggingContext(object):
+    """Additional context for log formatting. Contexts are scoped within a
+    "with" block.
+
+    If a parent is given when creating a new context, then:
+        - logging fields are copied from the parent to the new context on entry
+        - when the new context exits, the cpu usage stats are copied from the
+          child to the parent
+
+    Args:
+        name (str): Name for the context for debugging.
+        parent_context (LoggingContext|None): The parent of the new context
+    """
+
+    __slots__ = [
+        "previous_context",
+        "name",
+        "parent_context",
+        "_resource_usage",
+        "usage_start",
+        "main_thread",
+        "alive",
+        "request",
+        "tag",
+        "scope",
+    ]
+
+    thread_local = threading.local()
+
+    class Sentinel(object):
+        """Sentinel to represent the root context"""
+
+        __slots__ = []
+
+        def __str__(self):
+            return "sentinel"
+
+        def copy_to(self, record):
+            pass
+
+        def start(self):
+            pass
+
+        def stop(self):
+            pass
+
+        def add_database_transaction(self, duration_sec):
+            pass
+
+        def add_database_scheduled(self, sched_sec):
+            pass
+
+        def record_event_fetch(self, event_count):
+            pass
+
+        def __nonzero__(self):
+            return False
+
+        __bool__ = __nonzero__  # python3
+
+    sentinel = Sentinel()
+
+    def __init__(self, name=None, parent_context=None, request=None):
+        self.previous_context = LoggingContext.current_context()
+        self.name = name
+
+        # track the resources used by this context so far
+        self._resource_usage = ContextResourceUsage()
+
+        # If alive has the thread resource usage when the logcontext last
+        # became active.
+        self.usage_start = None
+
+        self.main_thread = get_thread_id()
+        self.request = None
+        self.tag = ""
+        self.alive = True
+        self.scope = None
+
+        self.parent_context = parent_context
+
+        if self.parent_context is not None:
+            self.parent_context.copy_to(self)
+
+        if request is not None:
+            # the request param overrides the request from the parent context
+            self.request = request
+
+    def __str__(self):
+        if self.request:
+            return str(self.request)
+        return "%s@%x" % (self.name, id(self))
+
+    @classmethod
+    def current_context(cls):
+        """Get the current logging context from thread local storage
+
+        Returns:
+            LoggingContext: the current logging context
+        """
+        return getattr(cls.thread_local, "current_context", cls.sentinel)
+
+    @classmethod
+    def set_current_context(cls, context):
+        """Set the current logging context in thread local storage
+        Args:
+            context(LoggingContext): The context to activate.
+        Returns:
+            The context that was previously active
+        """
+        current = cls.current_context()
+
+        if current is not context:
+            current.stop()
+            cls.thread_local.current_context = context
+            context.start()
+        return current
+
+    def __enter__(self):
+        """Enters this logging context into thread local storage"""
+        old_context = self.set_current_context(self)
+        if self.previous_context != old_context:
+            logger.warn(
+                "Expected previous context %r, found %r",
+                self.previous_context,
+                old_context,
+            )
+        self.alive = True
+
+        return self
+
+    def __exit__(self, type, value, traceback):
+        """Restore the logging context in thread local storage to the state it
+        was before this context was entered.
+        Returns:
+            None to avoid suppressing any exceptions that were thrown.
+        """
+        current = self.set_current_context(self.previous_context)
+        if current is not self:
+            if current is self.sentinel:
+                logger.warning("Expected logging context %s was lost", self)
+            else:
+                logger.warning(
+                    "Expected logging context %s but found %s", self, current
+                )
+        self.previous_context = None
+        self.alive = False
+
+        # if we have a parent, pass our CPU usage stats on
+        if self.parent_context is not None and hasattr(
+            self.parent_context, "_resource_usage"
+        ):
+            self.parent_context._resource_usage += self._resource_usage
+
+            # reset them in case we get entered again
+            self._resource_usage.reset()
+
+    def copy_to(self, record):
+        """Copy logging fields from this context to a log record or
+        another LoggingContext
+        """
+
+        # we track the current request
+        record.request = self.request
+
+        # we also track the current scope:
+        record.scope = self.scope
+
+    def start(self):
+        if get_thread_id() != self.main_thread:
+            logger.warning("Started logcontext %s on different thread", self)
+            return
+
+        # If we haven't already started record the thread resource usage so
+        # far
+        if not self.usage_start:
+            self.usage_start = get_thread_resource_usage()
+
+    def stop(self):
+        if get_thread_id() != self.main_thread:
+            logger.warning("Stopped logcontext %s on different thread", self)
+            return
+
+        # When we stop, let's record the cpu used since we started
+        if not self.usage_start:
+            logger.warning("Called stop on logcontext %s without calling start", self)
+            return
+
+        utime_delta, stime_delta = self._get_cputime()
+        self._resource_usage.ru_utime += utime_delta
+        self._resource_usage.ru_stime += stime_delta
+
+        self.usage_start = None
+
+    def get_resource_usage(self):
+        """Get resources used by this logcontext so far.
+
+        Returns:
+            ContextResourceUsage: a *copy* of the object tracking resource
+                usage so far
+        """
+        # we always return a copy, for consistency
+        res = self._resource_usage.copy()
+
+        # If we are on the correct thread and we're currently running then we
+        # can include resource usage so far.
+        is_main_thread = get_thread_id() == self.main_thread
+        if self.alive and self.usage_start and is_main_thread:
+            utime_delta, stime_delta = self._get_cputime()
+            res.ru_utime += utime_delta
+            res.ru_stime += stime_delta
+
+        return res
+
+    def _get_cputime(self):
+        """Get the cpu usage time so far
+
+        Returns: Tuple[float, float]: seconds in user mode, seconds in system mode
+        """
+        current = get_thread_resource_usage()
+
+        utime_delta = current.ru_utime - self.usage_start.ru_utime
+        stime_delta = current.ru_stime - self.usage_start.ru_stime
+
+        # sanity check
+        if utime_delta < 0:
+            logger.error(
+                "utime went backwards! %f < %f",
+                current.ru_utime,
+                self.usage_start.ru_utime,
+            )
+            utime_delta = 0
+
+        if stime_delta < 0:
+            logger.error(
+                "stime went backwards! %f < %f",
+                current.ru_stime,
+                self.usage_start.ru_stime,
+            )
+            stime_delta = 0
+
+        return utime_delta, stime_delta
+
+    def add_database_transaction(self, duration_sec):
+        if duration_sec < 0:
+            raise ValueError("DB txn time can only be non-negative")
+        self._resource_usage.db_txn_count += 1
+        self._resource_usage.db_txn_duration_sec += duration_sec
+
+    def add_database_scheduled(self, sched_sec):
+        """Record a use of the database pool
+
+        Args:
+            sched_sec (float): number of seconds it took us to get a
+                connection
+        """
+        if sched_sec < 0:
+            raise ValueError("DB scheduling time can only be non-negative")
+        self._resource_usage.db_sched_duration_sec += sched_sec
+
+    def record_event_fetch(self, event_count):
+        """Record a number of events being fetched from the db
+
+        Args:
+            event_count (int): number of events being fetched
+        """
+        self._resource_usage.evt_db_fetch_count += event_count
+
+
+class LoggingContextFilter(logging.Filter):
+    """Logging filter that adds values from the current logging context to each
+    record.
+    Args:
+        **defaults: Default values to avoid formatters complaining about
+            missing fields
+    """
+
+    def __init__(self, **defaults):
+        self.defaults = defaults
+
+    def filter(self, record):
+        """Add each fields from the logging contexts to the record.
+        Returns:
+            True to include the record in the log output.
+        """
+        context = LoggingContext.current_context()
+        for key, value in self.defaults.items():
+            setattr(record, key, value)
+
+        # context should never be None, but if it somehow ends up being, then
+        # we end up in a death spiral of infinite loops, so let's check, for
+        # robustness' sake.
+        if context is not None:
+            context.copy_to(record)
+
+        return True
+
+
+class PreserveLoggingContext(object):
+    """Captures the current logging context and restores it when the scope is
+    exited. Used to restore the context after a function using
+    @defer.inlineCallbacks is resumed by a callback from the reactor."""
+
+    __slots__ = ["current_context", "new_context", "has_parent"]
+
+    def __init__(self, new_context=None):
+        if new_context is None:
+            new_context = LoggingContext.sentinel
+        self.new_context = new_context
+
+    def __enter__(self):
+        """Captures the current logging context"""
+        self.current_context = LoggingContext.set_current_context(self.new_context)
+
+        if self.current_context:
+            self.has_parent = self.current_context.previous_context is not None
+            if not self.current_context.alive:
+                logger.debug("Entering dead context: %s", self.current_context)
+
+    def __exit__(self, type, value, traceback):
+        """Restores the current logging context"""
+        context = LoggingContext.set_current_context(self.current_context)
+
+        if context != self.new_context:
+            if context is LoggingContext.sentinel:
+                logger.warning("Expected logging context %s was lost", self.new_context)
+            else:
+                logger.warning(
+                    "Expected logging context %s but found %s",
+                    self.new_context,
+                    context,
+                )
+
+        if self.current_context is not LoggingContext.sentinel:
+            if not self.current_context.alive:
+                logger.debug("Restoring dead context: %s", self.current_context)
+
+
+def nested_logging_context(suffix, parent_context=None):
+    """Creates a new logging context as a child of another.
+
+    The nested logging context will have a 'request' made up of the parent context's
+    request, plus the given suffix.
+
+    CPU/db usage stats will be added to the parent context's on exit.
+
+    Normal usage looks like:
+
+        with nested_logging_context(suffix):
+            # ... do stuff
+
+    Args:
+        suffix (str): suffix to add to the parent context's 'request'.
+        parent_context (LoggingContext|None): parent context. Will use the current context
+            if None.
+
+    Returns:
+        LoggingContext: new logging context.
+    """
+    if parent_context is None:
+        parent_context = LoggingContext.current_context()
+    return LoggingContext(
+        parent_context=parent_context, request=parent_context.request + "-" + suffix
+    )
+
+
+def preserve_fn(f):
+    """Function decorator which wraps the function with run_in_background"""
+
+    def g(*args, **kwargs):
+        return run_in_background(f, *args, **kwargs)
+
+    return g
+
+
+def run_in_background(f, *args, **kwargs):
+    """Calls a function, ensuring that the current context is restored after
+    return from the function, and that the sentinel context is set once the
+    deferred returned by the function completes.
+
+    Useful for wrapping functions that return a deferred or coroutine, which you don't
+    yield or await on (for instance because you want to pass it to
+    deferred.gatherResults()).
+
+    Note that if you completely discard the result, you should make sure that
+    `f` doesn't raise any deferred exceptions, otherwise a scary-looking
+    CRITICAL error about an unhandled error will be logged without much
+    indication about where it came from.
+    """
+    current = LoggingContext.current_context()
+    try:
+        res = f(*args, **kwargs)
+    except:  # noqa: E722
+        # the assumption here is that the caller doesn't want to be disturbed
+        # by synchronous exceptions, so let's turn them into Failures.
+        return defer.fail()
+
+    if isinstance(res, types.CoroutineType):
+        res = defer.ensureDeferred(res)
+
+    if not isinstance(res, defer.Deferred):
+        return res
+
+    if res.called and not res.paused:
+        # The function should have maintained the logcontext, so we can
+        # optimise out the messing about
+        return res
+
+    # The function may have reset the context before returning, so
+    # we need to restore it now.
+    ctx = LoggingContext.set_current_context(current)
+
+    # The original context will be restored when the deferred
+    # completes, but there is nothing waiting for it, so it will
+    # get leaked into the reactor or some other function which
+    # wasn't expecting it. We therefore need to reset the context
+    # here.
+    #
+    # (If this feels asymmetric, consider it this way: we are
+    # effectively forking a new thread of execution. We are
+    # probably currently within a ``with LoggingContext()`` block,
+    # which is supposed to have a single entry and exit point. But
+    # by spawning off another deferred, we are effectively
+    # adding a new exit point.)
+    res.addBoth(_set_context_cb, ctx)
+    return res
+
+
+def make_deferred_yieldable(deferred):
+    """Given a deferred, make it follow the Synapse logcontext rules:
+
+    If the deferred has completed (or is not actually a Deferred), essentially
+    does nothing (just returns another completed deferred with the
+    result/failure).
+
+    If the deferred has not yet completed, resets the logcontext before
+    returning a deferred. Then, when the deferred completes, restores the
+    current logcontext before running callbacks/errbacks.
+
+    (This is more-or-less the opposite operation to run_in_background.)
+    """
+    if not isinstance(deferred, defer.Deferred):
+        return deferred
+
+    if deferred.called and not deferred.paused:
+        # it looks like this deferred is ready to run any callbacks we give it
+        # immediately. We may as well optimise out the logcontext faffery.
+        return deferred
+
+    # ok, we can't be sure that a yield won't block, so let's reset the
+    # logcontext, and add a callback to the deferred to restore it.
+    prev_context = LoggingContext.set_current_context(LoggingContext.sentinel)
+    deferred.addBoth(_set_context_cb, prev_context)
+    return deferred
+
+
+def _set_context_cb(result, context):
+    """A callback function which just sets the logging context"""
+    LoggingContext.set_current_context(context)
+    return result
+
+
+def defer_to_thread(reactor, f, *args, **kwargs):
+    """
+    Calls the function `f` using a thread from the reactor's default threadpool and
+    returns the result as a Deferred.
+
+    Creates a new logcontext for `f`, which is created as a child of the current
+    logcontext (so its CPU usage metrics will get attributed to the current
+    logcontext). `f` should preserve the logcontext it is given.
+
+    The result deferred follows the Synapse logcontext rules: you should `yield`
+    on it.
+
+    Args:
+        reactor (twisted.internet.base.ReactorBase): The reactor in whose main thread
+            the Deferred will be invoked, and whose threadpool we should use for the
+            function.
+
+            Normally this will be hs.get_reactor().
+
+        f (callable): The function to call.
+
+        args: positional arguments to pass to f.
+
+        kwargs: keyword arguments to pass to f.
+
+    Returns:
+        Deferred: A Deferred which fires a callback with the result of `f`, or an
+            errback if `f` throws an exception.
+    """
+    return defer_to_threadpool(reactor, reactor.getThreadPool(), f, *args, **kwargs)
+
+
+def defer_to_threadpool(reactor, threadpool, f, *args, **kwargs):
+    """
+    A wrapper for twisted.internet.threads.deferToThreadpool, which handles
+    logcontexts correctly.
+
+    Calls the function `f` using a thread from the given threadpool and returns
+    the result as a Deferred.
+
+    Creates a new logcontext for `f`, which is created as a child of the current
+    logcontext (so its CPU usage metrics will get attributed to the current
+    logcontext). `f` should preserve the logcontext it is given.
+
+    The result deferred follows the Synapse logcontext rules: you should `yield`
+    on it.
+
+    Args:
+        reactor (twisted.internet.base.ReactorBase): The reactor in whose main thread
+            the Deferred will be invoked. Normally this will be hs.get_reactor().
+
+        threadpool (twisted.python.threadpool.ThreadPool): The threadpool to use for
+            running `f`. Normally this will be hs.get_reactor().getThreadPool().
+
+        f (callable): The function to call.
+
+        args: positional arguments to pass to f.
+
+        kwargs: keyword arguments to pass to f.
+
+    Returns:
+        Deferred: A Deferred which fires a callback with the result of `f`, or an
+            errback if `f` throws an exception.
+    """
+    logcontext = LoggingContext.current_context()
+
+    def g():
+        with LoggingContext(parent_context=logcontext):
+            return f(*args, **kwargs)
+
+    return make_deferred_yieldable(threads.deferToThreadPool(reactor, threadpool, g))
diff --git a/synapse/logging/formatter.py b/synapse/logging/formatter.py
new file mode 100644
index 0000000000..fbf570c756
--- /dev/null
+++ b/synapse/logging/formatter.py
@@ -0,0 +1,53 @@
+# -*- coding: utf-8 -*-
+# Copyright 2017 New Vector Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import logging
+import traceback
+
+from six import StringIO
+
+
+class LogFormatter(logging.Formatter):
+    """Log formatter which gives more detail for exceptions
+
+    This is the same as the standard log formatter, except that when logging
+    exceptions [typically via log.foo("msg", exc_info=1)], it prints the
+    sequence that led up to the point at which the exception was caught.
+    (Normally only stack frames between the point the exception was raised and
+    where it was caught are logged).
+    """
+
+    def __init__(self, *args, **kwargs):
+        super(LogFormatter, self).__init__(*args, **kwargs)
+
+    def formatException(self, ei):
+        sio = StringIO()
+        (typ, val, tb) = ei
+
+        # log the stack above the exception capture point if possible, but
+        # check that we actually have an f_back attribute to work around
+        # https://twistedmatrix.com/trac/ticket/9305
+
+        if tb and hasattr(tb.tb_frame, "f_back"):
+            sio.write("Capture point (most recent call last):\n")
+            traceback.print_stack(tb.tb_frame.f_back, None, sio)
+
+        traceback.print_exception(typ, val, tb, None, sio)
+        s = sio.getvalue()
+        sio.close()
+        if s[-1:] == "\n":
+            s = s[:-1]
+        return s
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
new file mode 100644
index 0000000000..f0ceea2a64
--- /dev/null
+++ b/synapse/logging/opentracing.py
@@ -0,0 +1,362 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Matrix.org Foundation C.I.C.d
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.import opentracing
+
+
+# NOTE
+# This is a small wrapper around opentracing because opentracing is not currently
+# packaged downstream (specifically debian). Since opentracing instrumentation is
+# fairly invasive it was awkward to make it optional. As a result we opted to encapsulate
+# all opentracing state in these methods which effectively noop if opentracing is
+# not present. We should strongly consider encouraging the downstream distributers
+# to package opentracing and making opentracing a full dependency. In order to facilitate
+# this move the methods have work very similarly to opentracing's and it should only
+# be a matter of few regexes to move over to opentracing's access patterns proper.
+
+try:
+    import opentracing
+except ImportError:
+    opentracing = None
+try:
+    from jaeger_client import Config as JaegerConfig
+    from synapse.logging.scopecontextmanager import LogContextScopeManager
+except ImportError:
+    JaegerConfig = None
+    LogContextScopeManager = None
+
+import contextlib
+import logging
+import re
+from functools import wraps
+
+from twisted.internet import defer
+
+logger = logging.getLogger(__name__)
+
+
+class _DumTagNames(object):
+    """wrapper of opentracings tags. We need to have them if we
+    want to reference them without opentracing around. Clearly they
+    should never actually show up in a trace. `set_tags` overwrites
+    these with the correct ones."""
+
+    INVALID_TAG = "invalid-tag"
+    COMPONENT = INVALID_TAG
+    DATABASE_INSTANCE = INVALID_TAG
+    DATABASE_STATEMENT = INVALID_TAG
+    DATABASE_TYPE = INVALID_TAG
+    DATABASE_USER = INVALID_TAG
+    ERROR = INVALID_TAG
+    HTTP_METHOD = INVALID_TAG
+    HTTP_STATUS_CODE = INVALID_TAG
+    HTTP_URL = INVALID_TAG
+    MESSAGE_BUS_DESTINATION = INVALID_TAG
+    PEER_ADDRESS = INVALID_TAG
+    PEER_HOSTNAME = INVALID_TAG
+    PEER_HOST_IPV4 = INVALID_TAG
+    PEER_HOST_IPV6 = INVALID_TAG
+    PEER_PORT = INVALID_TAG
+    PEER_SERVICE = INVALID_TAG
+    SAMPLING_PRIORITY = INVALID_TAG
+    SERVICE = INVALID_TAG
+    SPAN_KIND = INVALID_TAG
+    SPAN_KIND_CONSUMER = INVALID_TAG
+    SPAN_KIND_PRODUCER = INVALID_TAG
+    SPAN_KIND_RPC_CLIENT = INVALID_TAG
+    SPAN_KIND_RPC_SERVER = INVALID_TAG
+
+
+def only_if_tracing(func):
+    """Executes the function only if we're tracing. Otherwise return.
+    Assumes the function wrapped may return None"""
+
+    @wraps(func)
+    def _only_if_tracing_inner(*args, **kwargs):
+        if opentracing:
+            return func(*args, **kwargs)
+        else:
+            return
+
+    return _only_if_tracing_inner
+
+
+# Block everything by default
+_homeserver_whitelist = None
+
+tags = _DumTagNames
+
+
+def init_tracer(config):
+    """Set the whitelists and initialise the JaegerClient tracer
+
+    Args:
+        config (Config)
+        The config used by the homeserver. Here it's used to set the service
+        name to the homeserver's.
+    """
+    global opentracing
+    if not config.tracer_config.get("tracer_enabled", False):
+        # We don't have a tracer
+        opentracing = None
+        return
+
+    if not opentracing:
+        logger.error(
+            "The server has been configure to use opentracing but opentracing is not installed."
+        )
+        raise ModuleNotFoundError("opentracing")
+
+    if not JaegerConfig:
+        logger.error(
+            "The server has been configure to use opentracing but opentracing is not installed."
+        )
+
+    # Include the worker name
+    name = config.worker_name if config.worker_name else "master"
+
+    set_homeserver_whitelist(config.tracer_config["homeserver_whitelist"])
+    jaeger_config = JaegerConfig(
+        config={"sampler": {"type": "const", "param": 1}, "logging": True},
+        service_name="{} {}".format(config.server_name, name),
+        scope_manager=LogContextScopeManager(config),
+    )
+    jaeger_config.initialize_tracer()
+
+    # Set up tags to be opentracing's tags
+    global tags
+    tags = opentracing.tags
+
+
+@contextlib.contextmanager
+def _noop_context_manager(*args, **kwargs):
+    """Does absolutely nothing really well. Can be entered and exited arbitrarily.
+    Good substitute for an opentracing scope."""
+    yield
+
+
+# Could use kwargs but I want these to be explicit
+def start_active_span(
+    operation_name,
+    child_of=None,
+    references=None,
+    tags=None,
+    start_time=None,
+    ignore_active_span=False,
+    finish_on_close=True,
+):
+    """Starts an active opentracing span. Note, the scope doesn't become active
+    until it has been entered, however, the span starts from the time this
+    message is called.
+    Args:
+        See opentracing.tracer
+    Returns:
+        scope (Scope) or noop_context_manager
+    """
+    if opentracing is None:
+        return _noop_context_manager()
+    else:
+        # We need to enter the scope here for the logcontext to become active
+        return opentracing.tracer.start_active_span(
+            operation_name,
+            child_of=child_of,
+            references=references,
+            tags=tags,
+            start_time=start_time,
+            ignore_active_span=ignore_active_span,
+            finish_on_close=finish_on_close,
+        )
+
+
+@only_if_tracing
+def close_active_span():
+    """Closes the active span. This will close it's logcontext if the context
+    was made for the span"""
+    opentracing.tracer.scope_manager.active.__exit__(None, None, None)
+
+
+@only_if_tracing
+def set_tag(key, value):
+    """Set's a tag on the active span"""
+    opentracing.tracer.active_span.set_tag(key, value)
+
+
+@only_if_tracing
+def log_kv(key_values, timestamp=None):
+    """Log to the active span"""
+    opentracing.tracer.active_span.log_kv(key_values, timestamp)
+
+
+# Note: we don't have a get baggage items because we're trying to hide all
+# scope and span state from synapse. I think this method may also be useless
+# as a result
+@only_if_tracing
+def set_baggage_item(key, value):
+    """Attach baggage to the active span"""
+    opentracing.tracer.active_span.set_baggage_item(key, value)
+
+
+@only_if_tracing
+def set_operation_name(operation_name):
+    """Sets the operation name of the active span"""
+    opentracing.tracer.active_span.set_operation_name(operation_name)
+
+
+@only_if_tracing
+def set_homeserver_whitelist(homeserver_whitelist):
+    """Sets the whitelist
+
+    Args:
+        homeserver_whitelist (iterable of strings): regex of whitelisted homeservers
+    """
+    global _homeserver_whitelist
+    if homeserver_whitelist:
+        # Makes a single regex which accepts all passed in regexes in the list
+        _homeserver_whitelist = re.compile(
+            "({})".format(")|(".join(homeserver_whitelist))
+        )
+
+
+@only_if_tracing
+def whitelisted_homeserver(destination):
+    """Checks if a destination matches the whitelist
+    Args:
+        destination (String)"""
+    global _homeserver_whitelist
+    if _homeserver_whitelist:
+        return _homeserver_whitelist.match(destination)
+    return False
+
+
+def start_active_span_from_context(
+    headers,
+    operation_name,
+    references=None,
+    tags=None,
+    start_time=None,
+    ignore_active_span=False,
+    finish_on_close=True,
+):
+    """
+    Extracts a span context from Twisted Headers.
+    args:
+        headers (twisted.web.http_headers.Headers)
+    returns:
+        span_context (opentracing.span.SpanContext)
+    """
+    # Twisted encodes the values as lists whereas opentracing doesn't.
+    # So, we take the first item in the list.
+    # Also, twisted uses byte arrays while opentracing expects strings.
+    if opentracing is None:
+        return _noop_context_manager()
+
+    header_dict = {k.decode(): v[0].decode() for k, v in headers.getAllRawHeaders()}
+    context = opentracing.tracer.extract(opentracing.Format.HTTP_HEADERS, header_dict)
+
+    return opentracing.tracer.start_active_span(
+        operation_name,
+        child_of=context,
+        references=references,
+        tags=tags,
+        start_time=start_time,
+        ignore_active_span=ignore_active_span,
+        finish_on_close=finish_on_close,
+    )
+
+
+@only_if_tracing
+def inject_active_span_twisted_headers(headers, destination):
+    """
+    Injects a span context into twisted headers inplace
+
+    Args:
+        headers (twisted.web.http_headers.Headers)
+        span (opentracing.Span)
+
+    Returns:
+        Inplace modification of headers
+
+    Note:
+        The headers set by the tracer are custom to the tracer implementation which
+        should be unique enough that they don't interfere with any headers set by
+        synapse or twisted. If we're still using jaeger these headers would be those
+        here:
+        https://github.com/jaegertracing/jaeger-client-python/blob/master/jaeger_client/constants.py
+    """
+
+    if not whitelisted_homeserver(destination):
+        return
+
+    span = opentracing.tracer.active_span
+    carrier = {}
+    opentracing.tracer.inject(span, opentracing.Format.HTTP_HEADERS, carrier)
+
+    for key, value in carrier.items():
+        headers.addRawHeaders(key, value)
+
+
+@only_if_tracing
+def inject_active_span_byte_dict(headers, destination):
+    """
+    Injects a span context into a dict where the headers are encoded as byte
+    strings
+
+    Args:
+        headers (dict)
+        span (opentracing.Span)
+
+    Returns:
+        Inplace modification of headers
+
+    Note:
+        The headers set by the tracer are custom to the tracer implementation which
+        should be unique enough that they don't interfere with any headers set by
+        synapse or twisted. If we're still using jaeger these headers would be those
+        here:
+        https://github.com/jaegertracing/jaeger-client-python/blob/master/jaeger_client/constants.py
+    """
+    if not whitelisted_homeserver(destination):
+        return
+
+    span = opentracing.tracer.active_span
+
+    carrier = {}
+    opentracing.tracer.inject(span, opentracing.Format.HTTP_HEADERS, carrier)
+
+    for key, value in carrier.items():
+        headers[key.encode()] = [value.encode()]
+
+
+def trace_servlet(servlet_name, func):
+    """Decorator which traces a serlet. It starts a span with some servlet specific
+    tags such as the servlet_name and request information"""
+
+    @wraps(func)
+    @defer.inlineCallbacks
+    def _trace_servlet_inner(request, *args, **kwargs):
+        with start_active_span_from_context(
+            request.requestHeaders,
+            "incoming-client-request",
+            tags={
+                "request_id": request.get_request_id(),
+                tags.SPAN_KIND: tags.SPAN_KIND_RPC_SERVER,
+                tags.HTTP_METHOD: request.get_method(),
+                tags.HTTP_URL: request.get_redacted_uri(),
+                tags.PEER_HOST_IPV6: request.getClientIP(),
+                "servlet_name": servlet_name,
+            },
+        ):
+            result = yield defer.maybeDeferred(func, request, *args, **kwargs)
+        defer.returnValue(result)
+
+    return _trace_servlet_inner
diff --git a/synapse/logging/scopecontextmanager.py b/synapse/logging/scopecontextmanager.py
new file mode 100644
index 0000000000..91e14462f3
--- /dev/null
+++ b/synapse/logging/scopecontextmanager.py
@@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Matrix.org Foundation C.I.C.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.import logging
+
+import logging
+
+from opentracing import Scope, ScopeManager
+
+import twisted
+
+from synapse.logging.context import LoggingContext, nested_logging_context
+
+logger = logging.getLogger(__name__)
+
+
+class LogContextScopeManager(ScopeManager):
+    """
+    The LogContextScopeManager tracks the active scope in opentracing
+    by using the log contexts which are native to synapse. This is so
+    that the basic opentracing api can be used across twisted defereds.
+    (I would love to break logcontexts and this into an OS package. but
+    let's wait for twisted's contexts to be released.)
+    """
+
+    def __init__(self, config):
+        # Set the whitelists
+        logger.info(config.tracer_config)
+        self._homeserver_whitelist = config.tracer_config["homeserver_whitelist"]
+
+    @property
+    def active(self):
+        """
+        Returns the currently active Scope which can be used to access the
+        currently active Scope.span.
+        If there is a non-null Scope, its wrapped Span
+        becomes an implicit parent of any newly-created Span at
+        Tracer.start_active_span() time.
+
+        Return:
+            (Scope) : the Scope that is active, or None if not
+            available.
+        """
+        ctx = LoggingContext.current_context()
+        if ctx is LoggingContext.sentinel:
+            return None
+        else:
+            return ctx.scope
+
+    def activate(self, span, finish_on_close):
+        """
+        Makes a Span active.
+        Args
+            span (Span): the span that should become active.
+            finish_on_close (Boolean): whether Span should be automatically
+                finished when Scope.close() is called.
+
+        Returns:
+            Scope to control the end of the active period for
+            *span*. It is a programming error to neglect to call
+            Scope.close() on the returned instance.
+        """
+
+        enter_logcontext = False
+        ctx = LoggingContext.current_context()
+
+        if ctx is LoggingContext.sentinel:
+            # We don't want this scope to affect.
+            logger.error("Tried to activate scope outside of loggingcontext")
+            return Scope(None, span)
+        elif ctx.scope is not None:
+            # We want the logging scope to look exactly the same so we give it
+            # a blank suffix
+            ctx = nested_logging_context("")
+            enter_logcontext = True
+
+        scope = _LogContextScope(self, span, ctx, enter_logcontext, finish_on_close)
+        ctx.scope = scope
+        return scope
+
+
+class _LogContextScope(Scope):
+    """
+    A custom opentracing scope. The only significant difference is that it will
+    close the log context it's related to if the logcontext was created specifically
+    for this scope.
+    """
+
+    def __init__(self, manager, span, logcontext, enter_logcontext, finish_on_close):
+        """
+        Args:
+            manager (LogContextScopeManager):
+                the manager that is responsible for this scope.
+            span (Span):
+                the opentracing span which this scope represents the local
+                lifetime for.
+            logcontext (LogContext):
+                the logcontext to which this scope is attached.
+            enter_logcontext (Boolean):
+                if True the logcontext will be entered and exited when the scope
+                is entered and exited respectively
+            finish_on_close (Boolean):
+                if True finish the span when the scope is closed
+        """
+        super(_LogContextScope, self).__init__(manager, span)
+        self.logcontext = logcontext
+        self._finish_on_close = finish_on_close
+        self._enter_logcontext = enter_logcontext
+
+    def __enter__(self):
+        if self._enter_logcontext:
+            self.logcontext.__enter__()
+
+    def __exit__(self, type, value, traceback):
+        if type == twisted.internet.defer._DefGen_Return:
+            super(_LogContextScope, self).__exit__(None, None, None)
+        else:
+            super(_LogContextScope, self).__exit__(type, value, traceback)
+        if self._enter_logcontext:
+            self.logcontext.__exit__(type, value, traceback)
+        else:  # the logcontext existed before the creation of the scope
+            self.logcontext.scope = None
+
+    def close(self):
+        if self.manager.active is not self:
+            logger.error("Tried to close a none active scope!")
+            return
+
+        if self._finish_on_close:
+            self.span.finish()
diff --git a/synapse/util/logutils.py b/synapse/logging/utils.py
index 7df0fa6087..7df0fa6087 100644
--- a/synapse/util/logutils.py
+++ b/synapse/logging/utils.py