Merge branch 'develop' of github.com:matrix-org/synapse into matrix-org-hotfixes

author: Erik Johnston <erik@matrix.org> 2019-07-23 09:20:26 +0100
committer: Erik Johnston <erik@matrix.org> 2019-07-23 09:20:26 +0100
commit: c02f26319dbe03e6766980796d6a24f5d054b06f (patch)
tree: 7a50e20396132d3af5c73930607a0f1d740ea108 /synapse
parent: Merge branch 'develop' of github.com:matrix-org/synapse into matrix-org-hotfixes (diff)
parent: Fix logging in workers (#5729) (diff)
download: synapse-c02f26319dbe03e6766980796d6a24f5d054b06f.tar.xz
7 files changed, 194 insertions, 69 deletions
diff --git a/synapse/__init__.py b/synapse/__init__.py
index cf22fabd61..f26e49da36 100644
--- a/synapse/__init__.py
+++ b/synapse/__init__.py
@@ -35,4 +35,4 @@ try:
 except ImportError:
     pass
 
-__version__ = "1.1.0"
+__version__ = "1.2.0rc1"
diff --git a/synapse/config/tracer.py b/synapse/config/tracer.py
index a2ce9ab3f6..4479454415 100644
--- a/synapse/config/tracer.py
+++ b/synapse/config/tracer.py
@@ -48,18 +48,8 @@ class TracerConfig(Config):
             #enabled: true
 
             # The list of homeservers we wish to send and receive span contexts and span baggage.
-            #
-            # Though it's mostly safe to send and receive span contexts to and from
-            # untrusted users since span contexts are usually opaque ids it can lead to
-            # two problems, namely:
-            # - If the span context is marked as sampled by the sending homeserver the receiver will
-            # sample it. Therefore two homeservers with wildly disparaging sampling policies
-            # could incur higher sampling counts than intended.
-            # - Span baggage can be arbitrary data. For safety this has been disabled in synapse
-            # but that doesn't prevent another server sending you baggage which will be logged
-            # to opentracing logs.
-            #
-            # This a list of regexes which are matched against the server_name of the
+            # See docs/opentracing.rst
+            # This is a list of regexes which are matched against the server_name of the
             # homeserver.
             #
             # By defult, it is empty, so no servers are matched.
diff --git a/synapse/config/workers.py b/synapse/config/workers.py
index 246d72cd61..bc0fc165e3 100644
--- a/synapse/config/workers.py
+++ b/synapse/config/workers.py
@@ -31,6 +31,7 @@ class WorkerConfig(Config):
         self.worker_listeners = config.get("worker_listeners", [])
         self.worker_daemonize = config.get("worker_daemonize")
         self.worker_pid_file = config.get("worker_pid_file")
+        self.worker_log_config = config.get("worker_log_config")
 
         # The host used to connect to the main synapse
         self.worker_replication_host = config.get("worker_replication_host", None)
diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py
index 341c863152..e8bb420ad1 100644
--- a/synapse/crypto/keyring.py
+++ b/synapse/crypto/keyring.py
@@ -238,27 +238,9 @@ class Keyring(object):
         """
 
         try:
-            # create a deferred for each server we're going to look up the keys
-            # for; we'll resolve them once we have completed our lookups.
-            # These will be passed into wait_for_previous_lookups to block
-            # any other lookups until we have finished.
-            # The deferreds are called with no logcontext.
-            server_to_deferred = {
-                rq.server_name: defer.Deferred() for rq in verify_requests
-            }
-
-            # We want to wait for any previous lookups to complete before
-            # proceeding.
-            yield self.wait_for_previous_lookups(server_to_deferred)
+            ctx = LoggingContext.current_context()
 
-            # Actually start fetching keys.
-            self._get_server_verify_keys(verify_requests)
-
-            # When we've finished fetching all the keys for a given server_name,
-            # resolve the deferred passed to `wait_for_previous_lookups` so that
-            # any lookups waiting will proceed.
-            #
-            # map from server name to a set of request ids
+            # map from server name to a set of outstanding request ids
             server_to_request_ids = {}
 
             for verify_request in verify_requests:
@@ -266,40 +248,61 @@ class Keyring(object):
                 request_id = id(verify_request)
                 server_to_request_ids.setdefault(server_name, set()).add(request_id)
 
-            def remove_deferreds(res, verify_request):
+            # Wait for any previous lookups to complete before proceeding.
+            yield self.wait_for_previous_lookups(server_to_request_ids.keys())
+
+            # take out a lock on each of the servers by sticking a Deferred in
+            # key_downloads
+            for server_name in server_to_request_ids.keys():
+                self.key_downloads[server_name] = defer.Deferred()
+                logger.debug("Got key lookup lock on %s", server_name)
+
+            # When we've finished fetching all the keys for a given server_name,
+            # drop the lock by resolving the deferred in key_downloads.
+            def drop_server_lock(server_name):
+                d = self.key_downloads.pop(server_name)
+                d.callback(None)
+
+            def lookup_done(res, verify_request):
                 server_name = verify_request.server_name
-                request_id = id(verify_request)
-                server_to_request_ids[server_name].discard(request_id)
-                if not server_to_request_ids[server_name]:
-                    d = server_to_deferred.pop(server_name, None)
-                    if d:
-                        d.callback(None)
+                server_requests = server_to_request_ids[server_name]
+                server_requests.remove(id(verify_request))
+
+                # if there are no more requests for this server, we can drop the lock.
+                if not server_requests:
+                    with PreserveLoggingContext(ctx):
+                        logger.debug("Releasing key lookup lock on %s", server_name)
+
+                    # ... but not immediately, as that can cause stack explosions if
+                    # we get a long queue of lookups.
+                    self.clock.call_later(0, drop_server_lock, server_name)
+
                 return res
 
             for verify_request in verify_requests:
-                verify_request.key_ready.addBoth(remove_deferreds, verify_request)
+                verify_request.key_ready.addBoth(lookup_done, verify_request)
+
+            # Actually start fetching keys.
+            self._get_server_verify_keys(verify_requests)
         except Exception:
             logger.exception("Error starting key lookups")
 
     @defer.inlineCallbacks
-    def wait_for_previous_lookups(self, server_to_deferred):
+    def wait_for_previous_lookups(self, server_names):
         """Waits for any previous key lookups for the given servers to finish.
 
         Args:
-            server_to_deferred (dict[str, Deferred]): server_name to deferred which gets
-                resolved once we've finished looking up keys for that server.
-                The Deferreds should be regular twisted ones which call their
-                callbacks with no logcontext.
-
-        Returns: a Deferred which resolves once all key lookups for the given
-            servers have completed. Follows the synapse rules of logcontext
-            preservation.
+            server_names (Iterable[str]): list of servers which we want to look up
+
+        Returns:
+            Deferred[None]: resolves once all key lookups for the given servers have
+                completed. Follows the synapse rules of logcontext preservation.
         """
         loop_count = 1
         while True:
             wait_on = [
                 (server_name, self.key_downloads[server_name])
-                for server_name in server_to_deferred.keys()
+                for server_name in server_names
                 if server_name in self.key_downloads
             ]
             if not wait_on:
@@ -314,19 +317,6 @@ class Keyring(object):
 
             loop_count += 1
 
-        ctx = LoggingContext.current_context()
-
-        def rm(r, server_name_):
-            with PreserveLoggingContext(ctx):
-                logger.debug("Releasing key lookup lock on %s", server_name_)
-                self.key_downloads.pop(server_name_, None)
-            return r
-
-        for server_name, deferred in server_to_deferred.items():
-            logger.debug("Got key lookup lock on %s", server_name)
-            self.key_downloads[server_name] = deferred
-            deferred.addBoth(rm, server_name)
-
     def _get_server_verify_keys(self, verify_requests):
         """Tries to find at least one key for each verify request
 
diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py
index 56d900080b..04393697c0 100644
--- a/synapse/logging/opentracing.py
+++ b/synapse/logging/opentracing.py
@@ -24,6 +24,131 @@
 # this move the methods have work very similarly to opentracing's and it should only
 # be a matter of few regexes to move over to opentracing's access patterns proper.
 
+"""
+============================
+Using OpenTracing in Synapse
+============================
+
+Python-specific tracing concepts are at https://opentracing.io/guides/python/.
+Note that Synapse wraps OpenTracing in a small module (this one) in order to make the
+OpenTracing dependency optional. That means that the access patterns are
+different to those demonstrated in the OpenTracing guides. However, it is
+still useful to know, especially if OpenTracing is included as a full dependency
+in the future or if you are modifying this module.
+
+
+OpenTracing is encapsulated so that
+no span objects from OpenTracing are exposed in Synapse's code. This allows
+OpenTracing to be easily disabled in Synapse and thereby have OpenTracing as
+an optional dependency. This does however limit the number of modifiable spans
+at any point in the code to one. From here out references to `opentracing`
+in the code snippets refer to Synapse's module.
+
+Tracing
+-------
+
+In Synapse it is not possible to start a non-active span. Spans can be started
+using the ``start_active_span`` method. This returns a scope (see
+OpenTracing docs) which is a context manager that needs to be entered and
+exited. This is usually done by using ``with``.
+
+.. code-block:: python
+
+   from synapse.logging.opentracing import start_active_span
+
+   with start_active_span("operation name"):
+       # Do something we want to trace
+
+Forgetting to enter or exit a scope will result in some mysterious and grievous log
+context errors.
+
+At any time when there is an active span, ``opentracing.set_tag`` can be used to
+set a tag on the current active span.
+
+Tracing functions
+-----------------
+
+Functions can be easily traced using decorators. There is a decorator for
+'normal' functions and for functions which return deferreds. The name of
+the function becomes the operation name for the span.
+
+.. code-block:: python
+
+   from synapse.logging.opentracing import trace, trace_deferred
+
+   # Start a span using 'normal_function' as the operation name
+   @trace
+   def normal_function(*args, **kwargs):
+       # Does all kinds of cool and expected things
+       return something_usual_and_useful
+
+   # Start a span using 'deferred_function' as the operation name
+   @trace_deferred
+   @defer.inlineCallbacks
+   def deferred_function(*args, **kwargs):
+       # We start
+       yield we_wait
+       # we finish
+       defer.returnValue(something_usual_and_useful)
+
+Operation names can be explicitly set for functions by using
+``trace_using_operation_name`` and
+``trace_deferred_using_operation_name``
+
+.. code-block:: python
+
+   from synapse.logging.opentracing import (
+       trace_using_operation_name,
+       trace_deferred_using_operation_name
+   )
+
+   @trace_using_operation_name("A *much* better operation name")
+   def normal_function(*args, **kwargs):
+       # Does all kinds of cool and expected things
+       return something_usual_and_useful
+
+   @trace_deferred_using_operation_name("Another exciting operation name!")
+   @defer.inlineCallbacks
+   def deferred_function(*args, **kwargs):
+       # We start
+       yield we_wait
+       # we finish
+       defer.returnValue(something_usual_and_useful)
+
+Contexts and carriers
+---------------------
+
+A selection of wrappers is provided for injecting and extracting contexts from
+carriers. Unfortunately OpenTracing's three context injection
+techniques are not adequate for our injection of OpenTracing span-contexts into
+Twisted's http headers, EDU contents and our database tables. Also note that
+the binary encoding format mandated by OpenTracing is not actually implemented
+by jaeger_client v4.0.0 - it will silently noop.
+Please refer to the end of ``logging/opentracing.py`` for the available
+injection and extraction methods.
+
+Homeserver whitelisting
+-----------------------
+
+Most of the whitelist checks are encapsulated in the module's injection
+and extraction methods, but be aware that using custom carriers or crossing
+uncharted waters will require the enforcement of the whitelist.
+``logging/opentracing.py`` has a ``whitelisted_homeserver`` method which takes
+in a destination and compares it to the whitelist.
+
+=======
+Gotchas
+=======
+
+- Checking whitelists on span propagation
+- Inserting pii
+- Forgetting to enter or exit a scope
+- Span source: make sure that the span you expect to be active across a
+  function call really will be that one. Does the current function have more
+  than one caller? Will all of those calling functions be in a context
+  with an active span?
+"""
+
 import contextlib
 import logging
 import re
diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py
index 6b0ca80087..86a333a919 100644
--- a/synapse/storage/__init__.py
+++ b/synapse/storage/__init__.py
@@ -469,7 +469,7 @@ class DataStore(
         return self._simple_select_list(
             table="users",
             keyvalues={},
-            retcols=["name", "password_hash", "is_guest", "admin"],
+            retcols=["name", "password_hash", "is_guest", "admin", "user_type"],
             desc="get_users",
         )
 
@@ -494,7 +494,7 @@ class DataStore(
             orderby=order,
             start=start,
             limit=limit,
-            retcols=["name", "password_hash", "is_guest", "admin"],
+            retcols=["name", "password_hash", "is_guest", "admin", "user_type"],
         )
         count = yield self.runInteraction("get_users_paginate", self.get_user_count_txn)
         retval = {"users": users, "total": count}
@@ -514,7 +514,7 @@ class DataStore(
             table="users",
             term=term,
             col="name",
-            retcols=["name", "password_hash", "is_guest", "admin"],
+            retcols=["name", "password_hash", "is_guest", "admin", "user_type"],
             desc="search_users",
         )
 
diff --git a/synapse/util/versionstring.py b/synapse/util/versionstring.py
index a4d9a462f7..fa404b9d75 100644
--- a/synapse/util/versionstring.py
+++ b/synapse/util/versionstring.py
@@ -22,6 +22,23 @@ logger = logging.getLogger(__name__)
 
 
 def get_version_string(module):
+    """Given a module calculate a git-aware version string for it.
+
+    If called on a module not in a git checkout, returns `__version__`.
+
+    Args:
+        module (module)
+
+    Returns:
+        str
+    """
+
+    cached_version = getattr(module, "_synapse_version_string_cache", None)
+    if cached_version:
+        return cached_version
+
+    version_string = module.__version__
+
     try:
         null = open(os.devnull, "w")
         cwd = os.path.dirname(os.path.abspath(module.__file__))
@@ -80,8 +97,10 @@ def get_version_string(module):
                 s for s in (git_branch, git_tag, git_commit, git_dirty) if s
             )
 
-            return "%s (%s)" % (module.__version__, git_version)
+            version_string = "%s (%s)" % (module.__version__, git_version)
     except Exception as e:
         logger.info("Failed to check for git repository: %s", e)
 
-    return module.__version__
+    module._synapse_version_string_cache = version_string
+
+    return version_string
author	Erik Johnston <erik@matrix.org>	2019-07-23 09:20:26 +0100
committer	Erik Johnston <erik@matrix.org>	2019-07-23 09:20:26 +0100
commit	c02f26319dbe03e6766980796d6a24f5d054b06f (patch)
tree	7a50e20396132d3af5c73930607a0f1d740ea108 /synapse
parent	Merge branch 'develop' of github.com:matrix-org/synapse into matrix-org-hotfixes (diff)
parent	Fix logging in workers (#5729) (diff)
download	synapse-c02f26319dbe03e6766980796d6a24f5d054b06f.tar.xz