From 38a6d3eea7e3ce1a2e37f9f88fd6742fcadc90d0 Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Thu, 11 Jul 2019 10:36:03 +0100 Subject: Add basic opentracing support (#5544) * Configure and initialise tracer Includes config options for the tracer and sets up JaegerClient. * Scope manager using LogContexts We piggy-back our tracer scopes by using log context. The current log context gives us the current scope. If new scope is created we create a stack of scopes in the context. * jaeger is a dependency now * Carrier inject and extraction for Twisted Headers * Trace federation requests on the way in and out. The span is created in _started_processing and closed in _finished_processing because we need a meaningful log context. * Create logcontext for new scope. Instead of having a stack of scopes in a logcontext we create a new context for a new scope if the current logcontext already has a scope. * Remove scope from logcontext if logcontext is top level * Disable tracer if not configured * typo * Remove dependence on jaeger internals * bools * Set service name * :Explicitely state that the tracer is disabled * Black is the new black * Newsfile * Code style * Use the new config setup. * Generate config. * Copyright * Rename config to opentracing * Remove user whitelisting * Empty whitelist by default * User ConfigError instead of RuntimeError * Use isinstance * Use tag constants for opentracing. * Remove debug comment and no need to explicitely record error * Two errors a "s(c)entry" * Docstrings! * Remove debugging brainslip * Homeserver Whitlisting * Better opentracing config comment * linting * Inclue worker name in service_name * Make opentracing an optional dependency * Neater config retreival * Clean up dummy tags * Instantiate tracing as object instead of global class * Inlcude opentracing as a homeserver member. * Thread opentracing to the request level * Reference opetnracing through hs * Instantiate dummy opentracin g for tests. * About to revert, just keeping the unfinished changes just in case * Revert back to global state, commit number: 9ce4a3d9067bf9889b86c360c05ac88618b85c4f * Use class level methods in tracerutils * Start and stop requests spans in a place where we have access to the authenticated entity * Seen it, isort it * Make sure to close the active span. * I'm getting black and blue from this. * Logger formatting Co-Authored-By: Erik Johnston * Outdated comment * Import opentracing at the top * Return a contextmanager * Start tracing client requests from the servlet * Return noop context manager if not tracing * Explicitely say that these are federation requests * Include servlet name in client requests * Use context manager * Move opentracing to logging/ * Seen it, isort it again! * Ignore twisted return exceptions on context exit * Escape the scope * Scopes should be entered to make them useful. * Nicer decorator names * Just one init, init? * Don't need to close something that isn't open * Docs make you smarter --- synapse/logging/opentracing.py | 362 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 362 insertions(+) create mode 100644 synapse/logging/opentracing.py (limited to 'synapse/logging/opentracing.py') diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py new file mode 100644 index 0000000000..f0ceea2a64 --- /dev/null +++ b/synapse/logging/opentracing.py @@ -0,0 +1,362 @@ +# -*- coding: utf-8 -*- +# Copyright 2019 The Matrix.org Foundation C.I.C.d +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.import opentracing + + +# NOTE +# This is a small wrapper around opentracing because opentracing is not currently +# packaged downstream (specifically debian). Since opentracing instrumentation is +# fairly invasive it was awkward to make it optional. As a result we opted to encapsulate +# all opentracing state in these methods which effectively noop if opentracing is +# not present. We should strongly consider encouraging the downstream distributers +# to package opentracing and making opentracing a full dependency. In order to facilitate +# this move the methods have work very similarly to opentracing's and it should only +# be a matter of few regexes to move over to opentracing's access patterns proper. + +try: + import opentracing +except ImportError: + opentracing = None +try: + from jaeger_client import Config as JaegerConfig + from synapse.logging.scopecontextmanager import LogContextScopeManager +except ImportError: + JaegerConfig = None + LogContextScopeManager = None + +import contextlib +import logging +import re +from functools import wraps + +from twisted.internet import defer + +logger = logging.getLogger(__name__) + + +class _DumTagNames(object): + """wrapper of opentracings tags. We need to have them if we + want to reference them without opentracing around. Clearly they + should never actually show up in a trace. `set_tags` overwrites + these with the correct ones.""" + + INVALID_TAG = "invalid-tag" + COMPONENT = INVALID_TAG + DATABASE_INSTANCE = INVALID_TAG + DATABASE_STATEMENT = INVALID_TAG + DATABASE_TYPE = INVALID_TAG + DATABASE_USER = INVALID_TAG + ERROR = INVALID_TAG + HTTP_METHOD = INVALID_TAG + HTTP_STATUS_CODE = INVALID_TAG + HTTP_URL = INVALID_TAG + MESSAGE_BUS_DESTINATION = INVALID_TAG + PEER_ADDRESS = INVALID_TAG + PEER_HOSTNAME = INVALID_TAG + PEER_HOST_IPV4 = INVALID_TAG + PEER_HOST_IPV6 = INVALID_TAG + PEER_PORT = INVALID_TAG + PEER_SERVICE = INVALID_TAG + SAMPLING_PRIORITY = INVALID_TAG + SERVICE = INVALID_TAG + SPAN_KIND = INVALID_TAG + SPAN_KIND_CONSUMER = INVALID_TAG + SPAN_KIND_PRODUCER = INVALID_TAG + SPAN_KIND_RPC_CLIENT = INVALID_TAG + SPAN_KIND_RPC_SERVER = INVALID_TAG + + +def only_if_tracing(func): + """Executes the function only if we're tracing. Otherwise return. + Assumes the function wrapped may return None""" + + @wraps(func) + def _only_if_tracing_inner(*args, **kwargs): + if opentracing: + return func(*args, **kwargs) + else: + return + + return _only_if_tracing_inner + + +# Block everything by default +_homeserver_whitelist = None + +tags = _DumTagNames + + +def init_tracer(config): + """Set the whitelists and initialise the JaegerClient tracer + + Args: + config (Config) + The config used by the homeserver. Here it's used to set the service + name to the homeserver's. + """ + global opentracing + if not config.tracer_config.get("tracer_enabled", False): + # We don't have a tracer + opentracing = None + return + + if not opentracing: + logger.error( + "The server has been configure to use opentracing but opentracing is not installed." + ) + raise ModuleNotFoundError("opentracing") + + if not JaegerConfig: + logger.error( + "The server has been configure to use opentracing but opentracing is not installed." + ) + + # Include the worker name + name = config.worker_name if config.worker_name else "master" + + set_homeserver_whitelist(config.tracer_config["homeserver_whitelist"]) + jaeger_config = JaegerConfig( + config={"sampler": {"type": "const", "param": 1}, "logging": True}, + service_name="{} {}".format(config.server_name, name), + scope_manager=LogContextScopeManager(config), + ) + jaeger_config.initialize_tracer() + + # Set up tags to be opentracing's tags + global tags + tags = opentracing.tags + + +@contextlib.contextmanager +def _noop_context_manager(*args, **kwargs): + """Does absolutely nothing really well. Can be entered and exited arbitrarily. + Good substitute for an opentracing scope.""" + yield + + +# Could use kwargs but I want these to be explicit +def start_active_span( + operation_name, + child_of=None, + references=None, + tags=None, + start_time=None, + ignore_active_span=False, + finish_on_close=True, +): + """Starts an active opentracing span. Note, the scope doesn't become active + until it has been entered, however, the span starts from the time this + message is called. + Args: + See opentracing.tracer + Returns: + scope (Scope) or noop_context_manager + """ + if opentracing is None: + return _noop_context_manager() + else: + # We need to enter the scope here for the logcontext to become active + return opentracing.tracer.start_active_span( + operation_name, + child_of=child_of, + references=references, + tags=tags, + start_time=start_time, + ignore_active_span=ignore_active_span, + finish_on_close=finish_on_close, + ) + + +@only_if_tracing +def close_active_span(): + """Closes the active span. This will close it's logcontext if the context + was made for the span""" + opentracing.tracer.scope_manager.active.__exit__(None, None, None) + + +@only_if_tracing +def set_tag(key, value): + """Set's a tag on the active span""" + opentracing.tracer.active_span.set_tag(key, value) + + +@only_if_tracing +def log_kv(key_values, timestamp=None): + """Log to the active span""" + opentracing.tracer.active_span.log_kv(key_values, timestamp) + + +# Note: we don't have a get baggage items because we're trying to hide all +# scope and span state from synapse. I think this method may also be useless +# as a result +@only_if_tracing +def set_baggage_item(key, value): + """Attach baggage to the active span""" + opentracing.tracer.active_span.set_baggage_item(key, value) + + +@only_if_tracing +def set_operation_name(operation_name): + """Sets the operation name of the active span""" + opentracing.tracer.active_span.set_operation_name(operation_name) + + +@only_if_tracing +def set_homeserver_whitelist(homeserver_whitelist): + """Sets the whitelist + + Args: + homeserver_whitelist (iterable of strings): regex of whitelisted homeservers + """ + global _homeserver_whitelist + if homeserver_whitelist: + # Makes a single regex which accepts all passed in regexes in the list + _homeserver_whitelist = re.compile( + "({})".format(")|(".join(homeserver_whitelist)) + ) + + +@only_if_tracing +def whitelisted_homeserver(destination): + """Checks if a destination matches the whitelist + Args: + destination (String)""" + global _homeserver_whitelist + if _homeserver_whitelist: + return _homeserver_whitelist.match(destination) + return False + + +def start_active_span_from_context( + headers, + operation_name, + references=None, + tags=None, + start_time=None, + ignore_active_span=False, + finish_on_close=True, +): + """ + Extracts a span context from Twisted Headers. + args: + headers (twisted.web.http_headers.Headers) + returns: + span_context (opentracing.span.SpanContext) + """ + # Twisted encodes the values as lists whereas opentracing doesn't. + # So, we take the first item in the list. + # Also, twisted uses byte arrays while opentracing expects strings. + if opentracing is None: + return _noop_context_manager() + + header_dict = {k.decode(): v[0].decode() for k, v in headers.getAllRawHeaders()} + context = opentracing.tracer.extract(opentracing.Format.HTTP_HEADERS, header_dict) + + return opentracing.tracer.start_active_span( + operation_name, + child_of=context, + references=references, + tags=tags, + start_time=start_time, + ignore_active_span=ignore_active_span, + finish_on_close=finish_on_close, + ) + + +@only_if_tracing +def inject_active_span_twisted_headers(headers, destination): + """ + Injects a span context into twisted headers inplace + + Args: + headers (twisted.web.http_headers.Headers) + span (opentracing.Span) + + Returns: + Inplace modification of headers + + Note: + The headers set by the tracer are custom to the tracer implementation which + should be unique enough that they don't interfere with any headers set by + synapse or twisted. If we're still using jaeger these headers would be those + here: + https://github.com/jaegertracing/jaeger-client-python/blob/master/jaeger_client/constants.py + """ + + if not whitelisted_homeserver(destination): + return + + span = opentracing.tracer.active_span + carrier = {} + opentracing.tracer.inject(span, opentracing.Format.HTTP_HEADERS, carrier) + + for key, value in carrier.items(): + headers.addRawHeaders(key, value) + + +@only_if_tracing +def inject_active_span_byte_dict(headers, destination): + """ + Injects a span context into a dict where the headers are encoded as byte + strings + + Args: + headers (dict) + span (opentracing.Span) + + Returns: + Inplace modification of headers + + Note: + The headers set by the tracer are custom to the tracer implementation which + should be unique enough that they don't interfere with any headers set by + synapse or twisted. If we're still using jaeger these headers would be those + here: + https://github.com/jaegertracing/jaeger-client-python/blob/master/jaeger_client/constants.py + """ + if not whitelisted_homeserver(destination): + return + + span = opentracing.tracer.active_span + + carrier = {} + opentracing.tracer.inject(span, opentracing.Format.HTTP_HEADERS, carrier) + + for key, value in carrier.items(): + headers[key.encode()] = [value.encode()] + + +def trace_servlet(servlet_name, func): + """Decorator which traces a serlet. It starts a span with some servlet specific + tags such as the servlet_name and request information""" + + @wraps(func) + @defer.inlineCallbacks + def _trace_servlet_inner(request, *args, **kwargs): + with start_active_span_from_context( + request.requestHeaders, + "incoming-client-request", + tags={ + "request_id": request.get_request_id(), + tags.SPAN_KIND: tags.SPAN_KIND_RPC_SERVER, + tags.HTTP_METHOD: request.get_method(), + tags.HTTP_URL: request.get_redacted_uri(), + tags.PEER_HOST_IPV6: request.getClientIP(), + "servlet_name": servlet_name, + }, + ): + result = yield defer.maybeDeferred(func, request, *args, **kwargs) + defer.returnValue(result) + + return _trace_servlet_inner -- cgit 1.5.1 From 82345bc09a9980de58c13a18b489403237acf4bd Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 18 Jul 2019 15:06:54 +0100 Subject: Clean up opentracing configuration options (#5712) Clean up config settings and dead code. This is mostly about cleaning up the config format, to bring it into line with our conventions. In particular: * There should be a blank line after `## Section ##' headings * There should be a blank line between each config setting * There should be a `#`-only line between a comment and the setting it describes * We don't really do the `# #` style commenting-out of whole sections if we can help it * rename `tracer_enabled` to `enabled` While we're here, do more config parsing upfront, which makes it easier to use later on. Also removes redundant code from LogContextScopeManager. Also changes the changelog fragment to a `feature` - it's exciting! --- changelog.d/5544.feature | 2 ++ changelog.d/5544.misc | 1 - changelog.d/5712.feature | 2 ++ docs/sample_config.yaml | 45 ++++++++++++++++-------- synapse/config/tracer.py | 63 ++++++++++++++++++++++------------ synapse/logging/opentracing.py | 42 ++++++++++------------- synapse/logging/scopecontextmanager.py | 4 +-- 7 files changed, 96 insertions(+), 63 deletions(-) create mode 100644 changelog.d/5544.feature delete mode 100644 changelog.d/5544.misc create mode 100644 changelog.d/5712.feature (limited to 'synapse/logging/opentracing.py') diff --git a/changelog.d/5544.feature b/changelog.d/5544.feature new file mode 100644 index 0000000000..7d3459129d --- /dev/null +++ b/changelog.d/5544.feature @@ -0,0 +1,2 @@ +Add support for opentracing. + diff --git a/changelog.d/5544.misc b/changelog.d/5544.misc deleted file mode 100644 index 81d6f74c31..0000000000 --- a/changelog.d/5544.misc +++ /dev/null @@ -1 +0,0 @@ -Added opentracing and configuration options. diff --git a/changelog.d/5712.feature b/changelog.d/5712.feature new file mode 100644 index 0000000000..7d3459129d --- /dev/null +++ b/changelog.d/5712.feature @@ -0,0 +1,2 @@ +Add support for opentracing. + diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 663ff31622..5b804d16a4 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -1409,17 +1409,34 @@ password_config: ## Opentracing ## -# These settings enable opentracing which implements distributed tracing -# This allows you to observe the causal chain of events across servers -# including requests, key lookups etc. across any server running -# synapse or any other other services which supports opentracing. -# (specifically those implemented with jaeger) - -#opentracing: -# # Enable / disable tracer -# tracer_enabled: false -# # The list of homeservers we wish to expose our current traces to. -# # The list is a list of regexes which are matched against the -# # servername of the homeserver -# homeserver_whitelist: -# - ".*" + +# These settings enable opentracing, which implements distributed tracing. +# This allows you to observe the causal chains of events across servers +# including requests, key lookups etc., across any server running +# synapse or any other other services which supports opentracing +# (specifically those implemented with Jaeger). +# +opentracing: + # tracing is disabled by default. Uncomment the following line to enable it. + # + #enabled: true + + # The list of homeservers we wish to send and receive span contexts and span baggage. + # + # Though it's mostly safe to send and receive span contexts to and from + # untrusted users since span contexts are usually opaque ids it can lead to + # two problems, namely: + # - If the span context is marked as sampled by the sending homeserver the receiver will + # sample it. Therefore two homeservers with wildly disparaging sampling policies + # could incur higher sampling counts than intended. + # - Span baggage can be arbitrary data. For safety this has been disabled in synapse + # but that doesn't prevent another server sending you baggage which will be logged + # to opentracing logs. + # + # This a list of regexes which are matched against the server_name of the + # homeserver. + # + # By defult, it is empty, so no servers are matched. + # + #homeserver_whitelist: + # - ".*" diff --git a/synapse/config/tracer.py b/synapse/config/tracer.py index 63a637984a..a2ce9ab3f6 100644 --- a/synapse/config/tracer.py +++ b/synapse/config/tracer.py @@ -18,33 +18,52 @@ from ._base import Config, ConfigError class TracerConfig(Config): def read_config(self, config, **kwargs): - self.tracer_config = config.get("opentracing") + opentracing_config = config.get("opentracing") + if opentracing_config is None: + opentracing_config = {} - self.tracer_config = config.get("opentracing", {"tracer_enabled": False}) + self.opentracer_enabled = opentracing_config.get("enabled", False) + if not self.opentracer_enabled: + return - if self.tracer_config.get("tracer_enabled", False): - # The tracer is enabled so sanitize the config - # If no whitelists are given - self.tracer_config.setdefault("homeserver_whitelist", []) + # The tracer is enabled so sanitize the config - if not isinstance(self.tracer_config.get("homeserver_whitelist"), list): - raise ConfigError("Tracer homesererver_whitelist config is malformed") + self.opentracer_whitelist = opentracing_config.get("homeserver_whitelist", []) + if not isinstance(self.opentracer_whitelist, list): + raise ConfigError("Tracer homeserver_whitelist config is malformed") def generate_config_section(cls, **kwargs): return """\ ## Opentracing ## - # These settings enable opentracing which implements distributed tracing - # This allows you to observe the causal chain of events across servers - # including requests, key lookups etc. across any server running - # synapse or any other other services which supports opentracing. - # (specifically those implemented with jaeger) - - #opentracing: - # # Enable / disable tracer - # tracer_enabled: false - # # The list of homeservers we wish to expose our current traces to. - # # The list is a list of regexes which are matched against the - # # servername of the homeserver - # homeserver_whitelist: - # - ".*" + + # These settings enable opentracing, which implements distributed tracing. + # This allows you to observe the causal chains of events across servers + # including requests, key lookups etc., across any server running + # synapse or any other other services which supports opentracing + # (specifically those implemented with Jaeger). + # + opentracing: + # tracing is disabled by default. Uncomment the following line to enable it. + # + #enabled: true + + # The list of homeservers we wish to send and receive span contexts and span baggage. + # + # Though it's mostly safe to send and receive span contexts to and from + # untrusted users since span contexts are usually opaque ids it can lead to + # two problems, namely: + # - If the span context is marked as sampled by the sending homeserver the receiver will + # sample it. Therefore two homeservers with wildly disparaging sampling policies + # could incur higher sampling counts than intended. + # - Span baggage can be arbitrary data. For safety this has been disabled in synapse + # but that doesn't prevent another server sending you baggage which will be logged + # to opentracing logs. + # + # This a list of regexes which are matched against the server_name of the + # homeserver. + # + # By defult, it is empty, so no servers are matched. + # + #homeserver_whitelist: + # - ".*" """ diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index f0ceea2a64..415040f5ee 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2019 The Matrix.org Foundation C.I.C.d +# Copyright 2019 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -24,6 +24,15 @@ # this move the methods have work very similarly to opentracing's and it should only # be a matter of few regexes to move over to opentracing's access patterns proper. +import contextlib +import logging +import re +from functools import wraps + +from twisted.internet import defer + +from synapse.config import ConfigError + try: import opentracing except ImportError: @@ -35,12 +44,6 @@ except ImportError: JaegerConfig = None LogContextScopeManager = None -import contextlib -import logging -import re -from functools import wraps - -from twisted.internet import defer logger = logging.getLogger(__name__) @@ -91,7 +94,8 @@ def only_if_tracing(func): return _only_if_tracing_inner -# Block everything by default +# A regex which matches the server_names to expose traces for. +# None means 'block everything'. _homeserver_whitelist = None tags = _DumTagNames @@ -101,31 +105,24 @@ def init_tracer(config): """Set the whitelists and initialise the JaegerClient tracer Args: - config (Config) - The config used by the homeserver. Here it's used to set the service - name to the homeserver's. + config (HomeserverConfig): The config used by the homeserver """ global opentracing - if not config.tracer_config.get("tracer_enabled", False): + if not config.opentracer_enabled: # We don't have a tracer opentracing = None return - if not opentracing: - logger.error( - "The server has been configure to use opentracing but opentracing is not installed." - ) - raise ModuleNotFoundError("opentracing") - - if not JaegerConfig: - logger.error( - "The server has been configure to use opentracing but opentracing is not installed." + if not opentracing or not JaegerConfig: + raise ConfigError( + "The server has been configured to use opentracing but opentracing is not " + "installed." ) # Include the worker name name = config.worker_name if config.worker_name else "master" - set_homeserver_whitelist(config.tracer_config["homeserver_whitelist"]) + set_homeserver_whitelist(config.opentracer_whitelist) jaeger_config = JaegerConfig( config={"sampler": {"type": "const", "param": 1}, "logging": True}, service_name="{} {}".format(config.server_name, name), @@ -232,7 +229,6 @@ def whitelisted_homeserver(destination): """Checks if a destination matches the whitelist Args: destination (String)""" - global _homeserver_whitelist if _homeserver_whitelist: return _homeserver_whitelist.match(destination) return False diff --git a/synapse/logging/scopecontextmanager.py b/synapse/logging/scopecontextmanager.py index 91e14462f3..8c661302c9 100644 --- a/synapse/logging/scopecontextmanager.py +++ b/synapse/logging/scopecontextmanager.py @@ -34,9 +34,7 @@ class LogContextScopeManager(ScopeManager): """ def __init__(self, config): - # Set the whitelists - logger.info(config.tracer_config) - self._homeserver_whitelist = config.tracer_config["homeserver_whitelist"] + pass @property def active(self): -- cgit 1.5.1 From 826e6ec3bdfca92a5815f032c3246fab9a2aef88 Mon Sep 17 00:00:00 2001 From: Jorik Schellekens Date: Mon, 22 Jul 2019 11:15:21 +0100 Subject: Opentracing Documentation (#5703) * Opentracing survival guide * Update decorator names in doc * Doc cleanup These are all alterations as a result of comments in #5703, it includes mostly typos and clarifications. The most interesting changes are: - Split developer and user docs into two sections - Add a high level description of OpenTracing * newsfile * Move contributer specific info to docstring. * Sample config. * Trailing whitespace. * Update 5703.misc * Apply suggestions from code review Mostly just rewording parts of the docs for clarity. Co-Authored-By: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- changelog.d/5703.misc | 1 + docs/opentracing.rst | 100 +++++++++++++++++++++++++++++++++ docs/sample_config.yaml | 14 +---- synapse/config/tracer.py | 14 +---- synapse/logging/opentracing.py | 125 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 230 insertions(+), 24 deletions(-) create mode 100644 changelog.d/5703.misc create mode 100644 docs/opentracing.rst (limited to 'synapse/logging/opentracing.py') diff --git a/changelog.d/5703.misc b/changelog.d/5703.misc new file mode 100644 index 0000000000..6e9b2d734e --- /dev/null +++ b/changelog.d/5703.misc @@ -0,0 +1 @@ +Documentation for opentracing. diff --git a/docs/opentracing.rst b/docs/opentracing.rst new file mode 100644 index 0000000000..b91a2208a8 --- /dev/null +++ b/docs/opentracing.rst @@ -0,0 +1,100 @@ +=========== +OpenTracing +=========== + +Background +---------- + +OpenTracing is a semi-standard being adopted by a number of distributed tracing +platforms. It is a common api for facilitating vendor-agnostic tracing +instrumentation. That is, we can use the OpenTracing api and select one of a +number of tracer implementations to do the heavy lifting in the background. +Our current selected implementation is Jaeger. + +OpenTracing is a tool which gives an insight into the causal relationship of +work done in and between servers. The servers each track events and report them +to a centralised server - in Synapse's case: Jaeger. The basic unit used to +represent events is the span. The span roughly represents a single piece of work +that was done and the time at which it occurred. A span can have child spans, +meaning that the work of the child had to be completed for the parent span to +complete, or it can have follow-on spans which represent work that is undertaken +as a result of the parent but is not depended on by the parent to in order to +finish. + +Since this is undertaken in a distributed environment a request to another +server, such as an RPC or a simple GET, can be considered a span (a unit or +work) for the local server. This causal link is what OpenTracing aims to +capture and visualise. In order to do this metadata about the local server's +span, i.e the 'span context', needs to be included with the request to the +remote. + +It is up to the remote server to decide what it does with the spans +it creates. This is called the sampling policy and it can be configured +through Jaeger's settings. + +For OpenTracing concepts see +https://opentracing.io/docs/overview/what-is-tracing/. + +For more information about Jaeger's implementation see +https://www.jaegertracing.io/docs/ + +===================== +Seting up OpenTracing +===================== + +To receive OpenTracing spans, start up a Jaeger server. This can be done +using docker like so: + +.. code-block:: bash + + docker run -d --name jaeger + -p 6831:6831/udp \ + -p 6832:6832/udp \ + -p 5778:5778 \ + -p 16686:16686 \ + -p 14268:14268 \ + jaegertracing/all-in-one:1.13 + +Latest documentation is probably at +https://www.jaegertracing.io/docs/1.13/getting-started/ + + +Enable OpenTracing in Synapse +----------------------------- + +OpenTracing is not enabled by default. It must be enabled in the homeserver +config by uncommenting the config options under ``opentracing`` as shown in +the `sample config <./sample_config.yaml>`_. For example: + +.. code-block:: yaml + + opentracing: + tracer_enabled: true + homeserver_whitelist: + - "mytrustedhomeserver.org" + - "*.myotherhomeservers.com" + +Homeserver whitelisting +----------------------- + +The homeserver whitelist is configured using regular expressions. A list of regular +expressions can be given and their union will be compared when propagating any +spans contexts to another homeserver. + +Though it's mostly safe to send and receive span contexts to and from +untrusted users since span contexts are usually opaque ids it can lead to +two problems, namely: + +- If the span context is marked as sampled by the sending homeserver the receiver will + sample it. Therefore two homeservers with wildly different sampling policies + could incur higher sampling counts than intended. +- Sending servers can attach arbitrary data to spans, known as 'baggage'. For safety this has been disabled in Synapse + but that doesn't prevent another server sending you baggage which will be logged + to OpenTracing's logs. + +================== +Configuring Jaeger +================== + +Sampling strategies can be set as in this document: +https://www.jaegertracing.io/docs/1.13/sampling/ diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 5b804d16a4..0a96197ca6 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -1422,18 +1422,8 @@ opentracing: #enabled: true # The list of homeservers we wish to send and receive span contexts and span baggage. - # - # Though it's mostly safe to send and receive span contexts to and from - # untrusted users since span contexts are usually opaque ids it can lead to - # two problems, namely: - # - If the span context is marked as sampled by the sending homeserver the receiver will - # sample it. Therefore two homeservers with wildly disparaging sampling policies - # could incur higher sampling counts than intended. - # - Span baggage can be arbitrary data. For safety this has been disabled in synapse - # but that doesn't prevent another server sending you baggage which will be logged - # to opentracing logs. - # - # This a list of regexes which are matched against the server_name of the + # See docs/opentracing.rst + # This is a list of regexes which are matched against the server_name of the # homeserver. # # By defult, it is empty, so no servers are matched. diff --git a/synapse/config/tracer.py b/synapse/config/tracer.py index a2ce9ab3f6..4479454415 100644 --- a/synapse/config/tracer.py +++ b/synapse/config/tracer.py @@ -48,18 +48,8 @@ class TracerConfig(Config): #enabled: true # The list of homeservers we wish to send and receive span contexts and span baggage. - # - # Though it's mostly safe to send and receive span contexts to and from - # untrusted users since span contexts are usually opaque ids it can lead to - # two problems, namely: - # - If the span context is marked as sampled by the sending homeserver the receiver will - # sample it. Therefore two homeservers with wildly disparaging sampling policies - # could incur higher sampling counts than intended. - # - Span baggage can be arbitrary data. For safety this has been disabled in synapse - # but that doesn't prevent another server sending you baggage which will be logged - # to opentracing logs. - # - # This a list of regexes which are matched against the server_name of the + # See docs/opentracing.rst + # This is a list of regexes which are matched against the server_name of the # homeserver. # # By defult, it is empty, so no servers are matched. diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index 415040f5ee..3da33d7826 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -24,6 +24,131 @@ # this move the methods have work very similarly to opentracing's and it should only # be a matter of few regexes to move over to opentracing's access patterns proper. +""" +============================ +Using OpenTracing in Synapse +============================ + +Python-specific tracing concepts are at https://opentracing.io/guides/python/. +Note that Synapse wraps OpenTracing in a small module (this one) in order to make the +OpenTracing dependency optional. That means that the access patterns are +different to those demonstrated in the OpenTracing guides. However, it is +still useful to know, especially if OpenTracing is included as a full dependency +in the future or if you are modifying this module. + + +OpenTracing is encapsulated so that +no span objects from OpenTracing are exposed in Synapse's code. This allows +OpenTracing to be easily disabled in Synapse and thereby have OpenTracing as +an optional dependency. This does however limit the number of modifiable spans +at any point in the code to one. From here out references to `opentracing` +in the code snippets refer to the Synapses module. + +Tracing +------- + +In Synapse it is not possible to start a non-active span. Spans can be started +using the ``start_active_span`` method. This returns a scope (see +OpenTracing docs) which is a context manager that needs to be entered and +exited. This is usually done by using ``with``. + +.. code-block:: python + + from synapse.logging.opentracing import start_active_span + + with start_active_span("operation name"): + # Do something we want to tracer + +Forgetting to enter or exit a scope will result in some mysterious and grievous log +context errors. + +At anytime where there is an active span ``opentracing.set_tag`` can be used to +set a tag on the current active span. + +Tracing functions +----------------- + +Functions can be easily traced using decorators. There is a decorator for +'normal' function and for functions which are actually deferreds. The name of +the function becomes the operation name for the span. + +.. code-block:: python + + from synapse.logging.opentracing import trace, trace_deferred + + # Start a span using 'normal_function' as the operation name + @trace + def normal_function(*args, **kwargs): + # Does all kinds of cool and expected things + return something_usual_and_useful + + # Start a span using 'deferred_function' as the operation name + @trace_deferred + @defer.inlineCallbacks + def deferred_function(*args, **kwargs): + # We start + yield we_wait + # we finish + defer.returnValue(something_usual_and_useful) + +Operation names can be explicitly set for functions by using +``trace_using_operation_name`` and +``trace_deferred_using_operation_name`` + +.. code-block:: python + + from synapse.logging.opentracing import ( + trace_using_operation_name, + trace_deferred_using_operation_name + ) + + @trace_using_operation_name("A *much* better operation name") + def normal_function(*args, **kwargs): + # Does all kinds of cool and expected things + return something_usual_and_useful + + @trace_deferred_using_operation_name("Another exciting operation name!") + @defer.inlineCallbacks + def deferred_function(*args, **kwargs): + # We start + yield we_wait + # we finish + defer.returnValue(something_usual_and_useful) + +Contexts and carriers +--------------------- + +There are a selection of wrappers for injecting and extracting contexts from +carriers provided. Unfortunately OpenTracing's three context injection +techniques are not adequate for our inject of OpenTracing span-contexts into +Twisted's http headers, EDU contents and our database tables. Also note that +the binary encoding format mandated by OpenTracing is not actually implemented +by jaeger_client v4.0.0 - it will silently noop. +Please refer to the end of ``logging/opentracing.py`` for the available +injection and extraction methods. + +Homeserver whitelisting +----------------------- + +Most of the whitelist checks are encapsulated in the modules's injection +and extraction method but be aware that using custom carriers or crossing +unchartered waters will require the enforcement of the whitelist. +``logging/opentracing.py`` has a ``whitelisted_homeserver`` method which takes +in a destination and compares it to the whitelist. + +======= +Gotchas +======= + +- Checking whitelists on span propagation +- Inserting pii +- Forgetting to enter or exit a scope +- Span source: make sure that the span you expect to be active across a + function call really will be that one. Does the current function have more + than one caller? Will all of those calling functions have be in a context + with an active span? +""" + import contextlib import logging import re -- cgit 1.5.1